diff options
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
152 files changed, 11316 insertions, 7508 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/Kconfig b/drivers/gpu/drm/amd/amdgpu/Kconfig index e8af1f5e8a79..9221e5489069 100644 --- a/drivers/gpu/drm/amd/amdgpu/Kconfig +++ b/drivers/gpu/drm/amd/amdgpu/Kconfig @@ -42,3 +42,4 @@ config DRM_AMDGPU_GART_DEBUGFS source "drivers/gpu/drm/amd/acp/Kconfig" source "drivers/gpu/drm/amd/display/Kconfig" +source "drivers/gpu/drm/amd/amdkfd/Kconfig" diff --git a/drivers/gpu/drm/amd/amdgpu/Makefile b/drivers/gpu/drm/amd/amdgpu/Makefile index bfd332c95b61..f76bcb9c45e4 100644 --- a/drivers/gpu/drm/amd/amdgpu/Makefile +++ b/drivers/gpu/drm/amd/amdgpu/Makefile @@ -35,7 +35,8 @@ ccflags-y := -I$(FULL_AMD_PATH)/include/asic_reg \ -I$(FULL_AMD_DISPLAY_PATH) \ -I$(FULL_AMD_DISPLAY_PATH)/include \ -I$(FULL_AMD_DISPLAY_PATH)/dc \ - -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm + -I$(FULL_AMD_DISPLAY_PATH)/amdgpu_dm \ + -I$(FULL_AMD_PATH)/amdkfd amdgpu-y := amdgpu_drv.o @@ -51,8 +52,8 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \ amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \ amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \ amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \ - amdgpu_queue_mgr.o amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o \ - amdgpu_ids.o + amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \ + amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o # add asic specific block amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \ @@ -62,7 +63,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce amdgpu-y += \ vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \ - vega20_reg_init.o + vega20_reg_init.o nbio_v7_4.o # add DF block amdgpu-y += \ @@ -73,7 +74,7 @@ amdgpu-y += \ amdgpu-y += \ gmc_v7_0.o \ gmc_v8_0.o \ - gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o + gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o # add IH block amdgpu-y += \ @@ -88,7 +89,8 @@ amdgpu-y += \ amdgpu-y += \ amdgpu_psp.o \ psp_v3_1.o \ - psp_v10_0.o + psp_v10_0.o \ + psp_v11_0.o # add SMC block amdgpu-y += \ @@ -103,11 +105,13 @@ amdgpu-y += \ # add GFX block amdgpu-y += \ amdgpu_gfx.o \ + amdgpu_rlc.o \ gfx_v8_0.o \ gfx_v9_0.o # add async DMA block amdgpu-y += \ + amdgpu_sdma.o \ sdma_v2_4.o \ sdma_v3_0.o \ sdma_v4_0.o @@ -134,6 +138,9 @@ amdgpu-y += \ amdgpu-y += amdgpu_amdkfd.o ifneq ($(CONFIG_HSA_AMD),) +AMDKFD_PATH := ../amdkfd +include $(FULL_AMD_PATH)/amdkfd/Makefile +amdgpu-y += $(AMDKFD_FILES) amdgpu-y += \ amdgpu_amdkfd_fence.o \ amdgpu_amdkfd_gpuvm.o \ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h index 447c4c7a36d6..bcef6ea4bcf9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h @@ -28,6 +28,8 @@ #ifndef __AMDGPU_H__ #define __AMDGPU_H__ +#include "amdgpu_ctx.h" + #include <linux/atomic.h> #include <linux/wait.h> #include <linux/list.h> @@ -69,12 +71,35 @@ #include "amdgpu_vcn.h" #include "amdgpu_mn.h" #include "amdgpu_gmc.h" +#include "amdgpu_gfx.h" +#include "amdgpu_sdma.h" #include "amdgpu_dm.h" #include "amdgpu_virt.h" +#include "amdgpu_csa.h" #include "amdgpu_gart.h" #include "amdgpu_debugfs.h" #include "amdgpu_job.h" #include "amdgpu_bo_list.h" +#include "amdgpu_gem.h" +#include "amdgpu_doorbell.h" +#include "amdgpu_amdkfd.h" + +#define MAX_GPU_INSTANCE 16 + +struct amdgpu_gpu_instance +{ + struct amdgpu_device *adev; + int mgpu_fan_enabled; +}; + +struct amdgpu_mgpu_info +{ + struct amdgpu_gpu_instance gpu_ins[MAX_GPU_INSTANCE]; + struct mutex mutex; + uint32_t num_gpu; + uint32_t num_dgpu; + uint32_t num_apu; +}; /* * Modules parameters. @@ -129,6 +154,8 @@ extern int amdgpu_compute_multipipe; extern int amdgpu_gpu_recovery; extern int amdgpu_emu_mode; extern uint amdgpu_smu_memory_pool_size; +extern uint amdgpu_dc_feature_mask; +extern struct amdgpu_mgpu_info mgpu_info; #ifdef CONFIG_DRM_AMDGPU_SI extern int amdgpu_si_support; @@ -137,6 +164,7 @@ extern int amdgpu_si_support; extern int amdgpu_cik_support; #endif +#define AMDGPU_VM_MAX_NUM_CTX 4096 #define AMDGPU_SG_THRESHOLD (256*1024*1024) #define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */ #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000 @@ -148,9 +176,6 @@ extern int amdgpu_cik_support; #define AMDGPUFB_CONN_LIMIT 4 #define AMDGPU_BIOS_NUM_SCRATCH 16 -/* max number of IP instances */ -#define AMDGPU_MAX_SDMA_INSTANCES 2 - /* hard reset data */ #define AMDGPU_ASIC_RESET_DATA 0x39d5e86b @@ -171,13 +196,6 @@ extern int amdgpu_cik_support; #define AMDGPU_RESET_VCE (1 << 13) #define AMDGPU_RESET_VCE1 (1 << 14) -/* GFX current status */ -#define AMDGPU_GFX_NORMAL_MODE 0x00000000L -#define AMDGPU_GFX_SAFE_MODE 0x00000001L -#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L -#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L -#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L - /* max cursor sizes (in pixels) */ #define CIK_CURSOR_WIDTH 128 #define CIK_CURSOR_HEIGHT 128 @@ -205,13 +223,6 @@ enum amdgpu_cp_irq { AMDGPU_CP_IRQ_LAST }; -enum amdgpu_sdma_irq { - AMDGPU_SDMA_IRQ_TRAP0 = 0, - AMDGPU_SDMA_IRQ_TRAP1, - - AMDGPU_SDMA_IRQ_LAST -}; - enum amdgpu_thermal_irq { AMDGPU_THERMAL_IRQ_LOW_TO_HIGH = 0, AMDGPU_THERMAL_IRQ_HIGH_TO_LOW, @@ -224,6 +235,10 @@ enum amdgpu_kiq_irq { AMDGPU_CP_KIQ_IRQ_LAST }; +#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ +#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ +#define MAX_KIQ_REG_TRY 80 /* 20 -> 80 */ + int amdgpu_device_ip_set_clockgating_state(void *dev, enum amd_ip_block_type block_type, enum amd_clockgating_state state); @@ -271,70 +286,6 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev, int amdgpu_device_ip_block_add(struct amdgpu_device *adev, const struct amdgpu_ip_block_version *ip_block_version); -/* provided by hw blocks that can move/clear data. e.g., gfx or sdma */ -struct amdgpu_buffer_funcs { - /* maximum bytes in a single operation */ - uint32_t copy_max_bytes; - - /* number of dw to reserve per operation */ - unsigned copy_num_dw; - - /* used for buffer migration */ - void (*emit_copy_buffer)(struct amdgpu_ib *ib, - /* src addr in bytes */ - uint64_t src_offset, - /* dst addr in bytes */ - uint64_t dst_offset, - /* number of byte to transfer */ - uint32_t byte_count); - - /* maximum bytes in a single operation */ - uint32_t fill_max_bytes; - - /* number of dw to reserve per operation */ - unsigned fill_num_dw; - - /* used for buffer clearing */ - void (*emit_fill_buffer)(struct amdgpu_ib *ib, - /* value to write to memory */ - uint32_t src_data, - /* dst addr in bytes */ - uint64_t dst_offset, - /* number of byte to fill */ - uint32_t byte_count); -}; - -/* provided by hw blocks that can write ptes, e.g., sdma */ -struct amdgpu_vm_pte_funcs { - /* number of dw to reserve per operation */ - unsigned copy_pte_num_dw; - - /* copy pte entries from GART */ - void (*copy_pte)(struct amdgpu_ib *ib, - uint64_t pe, uint64_t src, - unsigned count); - - /* write pte one entry at a time with addr mapping */ - void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, - uint64_t value, unsigned count, - uint32_t incr); - /* for linear pte/pde updates without addr mapping */ - void (*set_pte_pde)(struct amdgpu_ib *ib, - uint64_t pe, - uint64_t addr, unsigned count, - uint32_t incr, uint64_t flags); -}; - -/* provided by the ih block */ -struct amdgpu_ih_funcs { - /* ring read/write ptr handling, called from interrupt context */ - u32 (*get_wptr)(struct amdgpu_device *adev); - bool (*prescreen_iv)(struct amdgpu_device *adev); - void (*decode_iv)(struct amdgpu_device *adev, - struct amdgpu_iv_entry *entry); - void (*set_rptr)(struct amdgpu_device *adev); -}; - /* * BIOS. */ @@ -360,34 +311,6 @@ struct amdgpu_clock { uint32_t max_pixel_clock; }; -/* - * GEM. - */ - -#define AMDGPU_GEM_DOMAIN_MAX 0x3 -#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) - -void amdgpu_gem_object_free(struct drm_gem_object *obj); -int amdgpu_gem_object_open(struct drm_gem_object *obj, - struct drm_file *file_priv); -void amdgpu_gem_object_close(struct drm_gem_object *obj, - struct drm_file *file_priv); -unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); -struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); -struct drm_gem_object * -amdgpu_gem_prime_import_sg_table(struct drm_device *dev, - struct dma_buf_attachment *attach, - struct sg_table *sg); -struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, - struct drm_gem_object *gobj, - int flags); -struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, - struct dma_buf *dma_buf); -struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); -void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); -void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); -int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); - /* sub-allocation manager, it has to be protected by another lock. * By conception this is an helper for other part of the driver * like the indirect buffer or semaphore, which both have their @@ -437,134 +360,10 @@ struct amdgpu_sa_bo { struct dma_fence *fence; }; -/* - * GEM objects. - */ -void amdgpu_gem_force_release(struct amdgpu_device *adev); -int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, - int alignment, u32 initial_domain, - u64 flags, enum ttm_bo_type type, - struct reservation_object *resv, - struct drm_gem_object **obj); - -int amdgpu_mode_dumb_create(struct drm_file *file_priv, - struct drm_device *dev, - struct drm_mode_create_dumb *args); -int amdgpu_mode_dumb_mmap(struct drm_file *filp, - struct drm_device *dev, - uint32_t handle, uint64_t *offset_p); int amdgpu_fence_slab_init(void); void amdgpu_fence_slab_fini(void); /* - * GPU doorbell structures, functions & helpers - */ -typedef enum _AMDGPU_DOORBELL_ASSIGNMENT -{ - AMDGPU_DOORBELL_KIQ = 0x000, - AMDGPU_DOORBELL_HIQ = 0x001, - AMDGPU_DOORBELL_DIQ = 0x002, - AMDGPU_DOORBELL_MEC_RING0 = 0x010, - AMDGPU_DOORBELL_MEC_RING1 = 0x011, - AMDGPU_DOORBELL_MEC_RING2 = 0x012, - AMDGPU_DOORBELL_MEC_RING3 = 0x013, - AMDGPU_DOORBELL_MEC_RING4 = 0x014, - AMDGPU_DOORBELL_MEC_RING5 = 0x015, - AMDGPU_DOORBELL_MEC_RING6 = 0x016, - AMDGPU_DOORBELL_MEC_RING7 = 0x017, - AMDGPU_DOORBELL_GFX_RING0 = 0x020, - AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0, - AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1, - AMDGPU_DOORBELL_IH = 0x1E8, - AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF, - AMDGPU_DOORBELL_INVALID = 0xFFFF -} AMDGPU_DOORBELL_ASSIGNMENT; - -struct amdgpu_doorbell { - /* doorbell mmio */ - resource_size_t base; - resource_size_t size; - u32 __iomem *ptr; - u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ -}; - -/* - * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space - */ -typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT -{ - /* - * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in - * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. - * Compute related doorbells are allocated from 0x00 to 0x8a - */ - - - /* kernel scheduling */ - AMDGPU_DOORBELL64_KIQ = 0x00, - - /* HSA interface queue and debug queue */ - AMDGPU_DOORBELL64_HIQ = 0x01, - AMDGPU_DOORBELL64_DIQ = 0x02, - - /* Compute engines */ - AMDGPU_DOORBELL64_MEC_RING0 = 0x03, - AMDGPU_DOORBELL64_MEC_RING1 = 0x04, - AMDGPU_DOORBELL64_MEC_RING2 = 0x05, - AMDGPU_DOORBELL64_MEC_RING3 = 0x06, - AMDGPU_DOORBELL64_MEC_RING4 = 0x07, - AMDGPU_DOORBELL64_MEC_RING5 = 0x08, - AMDGPU_DOORBELL64_MEC_RING6 = 0x09, - AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, - - /* User queue doorbell range (128 doorbells) */ - AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, - AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, - - /* Graphics engine */ - AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, - - /* - * Other graphics doorbells can be allocated here: from 0x8c to 0xef - * Graphics voltage island aperture 1 - * default non-graphics QWORD index is 0xF0 - 0xFF inclusive - */ - - /* sDMA engines */ - AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, - AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, - AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, - - /* Interrupt handler */ - AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ - AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ - AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ - - /* VCN engine use 32 bits doorbell */ - AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ - AMDGPU_DOORBELL64_VCN2_3 = 0xF9, - AMDGPU_DOORBELL64_VCN4_5 = 0xFA, - AMDGPU_DOORBELL64_VCN6_7 = 0xFB, - - /* overlap the doorbell assignment with VCN as they are mutually exclusive - * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD - */ - AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, - AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, - AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, - AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, - - AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, - AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, - AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, - AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, - - AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, - AMDGPU_DOORBELL64_INVALID = 0xFFFF -} AMDGPU_DOORBELL64_ASSIGNMENT; - -/* * IRQS. */ @@ -600,84 +399,6 @@ struct amdgpu_ib { extern const struct drm_sched_backend_ops amdgpu_sched_ops; /* - * Queue manager - */ -struct amdgpu_queue_mapper { - int hw_ip; - struct mutex lock; - /* protected by lock */ - struct amdgpu_ring *queue_map[AMDGPU_MAX_RINGS]; -}; - -struct amdgpu_queue_mgr { - struct amdgpu_queue_mapper mapper[AMDGPU_MAX_IP_NUM]; -}; - -int amdgpu_queue_mgr_init(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr); -int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr); -int amdgpu_queue_mgr_map(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr, - u32 hw_ip, u32 instance, u32 ring, - struct amdgpu_ring **out_ring); - -/* - * context related structures - */ - -struct amdgpu_ctx_ring { - uint64_t sequence; - struct dma_fence **fences; - struct drm_sched_entity entity; -}; - -struct amdgpu_ctx { - struct kref refcount; - struct amdgpu_device *adev; - struct amdgpu_queue_mgr queue_mgr; - unsigned reset_counter; - unsigned reset_counter_query; - uint32_t vram_lost_counter; - spinlock_t ring_lock; - struct dma_fence **fences; - struct amdgpu_ctx_ring rings[AMDGPU_MAX_RINGS]; - bool preamble_presented; - enum drm_sched_priority init_priority; - enum drm_sched_priority override_priority; - struct mutex lock; - atomic_t guilty; -}; - -struct amdgpu_ctx_mgr { - struct amdgpu_device *adev; - struct mutex lock; - /* protected by lock */ - struct idr ctx_handles; -}; - -struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); -int amdgpu_ctx_put(struct amdgpu_ctx *ctx); - -int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct dma_fence *fence, uint64_t *seq); -struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, - struct amdgpu_ring *ring, uint64_t seq); -void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, - enum drm_sched_priority priority); - -int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - -int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id); - -void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); -void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); - - -/* * file private structure */ @@ -690,271 +411,6 @@ struct amdgpu_fpriv { struct amdgpu_ctx_mgr ctx_mgr; }; -/* - * GFX stuff - */ -#include "clearstate_defs.h" - -struct amdgpu_rlc_funcs { - void (*enter_safe_mode)(struct amdgpu_device *adev); - void (*exit_safe_mode)(struct amdgpu_device *adev); -}; - -struct amdgpu_rlc { - /* for power gating */ - struct amdgpu_bo *save_restore_obj; - uint64_t save_restore_gpu_addr; - volatile uint32_t *sr_ptr; - const u32 *reg_list; - u32 reg_list_size; - /* for clear state */ - struct amdgpu_bo *clear_state_obj; - uint64_t clear_state_gpu_addr; - volatile uint32_t *cs_ptr; - const struct cs_section_def *cs_data; - u32 clear_state_size; - /* for cp tables */ - struct amdgpu_bo *cp_table_obj; - uint64_t cp_table_gpu_addr; - volatile uint32_t *cp_table_ptr; - u32 cp_table_size; - - /* safe mode for updating CG/PG state */ - bool in_safe_mode; - const struct amdgpu_rlc_funcs *funcs; - - /* for firmware data */ - u32 save_and_restore_offset; - u32 clear_state_descriptor_offset; - u32 avail_scratch_ram_locations; - u32 reg_restore_list_size; - u32 reg_list_format_start; - u32 reg_list_format_separate_start; - u32 starting_offsets_start; - u32 reg_list_format_size_bytes; - u32 reg_list_size_bytes; - u32 reg_list_format_direct_reg_list_length; - u32 save_restore_list_cntl_size_bytes; - u32 save_restore_list_gpm_size_bytes; - u32 save_restore_list_srm_size_bytes; - - u32 *register_list_format; - u32 *register_restore; - u8 *save_restore_list_cntl; - u8 *save_restore_list_gpm; - u8 *save_restore_list_srm; - - bool is_rlc_v2_1; -}; - -#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES - -struct amdgpu_mec { - struct amdgpu_bo *hpd_eop_obj; - u64 hpd_eop_gpu_addr; - struct amdgpu_bo *mec_fw_obj; - u64 mec_fw_gpu_addr; - u32 num_mec; - u32 num_pipe_per_mec; - u32 num_queue_per_pipe; - void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; - - /* These are the resources for which amdgpu takes ownership */ - DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); -}; - -struct amdgpu_kiq { - u64 eop_gpu_addr; - struct amdgpu_bo *eop_obj; - spinlock_t ring_lock; - struct amdgpu_ring ring; - struct amdgpu_irq_src irq; -}; - -/* - * GPU scratch registers structures, functions & helpers - */ -struct amdgpu_scratch { - unsigned num_reg; - uint32_t reg_base; - uint32_t free_mask; -}; - -/* - * GFX configurations - */ -#define AMDGPU_GFX_MAX_SE 4 -#define AMDGPU_GFX_MAX_SH_PER_SE 2 - -struct amdgpu_rb_config { - uint32_t rb_backend_disable; - uint32_t user_rb_backend_disable; - uint32_t raster_config; - uint32_t raster_config_1; -}; - -struct gb_addr_config { - uint16_t pipe_interleave_size; - uint8_t num_pipes; - uint8_t max_compress_frags; - uint8_t num_banks; - uint8_t num_se; - uint8_t num_rb_per_se; -}; - -struct amdgpu_gfx_config { - unsigned max_shader_engines; - unsigned max_tile_pipes; - unsigned max_cu_per_sh; - unsigned max_sh_per_se; - unsigned max_backends_per_se; - unsigned max_texture_channel_caches; - unsigned max_gprs; - unsigned max_gs_threads; - unsigned max_hw_contexts; - unsigned sc_prim_fifo_size_frontend; - unsigned sc_prim_fifo_size_backend; - unsigned sc_hiz_tile_fifo_size; - unsigned sc_earlyz_tile_fifo_size; - - unsigned num_tile_pipes; - unsigned backend_enable_mask; - unsigned mem_max_burst_length_bytes; - unsigned mem_row_size_in_kb; - unsigned shader_engine_tile_size; - unsigned num_gpus; - unsigned multi_gpu_tile_size; - unsigned mc_arb_ramcfg; - unsigned gb_addr_config; - unsigned num_rbs; - unsigned gs_vgt_table_depth; - unsigned gs_prim_buffer_depth; - - uint32_t tile_mode_array[32]; - uint32_t macrotile_mode_array[16]; - - struct gb_addr_config gb_addr_config_fields; - struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; - - /* gfx configure feature */ - uint32_t double_offchip_lds_buf; - /* cached value of DB_DEBUG2 */ - uint32_t db_debug2; -}; - -struct amdgpu_cu_info { - uint32_t simd_per_cu; - uint32_t max_waves_per_simd; - uint32_t wave_front_size; - uint32_t max_scratch_slots_per_cu; - uint32_t lds_size; - - /* total active CU number */ - uint32_t number; - uint32_t ao_cu_mask; - uint32_t ao_cu_bitmap[4][4]; - uint32_t bitmap[4][4]; -}; - -struct amdgpu_gfx_funcs { - /* get the gpu clock counter */ - uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); - void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance); - void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t *dst, int *no_fields); - void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t thread, uint32_t start, uint32_t size, uint32_t *dst); - void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t start, uint32_t size, uint32_t *dst); - void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue); -}; - -struct amdgpu_ngg_buf { - struct amdgpu_bo *bo; - uint64_t gpu_addr; - uint32_t size; - uint32_t bo_size; -}; - -enum { - NGG_PRIM = 0, - NGG_POS, - NGG_CNTL, - NGG_PARAM, - NGG_BUF_MAX -}; - -struct amdgpu_ngg { - struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; - uint32_t gds_reserve_addr; - uint32_t gds_reserve_size; - bool init; -}; - -struct sq_work { - struct work_struct work; - unsigned ih_data; -}; - -struct amdgpu_gfx { - struct mutex gpu_clock_mutex; - struct amdgpu_gfx_config config; - struct amdgpu_rlc rlc; - struct amdgpu_mec mec; - struct amdgpu_kiq kiq; - struct amdgpu_scratch scratch; - const struct firmware *me_fw; /* ME firmware */ - uint32_t me_fw_version; - const struct firmware *pfp_fw; /* PFP firmware */ - uint32_t pfp_fw_version; - const struct firmware *ce_fw; /* CE firmware */ - uint32_t ce_fw_version; - const struct firmware *rlc_fw; /* RLC firmware */ - uint32_t rlc_fw_version; - const struct firmware *mec_fw; /* MEC firmware */ - uint32_t mec_fw_version; - const struct firmware *mec2_fw; /* MEC2 firmware */ - uint32_t mec2_fw_version; - uint32_t me_feature_version; - uint32_t ce_feature_version; - uint32_t pfp_feature_version; - uint32_t rlc_feature_version; - uint32_t rlc_srlc_fw_version; - uint32_t rlc_srlc_feature_version; - uint32_t rlc_srlg_fw_version; - uint32_t rlc_srlg_feature_version; - uint32_t rlc_srls_fw_version; - uint32_t rlc_srls_feature_version; - uint32_t mec_feature_version; - uint32_t mec2_feature_version; - struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; - unsigned num_gfx_rings; - struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; - unsigned num_compute_rings; - struct amdgpu_irq_src eop_irq; - struct amdgpu_irq_src priv_reg_irq; - struct amdgpu_irq_src priv_inst_irq; - struct amdgpu_irq_src cp_ecc_error_irq; - struct amdgpu_irq_src sq_irq; - struct sq_work sq_work; - - /* gfx status */ - uint32_t gfx_current_status; - /* ce ram size*/ - unsigned ce_ram_size; - struct amdgpu_cu_info cu_info; - const struct amdgpu_gfx_funcs *funcs; - - /* reset mask */ - uint32_t grbm_soft_reset; - uint32_t srbm_soft_reset; - /* s3/s4 mask */ - bool in_suspend; - /* NGG */ - struct amdgpu_ngg ngg; - - /* pipe reservation */ - struct mutex pipe_reserve_mutex; - DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); -}; - int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned size, struct amdgpu_ib *ib); void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, @@ -986,7 +442,7 @@ struct amdgpu_cs_parser { /* scheduler job object */ struct amdgpu_job *job; - struct amdgpu_ring *ring; + struct drm_sched_entity *entity; /* buffer objects */ struct ww_acquire_ctx ticket; @@ -1038,58 +494,6 @@ int amdgpu_device_wb_get(struct amdgpu_device *adev, u32 *wb); void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb); /* - * SDMA - */ -struct amdgpu_sdma_instance { - /* SDMA firmware */ - const struct firmware *fw; - uint32_t fw_version; - uint32_t feature_version; - - struct amdgpu_ring ring; - bool burst_nop; -}; - -struct amdgpu_sdma { - struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; -#ifdef CONFIG_DRM_AMDGPU_SI - //SI DMA has a difference trap irq number for the second engine - struct amdgpu_irq_src trap_irq_1; -#endif - struct amdgpu_irq_src trap_irq; - struct amdgpu_irq_src illegal_inst_irq; - int num_instances; - uint32_t srbm_soft_reset; -}; - -/* - * Firmware - */ -enum amdgpu_firmware_load_type { - AMDGPU_FW_LOAD_DIRECT = 0, - AMDGPU_FW_LOAD_SMU, - AMDGPU_FW_LOAD_PSP, -}; - -struct amdgpu_firmware { - struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM]; - enum amdgpu_firmware_load_type load_type; - struct amdgpu_bo *fw_buf; - unsigned int fw_size; - unsigned int max_ucodes; - /* firmwares are loaded by psp instead of smu from vega10 */ - const struct amdgpu_psp_funcs *funcs; - struct amdgpu_bo *rbuf; - struct mutex mutex; - - /* gpu info firmware data pointer */ - const struct firmware *gpu_info_fw; - - void *fw_buf_ptr; - uint64_t fw_buf_mc; -}; - -/* * Benchmarking */ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); @@ -1100,31 +504,6 @@ void amdgpu_benchmark(struct amdgpu_device *adev, int test_number); */ void amdgpu_test_moves(struct amdgpu_device *adev); - -/* - * amdgpu smumgr functions - */ -struct amdgpu_smumgr_funcs { - int (*check_fw_load_finish)(struct amdgpu_device *adev, uint32_t fwtype); - int (*request_smu_load_fw)(struct amdgpu_device *adev); - int (*request_smu_specific_fw)(struct amdgpu_device *adev, uint32_t fwtype); -}; - -/* - * amdgpu smumgr - */ -struct amdgpu_smumgr { - struct amdgpu_bo *toc_buf; - struct amdgpu_bo *smu_buf; - /* asic priv smu data */ - void *priv; - spinlock_t smu_lock; - /* smumgr functions */ - const struct amdgpu_smumgr_funcs *smumgr_funcs; - /* ucode loading complete flag */ - uint32_t fw_flags; -}; - /* * ASIC specific register table accessible by UMD */ @@ -1161,28 +540,16 @@ struct amdgpu_asic_funcs { struct amdgpu_ring *ring); /* check if the asic needs a full reset of if soft reset will work */ bool (*need_full_reset)(struct amdgpu_device *adev); + /* initialize doorbell layout for specific asic*/ + void (*init_doorbell_index)(struct amdgpu_device *adev); }; /* * IOCTL. */ -int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); -int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); @@ -1190,9 +557,6 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *fi int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data, struct drm_file *filp); -int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, - struct drm_file *filp); - /* VRAM scratch page for HDP bug, default vram page */ struct amdgpu_vram_scratch { struct amdgpu_bo *robj; @@ -1356,7 +720,6 @@ struct amdgpu_device { bool need_dma32; bool need_swiotlb; bool accel_working; - struct work_struct reset_work; struct notifier_block acpi_nb; struct amdgpu_i2c_chan *i2c_bus[AMDGPU_MAX_I2C_BUS]; struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS]; @@ -1477,9 +840,6 @@ struct amdgpu_device { u32 cg_flags; u32 pg_flags; - /* amdgpu smumgr */ - struct amdgpu_smumgr smu; - /* gfx */ struct amdgpu_gfx gfx; @@ -1504,6 +864,9 @@ struct amdgpu_device { /* GDS */ struct amdgpu_gds gds; + /* KFD */ + struct amdgpu_kfd_dev kfd; + /* display related functionality */ struct amdgpu_display_manager dm; @@ -1517,9 +880,6 @@ struct amdgpu_device { atomic64_t visible_pin_size; atomic64_t gart_pin_size; - /* amdkfd interface */ - struct kfd_dev *kfd; - /* soc15 register offset based on ip, instance and segment */ uint32_t *reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE]; @@ -1544,10 +904,17 @@ struct amdgpu_device { bool has_hw_reset; u8 reset_magic[AMDGPU_RESET_MAGIC_NUM]; + /* s3/s4 mask */ + bool in_suspend; + /* record last mm index being written through WREG32*/ unsigned long last_mm_index; bool in_gpu_reset; struct mutex lock_reset; + struct amdgpu_doorbell_index doorbell_index; + + int asic_reset_res; + struct work_struct xgmi_reset_work; }; static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev) @@ -1572,11 +939,6 @@ uint8_t amdgpu_mm_rreg8(struct amdgpu_device *adev, uint32_t offset); u32 amdgpu_io_rreg(struct amdgpu_device *adev, u32 reg); void amdgpu_io_wreg(struct amdgpu_device *adev, u32 reg, u32 v); -u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); -void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); -u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); -void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); - bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type); bool amdgpu_device_has_dc_support(struct amdgpu_device *adev); @@ -1638,11 +1000,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RREG32_IO(reg) amdgpu_io_rreg(adev, (reg)) #define WREG32_IO(reg, v) amdgpu_io_wreg(adev, (reg), (v)) -#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) -#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) -#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) -#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) - #define REG_FIELD_SHIFT(reg, field) reg##__##field##__SHIFT #define REG_FIELD_MASK(reg, field) reg##__##field##_MASK @@ -1666,22 +1023,6 @@ int emu_soc_asic_init(struct amdgpu_device *adev); #define RBIOS16(i) (RBIOS8(i) | (RBIOS8((i)+1) << 8)) #define RBIOS32(i) ((RBIOS16(i)) | (RBIOS16((i)+2) << 16)) -static inline struct amdgpu_sdma_instance * -amdgpu_get_sdma_instance(struct amdgpu_ring *ring) -{ - struct amdgpu_device *adev = ring->adev; - int i; - - for (i = 0; i < adev->sdma.num_instances; i++) - if (&adev->sdma.instance[i].ring == ring) - break; - - if (i < AMDGPU_MAX_SDMA_INSTANCES) - return &adev->sdma.instance[i]; - else - return NULL; -} - /* * ASICs macro. */ @@ -1700,74 +1041,17 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring) #define amdgpu_asic_flush_hdp(adev, r) (adev)->asic_funcs->flush_hdp((adev), (r)) #define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r)) #define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev)) -#define amdgpu_gmc_flush_gpu_tlb(adev, vmid) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid)) -#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) -#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) -#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) -#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) -#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) -#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) -#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) -#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) -#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) -#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) -#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) -#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) -#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) -#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) -#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) -#define amdgpu_ring_emit_ib(r, ib, vmid, c) (r)->funcs->emit_ib((r), (ib), (vmid), (c)) -#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) -#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) -#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) -#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) -#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) -#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) -#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) -#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) -#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) -#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) -#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) -#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) -#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) -#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) -#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) -#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) -#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev)) -#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) -#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) -#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) -#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) -#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) -#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h)) -#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h)) -#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev)) -#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev)) -#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async)) -#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) -#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) -#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) -#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) -#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) -#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) -#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) -#define amdgpu_gds_switch(adev, r, v, d, w, a) (adev)->gds.funcs->patch_gds_switch((r), (v), (d), (w), (a)) -#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) -#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q)) +#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev)) /* Common functions */ +bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev); int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job* job, bool force); + struct amdgpu_job* job); void amdgpu_device_pci_config_reset(struct amdgpu_device *adev); bool amdgpu_device_need_post(struct amdgpu_device *adev); -void amdgpu_display_update_priority(struct amdgpu_device *adev); void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes, u64 num_vis_bytes); -void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_gmc *mc, u64 base); -void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_gmc *mc); int amdgpu_device_resize_fb_bar(struct amdgpu_device *adev); void amdgpu_device_program_register_sequence(struct amdgpu_device *adev, const u32 *registers, @@ -1845,6 +1129,9 @@ bool amdgpu_acpi_is_pcie_performance_request_supported(struct amdgpu_device *ade int amdgpu_acpi_pcie_performance_request(struct amdgpu_device *adev, u8 perf_req, bool advertise); int amdgpu_acpi_pcie_notify_device_ready(struct amdgpu_device *adev); + +void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, + struct amdgpu_dm_backlight_caps *caps); #else static inline int amdgpu_acpi_init(struct amdgpu_device *adev) { return 0; } static inline void amdgpu_acpi_fini(struct amdgpu_device *adev) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c index 71efcf38f11b..0a4fba196b84 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c @@ -116,136 +116,48 @@ static int acp_sw_fini(void *handle) return 0; } -/* power off a tile/block within ACP */ -static int acp_suspend_tile(void *cgs_dev, int tile) -{ - u32 val = 0; - u32 count = 0; - - if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) { - pr_err("Invalid ACP tile : %d to suspend\n", tile); - return -1; - } - - val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile); - val &= ACP_TILE_ON_MASK; - - if (val == 0x0) { - val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); - val = val | (1 << tile); - cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); - cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG, - 0x500 + tile); - - count = ACP_TIMEOUT_LOOP; - while (true) { - val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 - + tile); - val = val & ACP_TILE_ON_MASK; - if (val == ACP_TILE_OFF_MASK) - break; - if (--count == 0) { - pr_err("Timeout reading ACP PGFSM status\n"); - return -ETIMEDOUT; - } - udelay(100); - } - - val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); - - val |= ACP_TILE_OFF_RETAIN_REG_MASK; - cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); - } - return 0; -} - -/* power on a tile/block within ACP */ -static int acp_resume_tile(void *cgs_dev, int tile) -{ - u32 val = 0; - u32 count = 0; - - if ((tile < ACP_TILE_P1) || (tile > ACP_TILE_DSP2)) { - pr_err("Invalid ACP tile to resume\n"); - return -1; - } - - val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 + tile); - val = val & ACP_TILE_ON_MASK; - - if (val != 0x0) { - cgs_write_register(cgs_dev, mmACP_PGFSM_CONFIG_REG, - 0x600 + tile); - count = ACP_TIMEOUT_LOOP; - while (true) { - val = cgs_read_register(cgs_dev, mmACP_PGFSM_READ_REG_0 - + tile); - val = val & ACP_TILE_ON_MASK; - if (val == 0x0) - break; - if (--count == 0) { - pr_err("Timeout reading ACP PGFSM status\n"); - return -ETIMEDOUT; - } - udelay(100); - } - val = cgs_read_register(cgs_dev, mmACP_PGFSM_RETAIN_REG); - if (tile == ACP_TILE_P1) - val = val & (ACP_TILE_P1_MASK); - else if (tile == ACP_TILE_P2) - val = val & (ACP_TILE_P2_MASK); - - cgs_write_register(cgs_dev, mmACP_PGFSM_RETAIN_REG, val); - } - return 0; -} - struct acp_pm_domain { - void *cgs_dev; + void *adev; struct generic_pm_domain gpd; }; static int acp_poweroff(struct generic_pm_domain *genpd) { - int i, ret; struct acp_pm_domain *apd; + struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); if (apd != NULL) { - /* Donot return abruptly if any of power tile fails to suspend. - * Log it and continue powering off other tile - */ - for (i = 4; i >= 0 ; i--) { - ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_P1 + i); - if (ret) - pr_err("ACP tile %d tile suspend failed\n", i); - } + adev = apd->adev; + /* call smu to POWER GATE ACP block + * smu will + * 1. turn off the acp clock + * 2. power off the acp tiles + * 3. check and enter ulv state + */ + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); } return 0; } static int acp_poweron(struct generic_pm_domain *genpd) { - int i, ret; struct acp_pm_domain *apd; + struct amdgpu_device *adev; apd = container_of(genpd, struct acp_pm_domain, gpd); if (apd != NULL) { - for (i = 0; i < 2; i++) { - ret = acp_resume_tile(apd->cgs_dev, ACP_TILE_P1 + i); - if (ret) { - pr_err("ACP tile %d resume failed\n", i); - break; - } - } - - /* Disable DSPs which are not going to be used */ - for (i = 0; i < 3; i++) { - ret = acp_suspend_tile(apd->cgs_dev, ACP_TILE_DSP0 + i); - /* Continue suspending other DSP, even if one fails */ - if (ret) - pr_err("ACP DSP %d suspend failed\n", i); - } + adev = apd->adev; + /* call smu to UNGATE ACP block + * smu will + * 1. exit ulv + * 2. turn on acp clock + * 3. power on acp tiles + */ + if (adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); } return 0; } @@ -289,30 +201,31 @@ static int acp_hw_init(void *handle) r = amd_acp_hw_init(adev->acp.cgs_device, ip_block->version->major, ip_block->version->minor); /* -ENODEV means board uses AZ rather than ACP */ - if (r == -ENODEV) + if (r == -ENODEV) { + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; - else if (r) + } else if (r) { return r; + } if (adev->rmmio_size == 0 || adev->rmmio_size < 0x5289) return -EINVAL; acp_base = adev->rmmio_base; - if (adev->asic_type != CHIP_STONEY) { - adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); - if (adev->acp.acp_genpd == NULL) - return -ENOMEM; - adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; - adev->acp.acp_genpd->gpd.power_off = acp_poweroff; - adev->acp.acp_genpd->gpd.power_on = acp_poweron; + adev->acp.acp_genpd = kzalloc(sizeof(struct acp_pm_domain), GFP_KERNEL); + if (adev->acp.acp_genpd == NULL) + return -ENOMEM; + + adev->acp.acp_genpd->gpd.name = "ACP_AUDIO"; + adev->acp.acp_genpd->gpd.power_off = acp_poweroff; + adev->acp.acp_genpd->gpd.power_on = acp_poweron; - adev->acp.acp_genpd->cgs_dev = adev->acp.cgs_device; + adev->acp.acp_genpd->adev = adev; - pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); - } + pm_genpd_init(&adev->acp.acp_genpd->gpd, NULL, false); adev->acp.acp_cell = kcalloc(ACP_DEVS, sizeof(struct mfd_cell), GFP_KERNEL); @@ -429,17 +342,16 @@ static int acp_hw_init(void *handle) if (r) return r; - if (adev->asic_type != CHIP_STONEY) { - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); - if (r) { - dev_err(dev, "Failed to add dev to genpd\n"); - return r; - } + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + r = pm_genpd_add_device(&adev->acp.acp_genpd->gpd, dev); + if (r) { + dev_err(dev, "Failed to add dev to genpd\n"); + return r; } } + /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -497,8 +409,10 @@ static int acp_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* return early if no ACP */ - if (!adev->acp.acp_cell) + if (!adev->acp.acp_genpd) { + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; + } /* Assert Soft reset of ACP */ val = cgs_read_register(adev->acp.cgs_device, mmACP_SOFT_RESET); @@ -536,19 +450,17 @@ static int acp_hw_fini(void *handle) udelay(100); } - if (adev->acp.acp_genpd) { - for (i = 0; i < ACP_DEVS ; i++) { - dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); - ret = pm_genpd_remove_device(dev); - /* If removal fails, dont giveup and try rest */ - if (ret) - dev_err(dev, "remove dev from genpd failed\n"); - } - kfree(adev->acp.acp_genpd); + for (i = 0; i < ACP_DEVS ; i++) { + dev = get_mfd_cell_dev(adev->acp.acp_cell[i].name, i); + ret = pm_genpd_remove_device(dev); + /* If removal fails, dont giveup and try rest */ + if (ret) + dev_err(dev, "remove dev from genpd failed\n"); } mfd_remove_devices(adev->acp.parent); kfree(adev->acp.acp_res); + kfree(adev->acp.acp_genpd); kfree(adev->acp.acp_cell); return 0; @@ -556,11 +468,21 @@ static int acp_hw_fini(void *handle) static int acp_suspend(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* power up on suspend */ + if (!adev->acp.acp_cell) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, false); return 0; } static int acp_resume(void *handle) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + /* power down again on resume */ + if (!adev->acp.acp_cell) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, true); return 0; } @@ -593,6 +515,13 @@ static int acp_set_clockgating_state(void *handle, static int acp_set_powergating_state(void *handle, enum amd_powergating_state state) { + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + bool enable = state == AMD_PG_STATE_GATE ? true : false; + + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_ACP, enable); + return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c index 353993218f21..4376b17ca594 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c @@ -31,6 +31,7 @@ #include <drm/drm_crtc_helper.h> #include "amdgpu.h" #include "amdgpu_pm.h" +#include "amdgpu_display.h" #include "amd_acpi.h" #include "atom.h" @@ -40,28 +41,21 @@ struct amdgpu_atif_notification_cfg { }; struct amdgpu_atif_notifications { - bool display_switch; - bool expansion_mode_change; bool thermal_state; bool forced_power_state; bool system_power_state; - bool display_conf_change; - bool px_gfx_switch; bool brightness_change; bool dgpu_display_event; + bool gpu_package_power_limit; }; struct amdgpu_atif_functions { bool system_params; bool sbios_requests; - bool select_active_disp; - bool lid_state; - bool get_tv_standard; - bool set_tv_standard; - bool get_panel_expansion_mode; - bool set_panel_expansion_mode; bool temperature_change; - bool graphics_device_types; + bool query_backlight_transfer_characteristics; + bool ready_to_undock; + bool external_gpu_information; }; struct amdgpu_atif { @@ -71,6 +65,7 @@ struct amdgpu_atif { struct amdgpu_atif_functions functions; struct amdgpu_atif_notification_cfg notification_cfg; struct amdgpu_encoder *encoder_for_bl; + struct amdgpu_dm_backlight_caps backlight_caps; }; /* Call the ATIF method @@ -136,15 +131,12 @@ static union acpi_object *amdgpu_atif_call(struct amdgpu_atif *atif, */ static void amdgpu_atif_parse_notification(struct amdgpu_atif_notifications *n, u32 mask) { - n->display_switch = mask & ATIF_DISPLAY_SWITCH_REQUEST_SUPPORTED; - n->expansion_mode_change = mask & ATIF_EXPANSION_MODE_CHANGE_REQUEST_SUPPORTED; n->thermal_state = mask & ATIF_THERMAL_STATE_CHANGE_REQUEST_SUPPORTED; n->forced_power_state = mask & ATIF_FORCED_POWER_STATE_CHANGE_REQUEST_SUPPORTED; n->system_power_state = mask & ATIF_SYSTEM_POWER_SOURCE_CHANGE_REQUEST_SUPPORTED; - n->display_conf_change = mask & ATIF_DISPLAY_CONF_CHANGE_REQUEST_SUPPORTED; - n->px_gfx_switch = mask & ATIF_PX_GFX_SWITCH_REQUEST_SUPPORTED; n->brightness_change = mask & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST_SUPPORTED; n->dgpu_display_event = mask & ATIF_DGPU_DISPLAY_EVENT_SUPPORTED; + n->gpu_package_power_limit = mask & ATIF_GPU_PACKAGE_POWER_LIMIT_REQUEST_SUPPORTED; } /** @@ -161,14 +153,11 @@ static void amdgpu_atif_parse_functions(struct amdgpu_atif_functions *f, u32 mas { f->system_params = mask & ATIF_GET_SYSTEM_PARAMETERS_SUPPORTED; f->sbios_requests = mask & ATIF_GET_SYSTEM_BIOS_REQUESTS_SUPPORTED; - f->select_active_disp = mask & ATIF_SELECT_ACTIVE_DISPLAYS_SUPPORTED; - f->lid_state = mask & ATIF_GET_LID_STATE_SUPPORTED; - f->get_tv_standard = mask & ATIF_GET_TV_STANDARD_FROM_CMOS_SUPPORTED; - f->set_tv_standard = mask & ATIF_SET_TV_STANDARD_IN_CMOS_SUPPORTED; - f->get_panel_expansion_mode = mask & ATIF_GET_PANEL_EXPANSION_MODE_FROM_CMOS_SUPPORTED; - f->set_panel_expansion_mode = mask & ATIF_SET_PANEL_EXPANSION_MODE_IN_CMOS_SUPPORTED; f->temperature_change = mask & ATIF_TEMPERATURE_CHANGE_NOTIFICATION_SUPPORTED; - f->graphics_device_types = mask & ATIF_GET_GRAPHICS_DEVICE_TYPES_SUPPORTED; + f->query_backlight_transfer_characteristics = + mask & ATIF_QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS_SUPPORTED; + f->ready_to_undock = mask & ATIF_READY_TO_UNDOCK_NOTIFICATION_SUPPORTED; + f->external_gpu_information = mask & ATIF_GET_EXTERNAL_GPU_INFORMATION_SUPPORTED; } /** @@ -310,6 +299,65 @@ out: } /** + * amdgpu_atif_query_backlight_caps - get min and max backlight input signal + * + * @handle: acpi handle + * + * Execute the QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS ATIF function + * to determine the acceptable range of backlight values + * + * Backlight_caps.caps_valid will be set to true if the query is successful + * + * The input signals are in range 0-255 + * + * This function assumes the display with backlight is the first LCD + * + * Returns 0 on success, error on failure. + */ +static int amdgpu_atif_query_backlight_caps(struct amdgpu_atif *atif) +{ + union acpi_object *info; + struct atif_qbtc_output characteristics; + struct atif_qbtc_arguments arguments; + struct acpi_buffer params; + size_t size; + int err = 0; + + arguments.size = sizeof(arguments); + arguments.requested_display = ATIF_QBTC_REQUEST_LCD1; + + params.length = sizeof(arguments); + params.pointer = (void *)&arguments; + + info = amdgpu_atif_call(atif, + ATIF_FUNCTION_QUERY_BRIGHTNESS_TRANSFER_CHARACTERISTICS, + ¶ms); + if (!info) { + err = -EIO; + goto out; + } + + size = *(u16 *) info->buffer.pointer; + if (size < 10) { + err = -EINVAL; + goto out; + } + + memset(&characteristics, 0, sizeof(characteristics)); + size = min(sizeof(characteristics), size); + memcpy(&characteristics, info->buffer.pointer, size); + + atif->backlight_caps.caps_valid = true; + atif->backlight_caps.min_input_signal = + characteristics.min_input_signal; + atif->backlight_caps.max_input_signal = + characteristics.max_input_signal; +out: + kfree(info); + return err; +} + +/** * amdgpu_atif_get_sbios_requests - get requested sbios event * * @handle: acpi handle @@ -358,7 +406,9 @@ out: * * Checks the acpi event and if it matches an atif event, * handles it. - * Returns NOTIFY code + * + * Returns: + * NOTIFY_BAD or NOTIFY_DONE, depending on the event. */ static int amdgpu_atif_handler(struct amdgpu_device *adev, struct acpi_bus_event *event) @@ -372,11 +422,16 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, if (strcmp(event->device_class, ACPI_VIDEO_CLASS) != 0) return NOTIFY_DONE; + /* Is this actually our event? */ if (!atif || !atif->notification_cfg.enabled || - event->type != atif->notification_cfg.command_code) - /* Not our event */ - return NOTIFY_DONE; + event->type != atif->notification_cfg.command_code) { + /* These events will generate keypresses otherwise */ + if (event->type == ACPI_VIDEO_NOTIFY_PROBE) + return NOTIFY_BAD; + else + return NOTIFY_DONE; + } if (atif->functions.sbios_requests) { struct atif_sbios_requests req; @@ -385,7 +440,7 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev, count = amdgpu_atif_get_sbios_requests(atif, &req); if (count <= 0) - return NOTIFY_DONE; + return NOTIFY_BAD; DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count); @@ -791,6 +846,17 @@ int amdgpu_acpi_init(struct amdgpu_device *adev) } } + if (atif->functions.query_backlight_transfer_characteristics) { + ret = amdgpu_atif_query_backlight_caps(atif); + if (ret) { + DRM_DEBUG_DRIVER("Call to QUERY_BACKLIGHT_TRANSFER_CHARACTERISTICS failed: %d\n", + ret); + atif->backlight_caps.caps_valid = false; + } + } else { + atif->backlight_caps.caps_valid = false; + } + out: adev->acpi_nb.notifier_call = amdgpu_acpi_event; register_acpi_notifier(&adev->acpi_nb); @@ -798,6 +864,18 @@ out: return ret; } +void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev, + struct amdgpu_dm_backlight_caps *caps) +{ + if (!adev->atif) { + caps->caps_valid = false; + return; + } + caps->caps_valid = adev->atif->backlight_caps.caps_valid; + caps->min_input_signal = adev->atif->backlight_caps.min_input_signal; + caps->max_input_signal = adev->atif->backlight_caps.max_input_signal; +} + /** * amdgpu_acpi_fini - tear down driver acpi support * @@ -808,6 +886,5 @@ out: void amdgpu_acpi_fini(struct amdgpu_device *adev) { unregister_acpi_notifier(&adev->acpi_nb); - if (adev->atif) - kfree(adev->atif); + kfree(adev->atif); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c index 0c791e35acf0..2dfaf158ef07 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c @@ -26,55 +26,43 @@ #include "amdgpu.h" #include "amdgpu_gfx.h" #include <linux/module.h> +#include <linux/dma-buf.h> const struct kgd2kfd_calls *kgd2kfd; -bool (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); static const unsigned int compute_vmid_bitmap = 0xFF00; +/* Total memory size in system memory and all GPU VRAM. Used to + * estimate worst case amount of memory to reserve for page tables + */ +uint64_t amdgpu_amdkfd_total_mem_size; + int amdgpu_amdkfd_init(void) { + struct sysinfo si; int ret; -#if defined(CONFIG_HSA_AMD_MODULE) - int (*kgd2kfd_init_p)(unsigned int, const struct kgd2kfd_calls**); - - kgd2kfd_init_p = symbol_request(kgd2kfd_init); - - if (kgd2kfd_init_p == NULL) - return -ENOENT; - - ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd); - if (ret) { - symbol_put(kgd2kfd_init); - kgd2kfd = NULL; - } - - -#elif defined(CONFIG_HSA_AMD) + si_meminfo(&si); + amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh; + amdgpu_amdkfd_total_mem_size *= si.mem_unit; +#ifdef CONFIG_HSA_AMD ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd); if (ret) kgd2kfd = NULL; - + amdgpu_amdkfd_gpuvm_init_mem_limits(); #else kgd2kfd = NULL; ret = -ENOENT; #endif -#if defined(CONFIG_HSA_AMD_MODULE) || defined(CONFIG_HSA_AMD) - amdgpu_amdkfd_gpuvm_init_mem_limits(); -#endif - return ret; } void amdgpu_amdkfd_fini(void) { - if (kgd2kfd) { + if (kgd2kfd) kgd2kfd->exit(); - symbol_put(kgd2kfd_init); - } } void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) @@ -96,9 +84,12 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) case CHIP_FIJI: case CHIP_POLARIS10: case CHIP_POLARIS11: + case CHIP_POLARIS12: kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions(); break; case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: case CHIP_RAVEN: kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions(); break; @@ -107,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev) return; } - adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev, - adev->pdev, kfd2kgd); + adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev, + adev->pdev, kfd2kgd); + + if (adev->kfd.dev) + amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size; } /** @@ -146,16 +140,17 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev, void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) { - int i; + int i, n; int last_valid_bit; - if (adev->kfd) { + + if (adev->kfd.dev) { struct kgd2kfd_shared_resources gpu_resources = { .compute_vmid_bitmap = compute_vmid_bitmap, .num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec, .num_queue_per_pipe = adev->gfx.mec.num_queue_per_pipe, .gpuvm_size = min(adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT, - AMDGPU_VA_HOLE_START), + AMDGPU_GMC_HOLE_START), .drm_render_minor = adev->ddev->render->index }; @@ -166,7 +161,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) KGD_MAX_QUEUES); /* remove the KIQ bit as well */ - if (adev->gfx.kiq.ring.ready) + if (adev->gfx.kiq.ring.sched.ready) clear_bit(amdgpu_gfx_queue_to_bit(adev, adev->gfx.kiq.ring.me - 1, adev->gfx.kiq.ring.pipe, @@ -185,7 +180,15 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) &gpu_resources.doorbell_physical_address, &gpu_resources.doorbell_aperture_size, &gpu_resources.doorbell_start_offset); - if (adev->asic_type >= CHIP_VEGA10) { + + if (adev->asic_type < CHIP_VEGA10) { + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); + return; + } + + n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8; + + for (i = 0; i < n; i += 2) { /* On SOC15 the BIF is involved in routing * doorbells using the low 12 bits of the * address. Communicate the assignments to @@ -193,52 +196,52 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev) * process in case of 64-bit doorbells so we * can use each doorbell assignment twice. */ - gpu_resources.sdma_doorbell[0][0] = - AMDGPU_DOORBELL64_sDMA_ENGINE0; - gpu_resources.sdma_doorbell[0][1] = - AMDGPU_DOORBELL64_sDMA_ENGINE0 + 0x200; - gpu_resources.sdma_doorbell[1][0] = - AMDGPU_DOORBELL64_sDMA_ENGINE1; - gpu_resources.sdma_doorbell[1][1] = - AMDGPU_DOORBELL64_sDMA_ENGINE1 + 0x200; - /* Doorbells 0x0f0-0ff and 0x2f0-2ff are reserved for - * SDMA, IH and VCN. So don't use them for the CP. - */ - gpu_resources.reserved_doorbell_mask = 0x1f0; - gpu_resources.reserved_doorbell_val = 0x0f0; + gpu_resources.sdma_doorbell[0][i] = + adev->doorbell_index.sdma_engine0 + (i >> 1); + gpu_resources.sdma_doorbell[0][i+1] = + adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1); + gpu_resources.sdma_doorbell[1][i] = + adev->doorbell_index.sdma_engine1 + (i >> 1); + gpu_resources.sdma_doorbell[1][i+1] = + adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1); } + /* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for + * SDMA, IH and VCN. So don't use them for the CP. + */ + gpu_resources.reserved_doorbell_mask = 0x1e0; + gpu_resources.reserved_doorbell_val = 0x0e0; - kgd2kfd->device_init(adev->kfd, &gpu_resources); + kgd2kfd->device_init(adev->kfd.dev, &gpu_resources); } } void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev) { - if (adev->kfd) { - kgd2kfd->device_exit(adev->kfd); - adev->kfd = NULL; + if (adev->kfd.dev) { + kgd2kfd->device_exit(adev->kfd.dev); + adev->kfd.dev = NULL; } } void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev, const void *ih_ring_entry) { - if (adev->kfd) - kgd2kfd->interrupt(adev->kfd, ih_ring_entry); + if (adev->kfd.dev) + kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry); } void amdgpu_amdkfd_suspend(struct amdgpu_device *adev) { - if (adev->kfd) - kgd2kfd->suspend(adev->kfd); + if (adev->kfd.dev) + kgd2kfd->suspend(adev->kfd.dev); } int amdgpu_amdkfd_resume(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->resume(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->resume(adev->kfd.dev); return r; } @@ -247,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->pre_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->pre_reset(adev->kfd.dev); return r; } @@ -257,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev) { int r = 0; - if (adev->kfd) - r = kgd2kfd->post_reset(adev->kfd); + if (adev->kfd.dev) + r = kgd2kfd->post_reset(adev->kfd.dev); return r; } @@ -267,12 +270,13 @@ void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - amdgpu_device_gpu_recover(adev, NULL, false); + if (amdgpu_device_should_recover_gpu(adev)) + amdgpu_device_gpu_recover(adev, NULL); } -int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9) +int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr, bool mqd_gfx9) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_bo *bo = NULL; @@ -342,7 +346,7 @@ allocate_mem_reserve_bo_failed: return r; } -void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) +void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) { struct amdgpu_bo *bo = (struct amdgpu_bo *) mem_obj; @@ -353,8 +357,8 @@ void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj) amdgpu_bo_unref(&(bo)); } -void get_local_mem_info(struct kgd_dev *kgd, - struct kfd_local_mem_info *mem_info) +void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; uint64_t address_mask = adev->dev->dma_mask ? ~*adev->dev->dma_mask : @@ -385,7 +389,7 @@ void get_local_mem_info(struct kgd_dev *kgd, mem_info->mem_clk_max = 100; } -uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -394,7 +398,7 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) return 0; } -uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -407,7 +411,7 @@ uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) return 100; } -void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) +void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; struct amdgpu_cu_info acu_info = adev->gfx.cu_info; @@ -430,6 +434,62 @@ void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info) cu_info->lds_size = acu_info.lds_size; } +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dma_buf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct dma_buf *dma_buf; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + uint64_t metadata_flags; + int r = -EINVAL; + + dma_buf = dma_buf_get(dma_buf_fd); + if (IS_ERR(dma_buf)) + return PTR_ERR(dma_buf); + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + goto out_put; + + obj = dma_buf->priv; + if (obj->dev->driver != adev->ddev->driver) + /* Can't handle buffers from different drivers */ + goto out_put; + + adev = obj->dev->dev_private; + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + goto out_put; + + r = 0; + if (dma_buf_kgd) + *dma_buf_kgd = (struct kgd_dev *)adev; + if (bo_size) + *bo_size = amdgpu_bo_size(bo); + if (metadata_size) + *metadata_size = bo->metadata_size; + if (metadata_buffer) + r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size, + metadata_size, &metadata_flags); + if (flags) { + *flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT; + + if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) + *flags |= ALLOC_MEM_FLAGS_PUBLIC; + } + +out_put: + dma_buf_put(dma_buf); + return r; +} + uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; @@ -437,6 +497,13 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd) return amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]); } +uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + + return adev->gmc.xgmi.hive_id; +} + int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine, uint32_t vmid, uint64_t gpu_addr, uint32_t *ib_cmd, uint32_t ib_len) @@ -496,13 +563,16 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle) { struct amdgpu_device *adev = (struct amdgpu_device *)kgd; - amdgpu_dpm_switch_power_profile(adev, - PP_SMC_POWER_PROFILE_COMPUTE, !idle); + if (adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->switch_power_profile) + amdgpu_dpm_switch_power_profile(adev, + PP_SMC_POWER_PROFILE_COMPUTE, + !idle); } bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) { - if (adev->kfd) { + if (adev->kfd.dev) { if ((1 << vmid) & compute_vmid_bitmap) return true; } @@ -510,13 +580,13 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid) return false; } -#if !defined(CONFIG_HSA_AMD_MODULE) && !defined(CONFIG_HSA_AMD) +#ifndef CONFIG_HSA_AMD bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm) { return false; } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h index cc9aeab5468c..70429f7aa9a8 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h @@ -27,7 +27,6 @@ #include <linux/types.h> #include <linux/mm.h> -#include <linux/mmu_context.h> #include <linux/workqueue.h> #include <kgd_kfd_interface.h> #include <drm/ttm/ttm_execbuf_util.h> @@ -35,6 +34,7 @@ #include "amdgpu_vm.h" extern const struct kgd2kfd_calls *kgd2kfd; +extern uint64_t amdgpu_amdkfd_total_mem_size; struct amdgpu_device; @@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence { char timeline_name[TASK_COMM_LEN]; }; +struct amdgpu_kfd_dev { + struct kfd_dev *dev; + uint64_t vram_used; +}; + struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context, struct mm_struct *mm); bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm); @@ -134,17 +139,23 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev); void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd); /* Shared API */ -int alloc_gtt_mem(struct kgd_dev *kgd, size_t size, - void **mem_obj, uint64_t *gpu_addr, - void **cpu_ptr, bool mqd_gfx9); -void free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); -void get_local_mem_info(struct kgd_dev *kgd, - struct kfd_local_mem_info *mem_info); -uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); - -uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); -void get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size, + void **mem_obj, uint64_t *gpu_addr, + void **cpu_ptr, bool mqd_gfx9); +void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj); +void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd, + struct kfd_local_mem_info *mem_info); +uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd); + +uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd); +void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info); +int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd, + struct kgd_dev **dmabuf_kgd, + uint64_t *bo_size, void *metadata_buffer, + size_t buffer_size, uint32_t *metadata_size, + uint32_t *flags); uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); +uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd); #define read_user_wptr(mmptr, wptr, dst) \ ({ \ @@ -162,17 +173,18 @@ uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd); }) /* GPUVM API */ -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid, + void **vm, void **process_info, struct dma_fence **ef); int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, + struct file *filp, unsigned int pasid, void **vm, void **process_info, struct dma_fence **ef); void amdgpu_amdkfd_gpuvm_destroy_cb(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm); -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm); +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm); int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( struct kgd_dev *kgd, uint64_t va, uint64_t size, void *vm, struct kgd_mem **mem, @@ -193,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info, int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, struct kfd_vm_fault_info *info); +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dmabuf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset); + void amdgpu_amdkfd_gpuvm_init_mem_limits(void); -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo); +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo); #endif /* AMDGPU_AMDKFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c index 9803b91f3e77..ff7fac7df34b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c @@ -23,6 +23,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -142,7 +143,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd); @@ -173,13 +174,6 @@ static int get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -200,27 +194,10 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, .read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void) @@ -873,7 +850,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -881,7 +858,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, pr_err("trying to set page table base for wrong VMID\n"); return; } - WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, + lower_32_bits(page_table_base)); } static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c index f6e53e9352bd..56ea929f524b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c @@ -24,6 +24,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -45,8 +46,6 @@ enum hqd_dequeue_request_type { RESET_WAVES }; -struct vi_sdma_mqd; - /* * Register access functions */ @@ -100,7 +99,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid); static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid); @@ -130,13 +129,6 @@ static int get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -159,26 +151,9 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void) @@ -281,7 +256,8 @@ static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id) lock_srbm(kgd, mec, pipe, 0, 0); - WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK); + WREG32(mmCPC_INT_CNTL, CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK | + CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK); unlock_srbm(kgd); @@ -833,7 +809,7 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -841,7 +817,8 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, pr_err("trying to set page table base for wrong VMID\n"); return; } - WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, page_table_base); + WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + vmid - 8, + lower_32_bits(page_table_base)); } static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c index 8efedfcb9dfc..5c51d4910650 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c @@ -26,6 +26,7 @@ #include <linux/fdtable.h> #include <linux/uaccess.h> #include <linux/firmware.h> +#include <linux/mmu_context.h> #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_amdkfd.h" @@ -46,38 +47,9 @@ #include "v9_structs.h" #include "soc15.h" #include "soc15d.h" +#include "mmhub_v1_0.h" +#include "gfxhub_v1_0.h" -/* HACK: MMHUB and GC both have VM-related register with the same - * names but different offsets. Define the MMHUB register we need here - * with a prefix. A proper solution would be to move the functions - * programming these registers into gfx_v9_0.c and mmhub_v1_0.c - * respectively. - */ -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ 0x06f3 -#define mmMMHUB_VM_INVALIDATE_ENG16_REQ_BASE_IDX 0 - -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK 0x0705 -#define mmMMHUB_VM_INVALIDATE_ENG16_ACK_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32 0x072b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32 0x072c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32 0x074b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32 0x074c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32 0x076b -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32_BASE_IDX 0 -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32 0x076c -#define mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32_BASE_IDX 0 - -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32 0x0727 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32_BASE_IDX 0 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32 0x0728 -#define mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32_BASE_IDX 0 #define V9_PIPE_PER_MEC (4) #define V9_QUEUES_PER_PIPE_MEC (8) @@ -138,7 +110,7 @@ static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd, static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, uint8_t vmid); static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base); + uint64_t page_table_base); static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type); static void set_scratch_backing_va(struct kgd_dev *kgd, uint64_t va, uint32_t vmid); @@ -167,13 +139,6 @@ static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd, } static const struct kfd2kgd_calls kfd2kgd = { - .init_gtt_mem_allocation = alloc_gtt_mem, - .free_gtt_mem = free_gtt_mem, - .get_local_mem_info = get_local_mem_info, - .get_gpu_clock_counter = get_gpu_clock_counter, - .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, - .alloc_pasid = amdgpu_pasid_alloc, - .free_pasid = amdgpu_pasid_free, .program_sh_mem_settings = kgd_program_sh_mem_settings, .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, .init_interrupts = kgd_init_interrupts, @@ -196,25 +161,10 @@ static const struct kfd2kgd_calls kfd2kgd = { .get_fw_version = get_fw_version, .set_scratch_backing_va = set_scratch_backing_va, .get_tile_config = amdgpu_amdkfd_get_tile_config, - .get_cu_info = get_cu_info, - .get_vram_usage = amdgpu_amdkfd_get_vram_usage, - .create_process_vm = amdgpu_amdkfd_gpuvm_create_process_vm, - .acquire_process_vm = amdgpu_amdkfd_gpuvm_acquire_process_vm, - .destroy_process_vm = amdgpu_amdkfd_gpuvm_destroy_process_vm, - .get_process_page_dir = amdgpu_amdkfd_gpuvm_get_process_page_dir, .set_vm_context_page_table_base = set_vm_context_page_table_base, - .alloc_memory_of_gpu = amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu, - .free_memory_of_gpu = amdgpu_amdkfd_gpuvm_free_memory_of_gpu, - .map_memory_to_gpu = amdgpu_amdkfd_gpuvm_map_memory_to_gpu, - .unmap_memory_to_gpu = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu, - .sync_memory = amdgpu_amdkfd_gpuvm_sync_memory, - .map_gtt_bo_to_kernel = amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel, - .restore_process_bos = amdgpu_amdkfd_gpuvm_restore_process_bos, .invalidate_tlbs = invalidate_tlbs, .invalidate_tlbs_vmid = invalidate_tlbs_vmid, - .submit_ib = amdgpu_amdkfd_submit_ib, - .gpu_recover = amdgpu_amdkfd_gpu_reset, - .set_compute_idle = amdgpu_amdkfd_set_compute_idle + .get_hive_id = amdgpu_amdkfd_get_hive_id, }; struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void) @@ -783,15 +733,6 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd, static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) { struct amdgpu_device *adev = (struct amdgpu_device *) kgd; - uint32_t req = (1 << vmid) | - (0 << VM_INVALIDATE_ENG16_REQ__FLUSH_TYPE__SHIFT) | /* legacy */ - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PTES_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE0_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE1_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L2_PDE2_MASK | - VM_INVALIDATE_ENG16_REQ__INVALIDATE_L1_PTES_MASK; - - mutex_lock(&adev->srbm_mutex); /* Use legacy mode tlb invalidation. * @@ -808,34 +749,7 @@ static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid) * TODO 2: support range-based invalidation, requires kfg2kgd * interface change */ - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_LO32), - 0xffffffff); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ADDR_RANGE_HI32), - 0x0000001f); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_REQ), req); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_INVALIDATE_ENG16_REQ), - req); - - while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmVM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) - cpu_relax(); - - while (!(RREG32(SOC15_REG_OFFSET(MMHUB, 0, - mmMMHUB_VM_INVALIDATE_ENG16_ACK)) & - (1 << vmid))) - cpu_relax(); - - mutex_unlock(&adev->srbm_mutex); - + amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0); } static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid) @@ -874,7 +788,7 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid) if (adev->in_gpu_reset) return -EIO; - if (ring->ready) + if (ring->sched.ready) return invalidate_tlbs_with_kiq(adev, pasid); for (vmid = 0; vmid < 16; vmid++) { @@ -1011,11 +925,9 @@ static uint16_t get_fw_version(struct kgd_dev *kgd, enum kgd_engine_type type) } static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, - uint32_t page_table_base) + uint64_t page_table_base) { struct amdgpu_device *adev = get_amdgpu_device(kgd); - uint64_t base = (uint64_t)page_table_base << PAGE_SHIFT | - AMDGPU_PTE_VALID; if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) { pr_err("trying to set page table base for wrong VMID %u\n", @@ -1027,25 +939,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid, * now, all processes share the same address space size, like * on GFX8 and older. */ - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); - - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmMMHUB_VM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0); - - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2), - lower_32_bits(adev->vm_manager.max_pfn - 1)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2), - upper_32_bits(adev->vm_manager.max_pfn - 1)); + mmhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base)); - WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base)); + gfxhub_v1_0_setup_vm_pt_regs(adev, vmid, page_table_base); } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c index f92597c292fe..be1ab43473c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c @@ -25,6 +25,7 @@ #include <linux/list.h> #include <linux/pagemap.h> #include <linux/sched/mm.h> +#include <linux/dma-buf.h> #include <drm/drmP.h> #include "amdgpu_object.h" #include "amdgpu_vm.h" @@ -46,9 +47,9 @@ /* Impose limit on how much memory KFD can use */ static struct { uint64_t max_system_mem_limit; - uint64_t max_userptr_mem_limit; + uint64_t max_ttm_mem_limit; int64_t system_mem_used; - int64_t userptr_mem_used; + int64_t ttm_mem_used; spinlock_t mem_limit_lock; } kfd_mem_limit; @@ -90,8 +91,8 @@ static bool check_if_add_bo_to_vm(struct amdgpu_vm *avm, } /* Set memory usage limits. Current, limits are - * System (kernel) memory - 3/8th System RAM - * Userptr memory - 3/4th System RAM + * System (TTM + userptr) memory - 3/4th System RAM + * TTM memory - 3/8th System RAM */ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) { @@ -103,48 +104,61 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void) mem *= si.mem_unit; spin_lock_init(&kfd_mem_limit.mem_limit_lock); - kfd_mem_limit.max_system_mem_limit = (mem >> 1) - (mem >> 3); - kfd_mem_limit.max_userptr_mem_limit = mem - (mem >> 2); - pr_debug("Kernel memory limit %lluM, userptr limit %lluM\n", + kfd_mem_limit.max_system_mem_limit = (mem >> 1) + (mem >> 2); + kfd_mem_limit.max_ttm_mem_limit = (mem >> 1) - (mem >> 3); + pr_debug("Kernel memory limit %lluM, TTM limit %lluM\n", (kfd_mem_limit.max_system_mem_limit >> 20), - (kfd_mem_limit.max_userptr_mem_limit >> 20)); + (kfd_mem_limit.max_ttm_mem_limit >> 20)); } -static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) +static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) { - size_t acc_size; + size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed; + uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9; int ret = 0; acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size, sizeof(struct amdgpu_bo)); - spin_lock(&kfd_mem_limit.mem_limit_lock); + vram_needed = 0; if (domain == AMDGPU_GEM_DOMAIN_GTT) { - if (kfd_mem_limit.system_mem_used + (acc_size + size) > - kfd_mem_limit.max_system_mem_limit) { - ret = -ENOMEM; - goto err_no_mem; - } - kfd_mem_limit.system_mem_used += (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { - if ((kfd_mem_limit.system_mem_used + acc_size > - kfd_mem_limit.max_system_mem_limit) || - (kfd_mem_limit.userptr_mem_used + (size + acc_size) > - kfd_mem_limit.max_userptr_mem_limit)) { - ret = -ENOMEM; - goto err_no_mem; - } - kfd_mem_limit.system_mem_used += acc_size; - kfd_mem_limit.userptr_mem_used += size; + /* TTM GTT memory */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size + size; + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + /* Userptr */ + system_mem_needed = acc_size + size; + ttm_mem_needed = acc_size; + } else { + /* VRAM and SG */ + system_mem_needed = acc_size; + ttm_mem_needed = acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) + vram_needed = size; } -err_no_mem: + + spin_lock(&kfd_mem_limit.mem_limit_lock); + + if ((kfd_mem_limit.system_mem_used + system_mem_needed > + kfd_mem_limit.max_system_mem_limit) || + (kfd_mem_limit.ttm_mem_used + ttm_mem_needed > + kfd_mem_limit.max_ttm_mem_limit) || + (adev->kfd.vram_used + vram_needed > + adev->gmc.real_vram_size - reserved_for_pt)) { + ret = -ENOMEM; + } else { + kfd_mem_limit.system_mem_used += system_mem_needed; + kfd_mem_limit.ttm_mem_used += ttm_mem_needed; + adev->kfd.vram_used += vram_needed; + } + spin_unlock(&kfd_mem_limit.mem_limit_lock); return ret; } -static void unreserve_system_mem_limit(struct amdgpu_device *adev, - uint64_t size, u32 domain) +static void unreserve_mem_limit(struct amdgpu_device *adev, + uint64_t size, u32 domain, bool sg) { size_t acc_size; @@ -154,35 +168,39 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev, spin_lock(&kfd_mem_limit.mem_limit_lock); if (domain == AMDGPU_GEM_DOMAIN_GTT) { kfd_mem_limit.system_mem_used -= (acc_size + size); - } else if (domain == AMDGPU_GEM_DOMAIN_CPU) { + kfd_mem_limit.ttm_mem_used -= (acc_size + size); + } else if (domain == AMDGPU_GEM_DOMAIN_CPU && !sg) { + kfd_mem_limit.system_mem_used -= (acc_size + size); + kfd_mem_limit.ttm_mem_used -= acc_size; + } else { kfd_mem_limit.system_mem_used -= acc_size; - kfd_mem_limit.userptr_mem_used -= size; + kfd_mem_limit.ttm_mem_used -= acc_size; + if (domain == AMDGPU_GEM_DOMAIN_VRAM) { + adev->kfd.vram_used -= size; + WARN_ONCE(adev->kfd.vram_used < 0, + "kfd VRAM memory accounting unbalanced"); + } } WARN_ONCE(kfd_mem_limit.system_mem_used < 0, "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); + WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0, + "kfd TTM memory accounting unbalanced"); spin_unlock(&kfd_mem_limit.mem_limit_lock); } -void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo) +void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo) { - spin_lock(&kfd_mem_limit.mem_limit_lock); + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + u32 domain = bo->preferred_domains; + bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU); if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) { - kfd_mem_limit.system_mem_used -= bo->tbo.acc_size; - kfd_mem_limit.userptr_mem_used -= amdgpu_bo_size(bo); - } else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) { - kfd_mem_limit.system_mem_used -= - (bo->tbo.acc_size + amdgpu_bo_size(bo)); + domain = AMDGPU_GEM_DOMAIN_CPU; + sg = false; } - WARN_ONCE(kfd_mem_limit.system_mem_used < 0, - "kfd system memory accounting unbalanced"); - WARN_ONCE(kfd_mem_limit.userptr_mem_used < 0, - "kfd userptr memory accounting unbalanced"); - spin_unlock(&kfd_mem_limit.mem_limit_lock); + unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg); } @@ -364,7 +382,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) struct amdgpu_bo *pd = vm->root.base.bo; struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); struct amdgpu_vm_parser param; - uint64_t addr, flags = AMDGPU_PTE_VALID; int ret; param.domain = AMDGPU_GEM_DOMAIN_VRAM; @@ -383,9 +400,7 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return ret; } - addr = amdgpu_bo_gpu_offset(vm->root.base.bo); - amdgpu_gmc_get_vm_pde(adev, -1, &addr, &flags); - vm->pd_phys_addr = addr; + vm->pd_phys_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); if (vm->use_cpu_for_update) { ret = amdgpu_bo_kmap(pd, NULL); @@ -398,23 +413,6 @@ static int vm_validate_pt_pd_bos(struct amdgpu_vm *vm) return 0; } -static int sync_vm_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync, - struct dma_fence *f) -{ - int ret = amdgpu_sync_fence(adev, sync, f, false); - - /* Sync objects can't handle multiple GPUs (contexts) updating - * sync->last_vm_update. Fortunately we don't need it for - * KFD's purposes, so we can just drop that fence. - */ - if (sync->last_vm_update) { - dma_fence_put(sync->last_vm_update); - sync->last_vm_update = NULL; - } - - return ret; -} - static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) { struct amdgpu_bo *pd = vm->root.base.bo; @@ -425,7 +423,7 @@ static int vm_update_pds(struct amdgpu_vm *vm, struct amdgpu_sync *sync) if (ret) return ret; - return sync_vm_fence(adev, sync, vm->last_update); + return amdgpu_sync_fence(NULL, sync, vm->last_update, false); } /* add_bo_to_vm - Add a BO to a VM @@ -539,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem, struct amdgpu_bo *bo = mem->bo; INIT_LIST_HEAD(&entry->head); - entry->shared = true; + entry->num_shared = 1; entry->bo = &bo->tbo; mutex_lock(&process_info->lock); if (userptr) @@ -678,10 +676,9 @@ static int reserve_bo_and_vm(struct kgd_mem *mem, if (!ctx->vm_pd) return -ENOMEM; - ctx->kfd_bo.robj = bo; ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -743,10 +740,9 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem, return -ENOMEM; } - ctx->kfd_bo.robj = bo; ctx->kfd_bo.priority = 0; ctx->kfd_bo.tv.bo = &bo->tbo; - ctx->kfd_bo.tv.shared = true; + ctx->kfd_bo.tv.num_shared = 1; ctx->kfd_bo.user_pages = NULL; list_add(&ctx->kfd_bo.tv.head, &ctx->list); @@ -831,7 +827,7 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev, /* Add the eviction fence back */ amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true); - sync_vm_fence(adev, sync, bo_va->last_pt_update); + amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); return 0; } @@ -856,7 +852,7 @@ static int update_gpuvm_pte(struct amdgpu_device *adev, return ret; } - return sync_vm_fence(adev, sync, bo_va->last_pt_update); + return amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false); } static int map_bo_to_gpuvm(struct amdgpu_device *adev, @@ -891,6 +887,24 @@ update_gpuvm_pte_failed: return ret; } +static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size) +{ + struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL); + + if (!sg) + return NULL; + if (sg_alloc_table(sg, 1, GFP_KERNEL)) { + kfree(sg); + return NULL; + } + sg->sgl->dma_address = addr; + sg->sgl->length = size; +#ifdef CONFIG_NEED_SG_DMA_LENGTH + sg->sgl->dma_length = size; +#endif + return sg; +} + static int process_validate_vms(struct amdkfd_process_info *process_info) { struct amdgpu_vm *peer_vm; @@ -906,6 +920,26 @@ static int process_validate_vms(struct amdkfd_process_info *process_info) return 0; } +static int process_sync_pds_resv(struct amdkfd_process_info *process_info, + struct amdgpu_sync *sync) +{ + struct amdgpu_vm *peer_vm; + int ret; + + list_for_each_entry(peer_vm, &process_info->vm_list_head, + vm_list_node) { + struct amdgpu_bo *pd = peer_vm->root.base.bo; + + ret = amdgpu_sync_resv(NULL, + sync, pd->tbo.resv, + AMDGPU_FENCE_OWNER_UNDEFINED, false); + if (ret) + return ret; + } + + return 0; +} + static int process_update_pds(struct amdkfd_process_info *process_info, struct amdgpu_sync *sync) { @@ -1003,8 +1037,8 @@ create_evict_fence_fail: return ret; } -int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, - void **process_info, +int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, unsigned int pasid, + void **vm, void **process_info, struct dma_fence **ef) { struct amdgpu_device *adev = get_amdgpu_device(kgd); @@ -1016,7 +1050,7 @@ int amdgpu_amdkfd_gpuvm_create_process_vm(struct kgd_dev *kgd, void **vm, return -ENOMEM; /* Initialize AMDGPU part of the VM */ - ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, 0); + ret = amdgpu_vm_init(adev, new_vm, AMDGPU_VM_CONTEXT_COMPUTE, pasid); if (ret) { pr_err("Failed init vm ret %d\n", ret); goto amdgpu_vm_init_fail; @@ -1039,7 +1073,7 @@ amdgpu_vm_init_fail: } int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, - struct file *filp, + struct file *filp, unsigned int pasid, void **vm, void **process_info, struct dma_fence **ef) { @@ -1054,7 +1088,7 @@ int amdgpu_amdkfd_gpuvm_acquire_process_vm(struct kgd_dev *kgd, return -EINVAL; /* Convert VM into a compute VM */ - ret = amdgpu_vm_make_compute(adev, avm); + ret = amdgpu_vm_make_compute(adev, avm, pasid); if (ret) return ret; @@ -1117,11 +1151,34 @@ void amdgpu_amdkfd_gpuvm_destroy_process_vm(struct kgd_dev *kgd, void *vm) kfree(vm); } -uint32_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) +void amdgpu_amdkfd_gpuvm_release_process_vm(struct kgd_dev *kgd, void *vm) +{ + struct amdgpu_device *adev = get_amdgpu_device(kgd); + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (WARN_ON(!kgd || !vm)) + return; + + pr_debug("Releasing process vm %p\n", vm); + + /* The original pasid of amdgpu vm has already been + * released during making a amdgpu vm to a compute vm + * The current pasid is managed by kfd and will be + * released on kfd process destroy. Set amdgpu pasid + * to 0 to avoid duplicate release. + */ + amdgpu_vm_release_compute(adev, avm); +} + +uint64_t amdgpu_amdkfd_gpuvm_get_process_page_dir(void *vm) { struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + struct amdgpu_bo *pd = avm->root.base.bo; + struct amdgpu_device *adev = amdgpu_ttm_adev(pd->tbo.bdev); - return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; + if (adev->asic_type < CHIP_VEGA10) + return avm->pd_phys_addr >> AMDGPU_GPU_PAGE_SHIFT; + return avm->pd_phys_addr; } int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( @@ -1131,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( { struct amdgpu_device *adev = get_amdgpu_device(kgd); struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + enum ttm_bo_type bo_type = ttm_bo_type_device; + struct sg_table *sg = NULL; uint64_t user_addr = 0; struct amdgpu_bo *bo; struct amdgpu_bo_param bp; @@ -1159,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( if (!offset || !*offset) return -EINVAL; user_addr = *offset; + } else if (flags & ALLOC_MEM_FLAGS_DOORBELL) { + domain = AMDGPU_GEM_DOMAIN_GTT; + alloc_domain = AMDGPU_GEM_DOMAIN_CPU; + bo_type = ttm_bo_type_sg; + alloc_flags = 0; + if (size > UINT_MAX) + return -EINVAL; + sg = create_doorbell_sg(*offset, size); + if (!sg) + return -ENOMEM; } else { return -EINVAL; } *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); - if (!*mem) - return -ENOMEM; + if (!*mem) { + ret = -ENOMEM; + goto err; + } INIT_LIST_HEAD(&(*mem)->bo_va_list); mutex_init(&(*mem)->lock); (*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM); @@ -1181,7 +1252,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( byte_align = (adev->family == AMDGPU_FAMILY_VI && adev->asic_type != CHIP_FIJI && adev->asic_type != CHIP_POLARIS10 && - adev->asic_type != CHIP_POLARIS11) ? + adev->asic_type != CHIP_POLARIS11 && + adev->asic_type != CHIP_POLARIS12) ? VI_BO_SIZE_ALIGN : 1; mapping_flags = AMDGPU_VM_PAGE_READABLE; @@ -1197,10 +1269,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( amdgpu_sync_create(&(*mem)->sync); - ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size, alloc_domain); + ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg); if (ret) { pr_debug("Insufficient system memory\n"); - goto err_reserve_system_mem; + goto err_reserve_limit; } pr_debug("\tcreate BO VA 0x%llx size 0x%llx domain %s\n", @@ -1211,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( bp.byte_align = byte_align; bp.domain = alloc_domain; bp.flags = alloc_flags; - bp.type = ttm_bo_type_device; + bp.type = bo_type; bp.resv = NULL; ret = amdgpu_bo_create(adev, &bp, &bo); if (ret) { @@ -1219,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( domain_string(alloc_domain), ret); goto err_bo_create; } + if (bo_type == ttm_bo_type_sg) { + bo->tbo.sg = sg; + bo->tbo.ttm->sg = sg; + } bo->kfd_bo = *mem; (*mem)->bo = bo; if (user_addr) @@ -1248,12 +1324,17 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu( allocate_init_user_pages_failed: amdgpu_bo_unref(&bo); /* Don't unreserve system mem limit twice */ - goto err_reserve_system_mem; + goto err_reserve_limit; err_bo_create: - unreserve_system_mem_limit(adev, size, alloc_domain); -err_reserve_system_mem: + unreserve_mem_limit(adev, size, alloc_domain, !!sg); +err_reserve_limit: mutex_destroy(&(*mem)->lock); kfree(*mem); +err: + if (sg) { + sg_free_table(sg); + kfree(sg); + } return ret; } @@ -1323,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu( /* Free the sync object */ amdgpu_sync_free(&mem->sync); + /* If the SG is not NULL, it's one we created for a doorbell + * BO. We need to free it. + */ + if (mem->bo->tbo.sg) { + sg_free_table(mem->bo->tbo.sg); + kfree(mem->bo->tbo.sg); + } + /* Free the BO*/ amdgpu_bo_unref(&mem->bo); mutex_destroy(&mem->lock); @@ -1387,7 +1476,8 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu( * the queues are still stopped and we can leave mapping for * the next restore worker */ - if (bo->tbo.mem.mem_type == TTM_PL_SYSTEM) + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) && + bo->tbo.mem.mem_type == TTM_PL_SYSTEM) is_invalid_userptr = true; if (check_if_add_bo_to_vm(avm, mem)) { @@ -1624,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd, return 0; } +int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd, + struct dma_buf *dma_buf, + uint64_t va, void *vm, + struct kgd_mem **mem, uint64_t *size, + uint64_t *mmap_offset) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)kgd; + struct drm_gem_object *obj; + struct amdgpu_bo *bo; + struct amdgpu_vm *avm = (struct amdgpu_vm *)vm; + + if (dma_buf->ops != &amdgpu_dmabuf_ops) + /* Can't handle non-graphics buffers */ + return -EINVAL; + + obj = dma_buf->priv; + if (obj->dev->dev_private != adev) + /* Can't handle buffers from other devices */ + return -EINVAL; + + bo = gem_to_amdgpu_bo(obj); + if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM | + AMDGPU_GEM_DOMAIN_GTT))) + /* Only VRAM and GTT BOs are supported */ + return -EINVAL; + + *mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if (!*mem) + return -ENOMEM; + + if (size) + *size = amdgpu_bo_size(bo); + + if (mmap_offset) + *mmap_offset = amdgpu_bo_mmap_offset(bo); + + INIT_LIST_HEAD(&(*mem)->bo_va_list); + mutex_init(&(*mem)->lock); + (*mem)->mapping_flags = + AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE | + AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC; + + (*mem)->bo = amdgpu_bo_ref(bo); + (*mem)->va = va; + (*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ? + AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT; + (*mem)->mapped_to_gpu_memory = 0; + (*mem)->process_info = avm->process_info; + add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false); + amdgpu_sync_create(&(*mem)->sync); + + return 0; +} + /* Evict a userptr BO by stopping the queues if necessary * * Runs in MMU notifier, may be in RECLAIM_FS context. This means it @@ -1790,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info) validate_list.head) { list_add_tail(&mem->resv_list.head, &resv_list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } /* Reserve all BOs and page tables for validation */ @@ -2009,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) list_add_tail(&mem->resv_list.head, &ctx.list); mem->resv_list.bo = mem->validate_list.bo; - mem->resv_list.shared = mem->validate_list.shared; + mem->resv_list.num_shared = mem->validate_list.num_shared; } ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list, @@ -2026,13 +2170,10 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) if (ret) goto validate_map_fail; - /* Wait for PD/PTs validate to finish */ - /* FIXME: I think this isn't needed */ - list_for_each_entry(peer_vm, &process_info->vm_list_head, - vm_list_node) { - struct amdgpu_bo *bo = peer_vm->root.base.bo; - - ttm_bo_wait(&bo->tbo, false, false); + ret = process_sync_pds_resv(process_info, &sync_obj); + if (ret) { + pr_debug("Memory eviction: Failed to sync to PD BO moving fence. Try again\n"); + goto validate_map_fail; } /* Validate BOs and map them to GPUVM (update VM page tables). */ @@ -2048,7 +2189,11 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) pr_debug("Memory eviction: Validate BOs failed. Try again\n"); goto validate_map_fail; } - + ret = amdgpu_sync_fence(NULL, &sync_obj, bo->tbo.moving, false); + if (ret) { + pr_debug("Memory eviction: Sync BO fence failed. Try again\n"); + goto validate_map_fail; + } list_for_each_entry(bo_va_entry, &mem->bo_va_list, bo_list) { ret = update_gpuvm_pte((struct amdgpu_device *) @@ -2069,6 +2214,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) goto validate_map_fail; } + /* Wait for validate and PT updates to finish */ amdgpu_sync_wait(&sync_obj, false); /* Release old eviction fence and create new one, because fence only @@ -2087,10 +2233,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef) process_info->eviction_fence = new_fence; *ef = dma_fence_get(&new_fence->base); - /* Wait for validate to finish and attach new eviction fence */ - list_for_each_entry(mem, &process_info->kfd_bo_list, - validate_list.head) - ttm_bo_wait(&mem->bo->tbo, false, false); + /* Attach new eviction fence to all BOs */ list_for_each_entry(mem, &process_info->kfd_bo_list, validate_list.head) amdgpu_bo_fence(mem->bo, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c index bf872f694f50..e02781b37e73 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c @@ -29,6 +29,7 @@ #include "amdgpu_atombios.h" #include "amdgpu_atomfirmware.h" #include "amdgpu_i2c.h" +#include "amdgpu_display.h" #include "atom.h" #include "atom-bits.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c index 236915849cfe..b61e1dc61b4c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atomfirmware.c @@ -117,6 +117,10 @@ union igp_info { union umc_info { struct atom_umc_info_v3_1 v31; }; + +union vram_info { + struct atom_vram_info_header_v2_3 v23; +}; /* * Return vram width from integrated system info table, if available, * or 0 if not. @@ -174,7 +178,7 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev, case ATOM_DGPU_VRAM_TYPE_GDDR5: vram_type = AMDGPU_VRAM_TYPE_GDDR5; break; - case ATOM_DGPU_VRAM_TYPE_HBM: + case ATOM_DGPU_VRAM_TYPE_HBM2: vram_type = AMDGPU_VRAM_TYPE_HBM; break; default: @@ -195,7 +199,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) int index; u16 data_offset, size; union igp_info *igp_info; - union umc_info *umc_info; + union vram_info *vram_info; u8 frev, crev; u8 mem_type; @@ -204,7 +208,7 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) integratedsysteminfo); else index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1, - umc_info); + vram_info); if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size, &frev, &crev, &data_offset)) { @@ -219,11 +223,11 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev) return 0; } } else { - umc_info = (union umc_info *) + vram_info = (union vram_info *) (mode_info->atom_context->bios + data_offset); switch (crev) { - case 1: - mem_type = umc_info->v31.vram_type; + case 3: + mem_type = vram_info->v23.vram_module[0].memory_type; return convert_atom_mem_type_to_vram_type(adev, mem_type); default: return 0; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c index d472a2c8399f..5c79da8e1150 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c @@ -49,8 +49,11 @@ static void amdgpu_bo_list_free(struct kref *ref) refcount); struct amdgpu_bo_list_entry *e; - amdgpu_bo_list_for_each_entry(e, list) - amdgpu_bo_unref(&e->robj); + amdgpu_bo_list_for_each_entry(e, list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + amdgpu_bo_unref(&bo); + } call_rcu(&list->rhead, amdgpu_bo_list_free_rcu); } @@ -67,7 +70,8 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, unsigned i; int r; - if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry)) + if (num_entries > (SIZE_MAX - sizeof(struct amdgpu_bo_list)) + / sizeof(struct amdgpu_bo_list_entry)) return -EINVAL; size = sizeof(struct amdgpu_bo_list); @@ -111,21 +115,19 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, entry = &array[last_entry++]; } - entry->robj = bo; entry->priority = min(info[i].bo_priority, AMDGPU_BO_LIST_MAX_PRIORITY); - entry->tv.bo = &entry->robj->tbo; - entry->tv.shared = !entry->robj->prime_shared_count; - - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) - list->gds_obj = entry->robj; - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) - list->gws_obj = entry->robj; - if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA) - list->oa_obj = entry->robj; - - total_size += amdgpu_bo_size(entry->robj); - trace_amdgpu_bo_list_set(list, entry->robj); + entry->tv.bo = &bo->tbo; + + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS) + list->gds_obj = bo; + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GWS) + list->gws_obj = bo; + if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_OA) + list->oa_obj = bo; + + total_size += amdgpu_bo_size(bo); + trace_amdgpu_bo_list_set(list, bo); } list->first_userptr = first_userptr; @@ -137,8 +139,11 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp, return 0; error_free: - while (i--) - amdgpu_bo_unref(&array[i].robj); + while (i--) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(array[i].tv.bo); + + amdgpu_bo_unref(&bo); + } kvfree(list); return r; @@ -190,9 +195,10 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list, * with the same priority, i.e. it must be stable. */ amdgpu_bo_list_for_each_entry(e, list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); unsigned priority = e->priority; - if (!e->robj->parent) + if (!bo->parent) list_add_tail(&e->tv.head, &bucket[priority]); e->user_pages = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h index 61b089768e1c..7c5f5d1601e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h @@ -32,7 +32,6 @@ struct amdgpu_bo_va; struct amdgpu_fpriv; struct amdgpu_bo_list_entry { - struct amdgpu_bo *robj; struct ttm_validate_buffer tv; struct amdgpu_bo_va *bo_va; uint32_t priority; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c index 8816c697b205..387f1cf1dc20 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c @@ -330,7 +330,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, case CHIP_TOPAZ: if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) || ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) || - ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) { + ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)) || + ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD1)) || + ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0xD3))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/topaz_k_smc.bin"); } else @@ -351,7 +353,6 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, if (type == CGS_UCODE_ID_SMU) { if (((adev->pdev->device == 0x67ef) && ((adev->pdev->revision == 0xe0) || - (adev->pdev->revision == 0xe2) || (adev->pdev->revision == 0xe5))) || ((adev->pdev->device == 0x67ff) && ((adev->pdev->revision == 0xcf) || @@ -359,8 +360,13 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0xff)))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris11_k_smc.bin"); - } else + } else if ((adev->pdev->device == 0x67ef) && + (adev->pdev->revision == 0xe2)) { + info->is_kicker = true; + strcpy(fw_name, "amdgpu/polaris11_k2_smc.bin"); + } else { strcpy(fw_name, "amdgpu/polaris11_smc.bin"); + } } else if (type == CGS_UCODE_ID_SMU_SK) { strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin"); } @@ -375,17 +381,35 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device, (adev->pdev->revision == 0xe7) || (adev->pdev->revision == 0xef))) || ((adev->pdev->device == 0x6fdf) && - (adev->pdev->revision == 0xef))) { + ((adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) { info->is_kicker = true; strcpy(fw_name, "amdgpu/polaris10_k_smc.bin"); - } else + } else if ((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe1) || + (adev->pdev->revision == 0xf7))) { + info->is_kicker = true; + strcpy(fw_name, "amdgpu/polaris10_k2_smc.bin"); + } else { strcpy(fw_name, "amdgpu/polaris10_smc.bin"); + } } else if (type == CGS_UCODE_ID_SMU_SK) { strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin"); } break; case CHIP_POLARIS12: - strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + if (((adev->pdev->device == 0x6987) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc3))) || + ((adev->pdev->device == 0x6981) && + ((adev->pdev->revision == 0x00) || + (adev->pdev->revision == 0x01) || + (adev->pdev->revision == 0x10)))) { + info->is_kicker = true; + strcpy(fw_name, "amdgpu/polaris12_k_smc.bin"); + } else { + strcpy(fw_name, "amdgpu/polaris12_smc.bin"); + } break; case CHIP_VEGAM: strcpy(fw_name, "amdgpu/vegam_smc.bin"); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c index c770d73352a7..69ad6ec0a4f3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c @@ -34,6 +34,7 @@ #include "atombios_dp.h" #include "amdgpu_connectors.h" #include "amdgpu_i2c.h" +#include "amdgpu_display.h" #include <linux/pm_runtime.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c index b31d121a876b..1c49b8266d69 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c @@ -32,12 +32,14 @@ #include "amdgpu.h" #include "amdgpu_trace.h" #include "amdgpu_gmc.h" +#include "amdgpu_gem.h" static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, struct drm_amdgpu_cs_chunk_fence *data, uint32_t *offset) { struct drm_gem_object *gobj; + struct amdgpu_bo *bo; unsigned long size; int r; @@ -45,21 +47,22 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, if (gobj == NULL) return -EINVAL; - p->uf_entry.robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); + bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj)); p->uf_entry.priority = 0; - p->uf_entry.tv.bo = &p->uf_entry.robj->tbo; - p->uf_entry.tv.shared = true; + p->uf_entry.tv.bo = &bo->tbo; + /* One for TTM and one for the CS job */ + p->uf_entry.tv.num_shared = 2; p->uf_entry.user_pages = NULL; drm_gem_object_put_unlocked(gobj); - size = amdgpu_bo_size(p->uf_entry.robj); + size = amdgpu_bo_size(bo); if (size != PAGE_SIZE || (data->offset + 8) > size) { r = -EINVAL; goto error_unref; } - if (amdgpu_ttm_tt_get_usermm(p->uf_entry.robj->tbo.ttm)) { + if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm)) { r = -EINVAL; goto error_unref; } @@ -69,7 +72,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p, return 0; error_unref: - amdgpu_bo_unref(&p->uf_entry.robj); + amdgpu_bo_unref(&bo); return r; } @@ -122,14 +125,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_chunk; } + mutex_lock(&p->ctx->lock); + /* skip guilty context job */ if (atomic_read(&p->ctx->guilty) == 1) { ret = -ECANCELED; goto free_chunk; } - mutex_lock(&p->ctx->lock); - /* get chunks */ chunk_array_user = u64_to_user_ptr(cs->in.chunks); if (copy_from_user(chunk_array, chunk_array_user, @@ -228,7 +231,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs goto free_all_kdata; } - if (p->uf_entry.robj) + if (p->uf_entry.tv.bo) p->job->uf_addr = uf_offset; kfree(chunk_array); @@ -457,13 +460,13 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p, p->evictable = list_prev_entry(p->evictable, tv.head)) { struct amdgpu_bo_list_entry *candidate = p->evictable; - struct amdgpu_bo *bo = candidate->robj; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(candidate->tv.bo); struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); bool update_bytes_moved_vis; uint32_t other; /* If we reached our current BO we can forget it */ - if (candidate->robj == validated) + if (bo == validated) break; /* We can't move pinned BOs here */ @@ -528,7 +531,7 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p, int r; list_for_each_entry(lobj, validated, tv.head) { - struct amdgpu_bo *bo = lobj->robj; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(lobj->tv.bo); bool binding_userptr = false; struct mm_struct *usermm; @@ -596,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, return r; } + /* One for TTM and one for the CS job */ + amdgpu_bo_list_for_each_entry(e, p->bo_list) + e->tv.num_shared = 2; + amdgpu_bo_list_get_list(p->bo_list, &p->validated); if (p->bo_list->first_userptr != p->bo_list->num_entries) p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX); @@ -603,7 +610,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&duplicates); amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd); - if (p->uf_entry.robj && !p->uf_entry.robj->parent) + if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent) list_add(&p->uf_entry.tv.head, &p->validated); while (1) { @@ -619,7 +626,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, INIT_LIST_HEAD(&need_pages); amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = e->robj; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm, &e->user_invalidated) && e->user_pages) { @@ -638,7 +645,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, list_del(&e->tv.head); list_add(&e->tv.head, &need_pages); - amdgpu_bo_unreserve(e->robj); + amdgpu_bo_unreserve(bo); } } @@ -657,7 +664,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, /* Fill the page arrays for all userptrs. */ list_for_each_entry(e, &need_pages, tv.head) { - struct ttm_tt *ttm = e->robj->tbo.ttm; + struct ttm_tt *ttm = e->tv.bo->ttm; e->user_pages = kvmalloc_array(ttm->num_pages, sizeof(struct page*), @@ -715,24 +722,30 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p, gws = p->bo_list->gws_obj; oa = p->bo_list->oa_obj; - amdgpu_bo_list_for_each_entry(e, p->bo_list) - e->bo_va = amdgpu_vm_bo_find(vm, e->robj); + amdgpu_bo_list_for_each_entry(e, p->bo_list) { + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + + /* Make sure we use the exclusive slot for shared BOs */ + if (bo->prime_shared_count) + e->tv.num_shared = 0; + e->bo_va = amdgpu_vm_bo_find(vm, bo); + } if (gds) { - p->job->gds_base = amdgpu_bo_gpu_offset(gds); - p->job->gds_size = amdgpu_bo_size(gds); + p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT; + p->job->gds_size = amdgpu_bo_size(gds) >> PAGE_SHIFT; } if (gws) { - p->job->gws_base = amdgpu_bo_gpu_offset(gws); - p->job->gws_size = amdgpu_bo_size(gws); + p->job->gws_base = amdgpu_bo_gpu_offset(gws) >> PAGE_SHIFT; + p->job->gws_size = amdgpu_bo_size(gws) >> PAGE_SHIFT; } if (oa) { - p->job->oa_base = amdgpu_bo_gpu_offset(oa); - p->job->oa_size = amdgpu_bo_size(oa); + p->job->oa_base = amdgpu_bo_gpu_offset(oa) >> PAGE_SHIFT; + p->job->oa_size = amdgpu_bo_size(oa) >> PAGE_SHIFT; } - if (!r && p->uf_entry.robj) { - struct amdgpu_bo *uf = p->uf_entry.robj; + if (!r && p->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(p->uf_entry.tv.bo); r = amdgpu_ttm_alloc_gart(&uf->tbo); p->job->uf_addr += amdgpu_bo_gpu_offset(uf); @@ -748,8 +761,7 @@ error_free_pages: if (!e->user_pages) continue; - release_pages(e->user_pages, - e->robj->tbo.ttm->num_pages); + release_pages(e->user_pages, e->tv.bo->ttm->num_pages); kvfree(e->user_pages); } @@ -762,9 +774,11 @@ static int amdgpu_cs_sync_rings(struct amdgpu_cs_parser *p) int r; list_for_each_entry(e, &p->validated, tv.head) { - struct reservation_object *resv = e->robj->tbo.resv; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); + struct reservation_object *resv = bo->tbo.resv; + r = amdgpu_sync_resv(p->adev, &p->job->sync, resv, p->filp, - amdgpu_bo_explicit_sync(e->robj)); + amdgpu_bo_explicit_sync(bo)); if (r) return r; @@ -807,11 +821,16 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error, kfree(parser->chunks); if (parser->job) amdgpu_job_free(parser->job); - amdgpu_bo_unref(&parser->uf_entry.robj); + if (parser->uf_entry.tv.bo) { + struct amdgpu_bo *uf = ttm_to_amdgpu_bo(parser->uf_entry.tv.bo); + + amdgpu_bo_unref(&uf); + } } -static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) +static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p) { + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct amdgpu_fpriv *fpriv = p->filp->driver_priv; struct amdgpu_device *adev = p->adev; struct amdgpu_vm *vm = &fpriv->vm; @@ -820,6 +839,71 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) struct amdgpu_bo *bo; int r; + /* Only for UVD/VCE VM emulation */ + if (ring->funcs->parse_cs || ring->funcs->patch_cs_in_place) { + unsigned i, j; + + for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { + struct drm_amdgpu_cs_chunk_ib *chunk_ib; + struct amdgpu_bo_va_mapping *m; + struct amdgpu_bo *aobj = NULL; + struct amdgpu_cs_chunk *chunk; + uint64_t offset, va_start; + struct amdgpu_ib *ib; + uint8_t *kptr; + + chunk = &p->chunks[i]; + ib = &p->job->ibs[j]; + chunk_ib = chunk->kdata; + + if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + continue; + + va_start = chunk_ib->va_start & AMDGPU_GMC_HOLE_MASK; + r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); + if (r) { + DRM_ERROR("IB va_start is invalid\n"); + return r; + } + + if ((va_start + chunk_ib->ib_bytes) > + (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { + DRM_ERROR("IB va_start+ib_bytes is invalid\n"); + return -EINVAL; + } + + /* the IB should be reserved at this point */ + r = amdgpu_bo_kmap(aobj, (void **)&kptr); + if (r) { + return r; + } + + offset = m->start * AMDGPU_GPU_PAGE_SIZE; + kptr += va_start - offset; + + if (ring->funcs->parse_cs) { + memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); + amdgpu_bo_kunmap(aobj); + + r = amdgpu_ring_parse_cs(ring, p, j); + if (r) + return r; + } else { + ib->ptr = (uint32_t *)kptr; + r = amdgpu_ring_patch_cs_in_place(ring, p, j); + amdgpu_bo_kunmap(aobj); + if (r) + return r; + } + + j++; + } + } + + if (!p->job->vm) + return amdgpu_cs_sync_rings(p); + + r = amdgpu_vm_clear_freed(adev, vm, NULL); if (r) return r; @@ -852,7 +936,7 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) struct dma_fence *f; /* ignore duplicates */ - bo = e->robj; + bo = ttm_to_amdgpu_bo(e->tv.bo); if (!bo) continue; @@ -882,101 +966,21 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p) if (r) return r; + p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo); + if (amdgpu_vm_debug) { /* Invalidate all BOs to test for userspace bugs */ amdgpu_bo_list_for_each_entry(e, p->bo_list) { - /* ignore duplicates */ - if (!e->robj) - continue; - - amdgpu_vm_bo_invalidate(adev, e->robj, false); - } - } - - return r; -} - -static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev, - struct amdgpu_cs_parser *p) -{ - struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_vm *vm = &fpriv->vm; - struct amdgpu_ring *ring = p->ring; - int r; - - /* Only for UVD/VCE VM emulation */ - if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) { - unsigned i, j; - - for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) { - struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_bo_va_mapping *m; - struct amdgpu_bo *aobj = NULL; - struct amdgpu_cs_chunk *chunk; - uint64_t offset, va_start; - struct amdgpu_ib *ib; - uint8_t *kptr; - - chunk = &p->chunks[i]; - ib = &p->job->ibs[j]; - chunk_ib = chunk->kdata; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); - if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB) + /* ignore duplicates */ + if (!bo) continue; - va_start = chunk_ib->va_start & AMDGPU_VA_HOLE_MASK; - r = amdgpu_cs_find_mapping(p, va_start, &aobj, &m); - if (r) { - DRM_ERROR("IB va_start is invalid\n"); - return r; - } - - if ((va_start + chunk_ib->ib_bytes) > - (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) { - DRM_ERROR("IB va_start+ib_bytes is invalid\n"); - return -EINVAL; - } - - /* the IB should be reserved at this point */ - r = amdgpu_bo_kmap(aobj, (void **)&kptr); - if (r) { - return r; - } - - offset = m->start * AMDGPU_GPU_PAGE_SIZE; - kptr += va_start - offset; - - if (p->ring->funcs->parse_cs) { - memcpy(ib->ptr, kptr, chunk_ib->ib_bytes); - amdgpu_bo_kunmap(aobj); - - r = amdgpu_ring_parse_cs(ring, p, j); - if (r) - return r; - } else { - ib->ptr = (uint32_t *)kptr; - r = amdgpu_ring_patch_cs_in_place(ring, p, j); - amdgpu_bo_kunmap(aobj); - if (r) - return r; - } - - j++; + amdgpu_vm_bo_invalidate(adev, bo, false); } } - if (p->job->vm) { - p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->root.base.bo); - - r = amdgpu_bo_vm_update_pte(p); - if (r) - return r; - - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); - if (r) - return r; - } - return amdgpu_cs_sync_rings(p); } @@ -985,14 +989,15 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, { struct amdgpu_fpriv *fpriv = parser->filp->driver_priv; struct amdgpu_vm *vm = &fpriv->vm; - int i, j; int r, ce_preempt = 0, de_preempt = 0; + struct amdgpu_ring *ring; + int i, j; for (i = 0, j = 0; i < parser->nchunks && j < parser->job->num_ibs; i++) { struct amdgpu_cs_chunk *chunk; struct amdgpu_ib *ib; struct drm_amdgpu_cs_chunk_ib *chunk_ib; - struct amdgpu_ring *ring; + struct drm_sched_entity *entity; chunk = &parser->chunks[i]; ib = &parser->job->ibs[j]; @@ -1014,8 +1019,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, return -EINVAL; } - r = amdgpu_queue_mgr_map(adev, &parser->ctx->queue_mgr, chunk_ib->ip_type, - chunk_ib->ip_instance, chunk_ib->ring, &ring); + r = amdgpu_ctx_get_entity(parser->ctx, chunk_ib->ip_type, + chunk_ib->ip_instance, chunk_ib->ring, + &entity); if (r) return r; @@ -1023,14 +1029,14 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, parser->job->preamble_status |= AMDGPU_PREAMBLE_IB_PRESENT; - if (parser->ring && parser->ring != ring) + if (parser->entity && parser->entity != entity) return -EINVAL; - parser->ring = ring; + parser->entity = entity; - r = amdgpu_ib_get(adev, vm, - ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0, - ib); + ring = to_amdgpu_ring(entity->rq->sched); + r = amdgpu_ib_get(adev, vm, ring->funcs->parse_cs ? + chunk_ib->ib_bytes : 0, ib); if (r) { DRM_ERROR("Failed to get ib !\n"); return r; @@ -1044,12 +1050,13 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev, } /* UVD & VCE fw doesn't support user fences */ + ring = to_amdgpu_ring(parser->entity->rq->sched); if (parser->job->uf_addr && ( - parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD || - parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE)) + ring->funcs->type == AMDGPU_RING_TYPE_UVD || + ring->funcs->type == AMDGPU_RING_TYPE_VCE)) return -EINVAL; - return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx); + return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity); } static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, @@ -1065,24 +1072,23 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p, sizeof(struct drm_amdgpu_cs_chunk_dep); for (i = 0; i < num_deps; ++i) { - struct amdgpu_ring *ring; struct amdgpu_ctx *ctx; + struct drm_sched_entity *entity; struct dma_fence *fence; ctx = amdgpu_ctx_get(fpriv, deps[i].ctx_id); if (ctx == NULL) return -EINVAL; - r = amdgpu_queue_mgr_map(p->adev, &ctx->queue_mgr, - deps[i].ip_type, - deps[i].ip_instance, - deps[i].ring, &ring); + r = amdgpu_ctx_get_entity(ctx, deps[i].ip_type, + deps[i].ip_instance, + deps[i].ring, &entity); if (r) { amdgpu_ctx_put(ctx); return r; } - fence = amdgpu_ctx_get_fence(ctx, ring, + fence = amdgpu_ctx_get_fence(ctx, entity, deps[i].handle); if (IS_ERR(fence)) { r = PTR_ERR(fence); @@ -1105,7 +1111,7 @@ static int amdgpu_syncobj_lookup_and_add_to_sync(struct amdgpu_cs_parser *p, { int r; struct dma_fence *fence; - r = drm_syncobj_find_fence(p->filp, handle, &fence); + r = drm_syncobj_find_fence(p->filp, handle, 0, 0, &fence); if (r) return r; @@ -1201,9 +1207,9 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs) { struct amdgpu_fpriv *fpriv = p->filp->driver_priv; - struct amdgpu_ring *ring = p->ring; - struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity; + struct drm_sched_entity *entity = p->entity; enum drm_sched_priority priority; + struct amdgpu_ring *ring; struct amdgpu_bo_list_entry *e; struct amdgpu_job *job; uint64_t seq; @@ -1220,7 +1226,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, /* No memory allocation is allowed while holding the mn lock */ amdgpu_mn_lock(p->mn); amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) { - struct amdgpu_bo *bo = e->robj; + struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo); if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) { r = -ERESTARTSYS; @@ -1231,15 +1237,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, job->owner = p->filp; p->fence = dma_fence_get(&job->base.s_fence->finished); - r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq); - if (r) { - dma_fence_put(p->fence); - dma_fence_put(&job->base.s_fence->finished); - amdgpu_job_free(job); - amdgpu_mn_unlock(p->mn); - return r; - } - + amdgpu_ctx_add_fence(p->ctx, entity, p->fence, &seq); amdgpu_cs_post_dependencies(p); if ((job->preamble_status & AMDGPU_PREAMBLE_IB_PRESENT) && @@ -1261,14 +1259,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p, ring = to_amdgpu_ring(entity->rq->sched); amdgpu_ring_priority_get(ring, priority); + amdgpu_vm_move_to_lru_tail(p->adev, &fpriv->vm); + ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence); amdgpu_mn_unlock(p->mn); return 0; error_abort: - dma_fence_put(&job->base.s_fence->finished); - job->base.s_fence = NULL; + drm_sched_job_cleanup(&job->base); amdgpu_mn_unlock(p->mn); error_unlock: @@ -1292,7 +1291,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) r = amdgpu_cs_parser_init(&parser, data); if (r) { - DRM_ERROR("Failed to initialize parser !\n"); + DRM_ERROR("Failed to initialize parser %d!\n", r); goto out; } @@ -1300,6 +1299,12 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) if (r) goto out; + r = amdgpu_cs_dependencies(adev, &parser); + if (r) { + DRM_ERROR("Failed in the dependencies handling %d!\n", r); + goto out; + } + r = amdgpu_cs_parser_bos(&parser, data); if (r) { if (r == -ENOMEM) @@ -1311,16 +1316,10 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) reserved_buffers = true; - r = amdgpu_cs_dependencies(adev, &parser); - if (r) { - DRM_ERROR("Failed in the dependencies handling %d!\n", r); - goto out; - } - for (i = 0; i < parser.job->num_ibs; i++) trace_amdgpu_cs(&parser, i); - r = amdgpu_cs_ib_vm_chunk(adev, &parser); + r = amdgpu_cs_vm_handling(&parser); if (r) goto out; @@ -1344,9 +1343,8 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) { union drm_amdgpu_wait_cs *wait = data; - struct amdgpu_device *adev = dev->dev_private; unsigned long timeout = amdgpu_gem_timeout(wait->in.timeout); - struct amdgpu_ring *ring = NULL; + struct drm_sched_entity *entity; struct amdgpu_ctx *ctx; struct dma_fence *fence; long r; @@ -1355,15 +1353,14 @@ int amdgpu_cs_wait_ioctl(struct drm_device *dev, void *data, if (ctx == NULL) return -EINVAL; - r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, - wait->in.ip_type, wait->in.ip_instance, - wait->in.ring, &ring); + r = amdgpu_ctx_get_entity(ctx, wait->in.ip_type, wait->in.ip_instance, + wait->in.ring, &entity); if (r) { amdgpu_ctx_put(ctx); return r; } - fence = amdgpu_ctx_get_fence(ctx, ring, wait->in.handle); + fence = amdgpu_ctx_get_fence(ctx, entity, wait->in.handle); if (IS_ERR(fence)) r = PTR_ERR(fence); else if (fence) { @@ -1395,7 +1392,7 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, struct drm_file *filp, struct drm_amdgpu_fence *user) { - struct amdgpu_ring *ring; + struct drm_sched_entity *entity; struct amdgpu_ctx *ctx; struct dma_fence *fence; int r; @@ -1404,14 +1401,14 @@ static struct dma_fence *amdgpu_cs_get_fence(struct amdgpu_device *adev, if (ctx == NULL) return ERR_PTR(-EINVAL); - r = amdgpu_queue_mgr_map(adev, &ctx->queue_mgr, user->ip_type, - user->ip_instance, user->ring, &ring); + r = amdgpu_ctx_get_entity(ctx, user->ip_type, user->ip_instance, + user->ring, &entity); if (r) { amdgpu_ctx_put(ctx); return ERR_PTR(r); } - fence = amdgpu_ctx_get_fence(ctx, ring, user->seq_no); + fence = amdgpu_ctx_get_fence(ctx, entity, user->seq_no); amdgpu_ctx_put(ctx); return fence; @@ -1431,6 +1428,9 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data, if (IS_ERR(fence)) return PTR_ERR(fence); + if (!fence) + fence = dma_fence_get_stub(); + switch (info->in.what) { case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ: r = drm_syncobj_create(&syncobj, 0, fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c new file mode 100644 index 000000000000..7e22be7ca68a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c @@ -0,0 +1,117 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + + * * Author: Monk.liu@amd.com + */ + +#include "amdgpu.h" + +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) +{ + uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; + + addr -= AMDGPU_VA_RESERVED_SIZE; + addr = amdgpu_gmc_sign_extend(addr); + + return addr; +} + +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size) +{ + int r; + void *ptr; + + r = amdgpu_bo_create_kernel(adev, size, PAGE_SIZE, + domain, bo, + NULL, &ptr); + if (!*bo) + return -ENOMEM; + + memset(ptr, 0, size); + return 0; +} + +void amdgpu_free_static_csa(struct amdgpu_bo **bo) +{ + amdgpu_bo_free_kernel(bo, NULL, NULL); +} + +/* + * amdgpu_map_static_csa should be called during amdgpu_vm_init + * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command + * submission of GFX should use this virtual address within META_DATA init + * package to support SRIOV gfx preemption. + */ +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t csa_addr, uint32_t size) +{ + struct ww_acquire_ctx ticket; + struct list_head list; + struct amdgpu_bo_list_entry pd; + struct ttm_validate_buffer csa_tv; + int r; + + INIT_LIST_HEAD(&list); + INIT_LIST_HEAD(&csa_tv.head); + csa_tv.bo = &bo->tbo; + csa_tv.num_shared = 1; + + list_add(&csa_tv.head, &list); + amdgpu_vm_get_pd_bo(vm, &list, &pd); + + r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); + if (r) { + DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); + return r; + } + + *bo_va = amdgpu_vm_bo_add(adev, vm, bo); + if (!*bo_va) { + ttm_eu_backoff_reservation(&ticket, &list); + DRM_ERROR("failed to create bo_va for static CSA\n"); + return -ENOMEM; + } + + r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, + size); + if (r) { + DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); + amdgpu_vm_bo_rmv(adev, *bo_va); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, size, + AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | + AMDGPU_PTE_EXECUTABLE); + + if (r) { + DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); + amdgpu_vm_bo_rmv(adev, *bo_va); + ttm_eu_backoff_reservation(&ticket, &list); + return r; + } + + ttm_eu_backoff_reservation(&ticket, &list); + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h new file mode 100644 index 000000000000..524b4437a021 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.h @@ -0,0 +1,39 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * Author: Monk.liu@amd.com + */ + +#ifndef AMDGPU_CSA_MANAGER_H +#define AMDGPU_CSA_MANAGER_H + +#define AMDGPU_CSA_SIZE (128 * 1024) + +uint32_t amdgpu_get_total_csa_size(struct amdgpu_device *adev); +uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); +int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo, + u32 domain, uint32_t size); +int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, + struct amdgpu_bo *bo, struct amdgpu_bo_va **bo_va, + uint64_t csa_addr, uint32_t size); +void amdgpu_free_static_csa(struct amdgpu_bo **bo); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c index df6965761046..d85184b5b35c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c @@ -27,6 +27,31 @@ #include "amdgpu.h" #include "amdgpu_sched.h" +#define to_amdgpu_ctx_entity(e) \ + container_of((e), struct amdgpu_ctx_entity, entity) + +const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = { + [AMDGPU_HW_IP_GFX] = 1, + [AMDGPU_HW_IP_COMPUTE] = 4, + [AMDGPU_HW_IP_DMA] = 2, + [AMDGPU_HW_IP_UVD] = 1, + [AMDGPU_HW_IP_VCE] = 1, + [AMDGPU_HW_IP_UVD_ENC] = 1, + [AMDGPU_HW_IP_VCN_DEC] = 1, + [AMDGPU_HW_IP_VCN_ENC] = 1, + [AMDGPU_HW_IP_VCN_JPEG] = 1, +}; + +static int amdgput_ctx_total_num_entities(void) +{ + unsigned i, num_entities = 0; + + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) + num_entities += amdgpu_ctx_num_entities[i]; + + return num_entities; +} + static int amdgpu_ctx_priority_permit(struct drm_file *filp, enum drm_sched_priority priority) { @@ -48,6 +73,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, struct drm_file *filp, struct amdgpu_ctx *ctx) { + unsigned num_entities = amdgput_ctx_total_num_entities(); unsigned i, j; int r; @@ -60,19 +86,33 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, memset(ctx, 0, sizeof(*ctx)); ctx->adev = adev; - kref_init(&ctx->refcount); - spin_lock_init(&ctx->ring_lock); - ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS, + + ctx->fences = kcalloc(amdgpu_sched_jobs * num_entities, sizeof(struct dma_fence*), GFP_KERNEL); if (!ctx->fences) return -ENOMEM; - mutex_init(&ctx->lock); + ctx->entities[0] = kcalloc(num_entities, + sizeof(struct amdgpu_ctx_entity), + GFP_KERNEL); + if (!ctx->entities[0]) { + r = -ENOMEM; + goto error_free_fences; + } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { - ctx->rings[i].sequence = 1; - ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i]; + for (i = 0; i < num_entities; ++i) { + struct amdgpu_ctx_entity *entity = &ctx->entities[0][i]; + + entity->sequence = 1; + entity->fences = &ctx->fences[amdgpu_sched_jobs * i]; } + for (i = 1; i < AMDGPU_HW_IP_NUM; ++i) + ctx->entities[i] = ctx->entities[i - 1] + + amdgpu_ctx_num_entities[i - 1]; + + kref_init(&ctx->refcount); + spin_lock_init(&ctx->ring_lock); + mutex_init(&ctx->lock); ctx->reset_counter = atomic_read(&adev->gpu_reset_counter); ctx->reset_counter_query = ctx->reset_counter; @@ -80,31 +120,70 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev, ctx->init_priority = priority; ctx->override_priority = DRM_SCHED_PRIORITY_UNSET; - /* create context entity for each ring */ - for (i = 0; i < adev->num_rings; i++) { - struct amdgpu_ring *ring = adev->rings[i]; - struct drm_sched_rq *rq; + for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) { + struct amdgpu_ring *rings[AMDGPU_MAX_RINGS]; + struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS]; + unsigned num_rings; + + switch (i) { + case AMDGPU_HW_IP_GFX: + rings[0] = &adev->gfx.gfx_ring[0]; + num_rings = 1; + break; + case AMDGPU_HW_IP_COMPUTE: + for (j = 0; j < adev->gfx.num_compute_rings; ++j) + rings[j] = &adev->gfx.compute_ring[j]; + num_rings = adev->gfx.num_compute_rings; + break; + case AMDGPU_HW_IP_DMA: + for (j = 0; j < adev->sdma.num_instances; ++j) + rings[j] = &adev->sdma.instance[j].ring; + num_rings = adev->sdma.num_instances; + break; + case AMDGPU_HW_IP_UVD: + rings[0] = &adev->uvd.inst[0].ring; + num_rings = 1; + break; + case AMDGPU_HW_IP_VCE: + rings[0] = &adev->vce.ring[0]; + num_rings = 1; + break; + case AMDGPU_HW_IP_UVD_ENC: + rings[0] = &adev->uvd.inst[0].ring_enc[0]; + num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_DEC: + rings[0] = &adev->vcn.ring_dec; + num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_ENC: + rings[0] = &adev->vcn.ring_enc[0]; + num_rings = 1; + break; + case AMDGPU_HW_IP_VCN_JPEG: + rings[0] = &adev->vcn.ring_jpeg; + num_rings = 1; + break; + } - rq = &ring->sched.sched_rq[priority]; + for (j = 0; j < num_rings; ++j) + rqs[j] = &rings[j]->sched.sched_rq[priority]; - if (ring == &adev->gfx.kiq.ring) - continue; - - r = drm_sched_entity_init(&ctx->rings[i].entity, - &rq, 1, &ctx->guilty); + for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) + r = drm_sched_entity_init(&ctx->entities[i][j].entity, + rqs, num_rings, &ctx->guilty); if (r) - goto failed; + goto error_cleanup_entities; } - r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr); - if (r) - goto failed; - return 0; -failed: - for (j = 0; j < i; j++) - drm_sched_entity_destroy(&ctx->rings[j].entity); +error_cleanup_entities: + for (i = 0; i < num_entities; ++i) + drm_sched_entity_destroy(&ctx->entities[0][i].entity); + kfree(ctx->entities[0]); + +error_free_fences: kfree(ctx->fences); ctx->fences = NULL; return r; @@ -113,25 +192,47 @@ failed: static void amdgpu_ctx_fini(struct kref *ref) { struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount); + unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_device *adev = ctx->adev; unsigned i, j; if (!adev) return; - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) + for (i = 0; i < num_entities; ++i) for (j = 0; j < amdgpu_sched_jobs; ++j) - dma_fence_put(ctx->rings[i].fences[j]); + dma_fence_put(ctx->entities[0][i].fences[j]); kfree(ctx->fences); - ctx->fences = NULL; - - amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr); + kfree(ctx->entities[0]); mutex_destroy(&ctx->lock); kfree(ctx); } +int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, + u32 ring, struct drm_sched_entity **entity) +{ + if (hw_ip >= AMDGPU_HW_IP_NUM) { + DRM_ERROR("unknown HW IP type: %d\n", hw_ip); + return -EINVAL; + } + + /* Right now all IPs have only one instance - multiple rings. */ + if (instance != 0) { + DRM_DEBUG("invalid ip instance: %d\n", instance); + return -EINVAL; + } + + if (ring >= amdgpu_ctx_num_entities[hw_ip]) { + DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring); + return -EINVAL; + } + + *entity = &ctx->entities[hw_ip][ring].entity; + return 0; +} + static int amdgpu_ctx_alloc(struct amdgpu_device *adev, struct amdgpu_fpriv *fpriv, struct drm_file *filp, @@ -147,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, return -ENOMEM; mutex_lock(&mgr->lock); - r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL); + r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL); if (r < 0) { mutex_unlock(&mgr->lock); kfree(ctx); @@ -168,17 +269,17 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev, static void amdgpu_ctx_do_release(struct kref *ref) { struct amdgpu_ctx *ctx; + unsigned num_entities; u32 i; ctx = container_of(ref, struct amdgpu_ctx, refcount); - for (i = 0; i < ctx->adev->num_rings; i++) { + num_entities = 0; + for (i = 0; i < AMDGPU_HW_IP_NUM; i++) + num_entities += amdgpu_ctx_num_entities[i]; - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) - continue; - - drm_sched_entity_destroy(&ctx->rings[i].entity); - } + for (i = 0; i < num_entities; i++) + drm_sched_entity_destroy(&ctx->entities[0][i].entity); amdgpu_ctx_fini(ref); } @@ -334,56 +435,56 @@ int amdgpu_ctx_put(struct amdgpu_ctx *ctx) return 0; } -int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring, - struct dma_fence *fence, uint64_t* handler) +void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence, uint64_t* handle) { - struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; - uint64_t seq = cring->sequence; - unsigned idx = 0; + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); + uint64_t seq = centity->sequence; struct dma_fence *other = NULL; + unsigned idx = 0; idx = seq & (amdgpu_sched_jobs - 1); - other = cring->fences[idx]; + other = centity->fences[idx]; if (other) BUG_ON(!dma_fence_is_signaled(other)); dma_fence_get(fence); spin_lock(&ctx->ring_lock); - cring->fences[idx] = fence; - cring->sequence++; + centity->fences[idx] = fence; + centity->sequence++; spin_unlock(&ctx->ring_lock); dma_fence_put(other); - if (handler) - *handler = seq; - - return 0; + if (handle) + *handle = seq; } struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, - struct amdgpu_ring *ring, uint64_t seq) + struct drm_sched_entity *entity, + uint64_t seq) { - struct amdgpu_ctx_ring *cring = & ctx->rings[ring->idx]; + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); struct dma_fence *fence; spin_lock(&ctx->ring_lock); if (seq == ~0ull) - seq = ctx->rings[ring->idx].sequence - 1; + seq = centity->sequence - 1; - if (seq >= cring->sequence) { + if (seq >= centity->sequence) { spin_unlock(&ctx->ring_lock); return ERR_PTR(-EINVAL); } - if (seq + amdgpu_sched_jobs < cring->sequence) { + if (seq + amdgpu_sched_jobs < centity->sequence) { spin_unlock(&ctx->ring_lock); return NULL; } - fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]); + fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]); spin_unlock(&ctx->ring_lock); return fence; @@ -392,35 +493,28 @@ struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, enum drm_sched_priority priority) { - int i; - struct amdgpu_device *adev = ctx->adev; - struct drm_sched_rq *rq; - struct drm_sched_entity *entity; - struct amdgpu_ring *ring; + unsigned num_entities = amdgput_ctx_total_num_entities(); enum drm_sched_priority ctx_prio; + unsigned i; ctx->override_priority = priority; ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ? ctx->init_priority : ctx->override_priority; - for (i = 0; i < adev->num_rings; i++) { - ring = adev->rings[i]; - entity = &ctx->rings[i].entity; - rq = &ring->sched.sched_rq[ctx_prio]; - - if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ) - continue; + for (i = 0; i < num_entities; i++) { + struct drm_sched_entity *entity = &ctx->entities[0][i].entity; - drm_sched_entity_set_rq(entity, rq); + drm_sched_entity_set_priority(entity, ctx_prio); } } -int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id) +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity) { - struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id]; - unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1); - struct dma_fence *other = cring->fences[idx]; + struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity); + unsigned idx = centity->sequence & (amdgpu_sched_jobs - 1); + struct dma_fence *other = centity->fences[idx]; if (other) { signed long r; @@ -444,6 +538,7 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr) void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) { + unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; @@ -459,13 +554,11 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) return; } - for (i = 0; i < ctx->adev->num_rings; i++) { + for (i = 0; i < num_entities; i++) { + struct drm_sched_entity *entity; - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) - continue; - - max_wait = drm_sched_entity_flush(&ctx->rings[i].entity, - max_wait); + entity = &ctx->entities[0][i].entity; + max_wait = drm_sched_entity_flush(entity, max_wait); } } mutex_unlock(&mgr->lock); @@ -473,6 +566,7 @@ void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr) void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) { + unsigned num_entities = amdgput_ctx_total_num_entities(); struct amdgpu_ctx *ctx; struct idr *idp; uint32_t id, i; @@ -484,16 +578,13 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr) if (!ctx->adev) return; - for (i = 0; i < ctx->adev->num_rings; i++) { - - if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring) - continue; - - if (kref_read(&ctx->refcount) == 1) - drm_sched_entity_fini(&ctx->rings[i].entity); - else - DRM_ERROR("ctx %p is still alive\n", ctx); + if (kref_read(&ctx->refcount) != 1) { + DRM_ERROR("ctx %p is still alive\n", ctx); + continue; } + + for (i = 0; i < num_entities; i++) + drm_sched_entity_fini(&ctx->entities[0][i].entity); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h new file mode 100644 index 000000000000..b3b012c0a7da --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.h @@ -0,0 +1,88 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_CTX_H__ +#define __AMDGPU_CTX_H__ + +#include "amdgpu_ring.h" + +struct drm_device; +struct drm_file; +struct amdgpu_fpriv; + +struct amdgpu_ctx_entity { + uint64_t sequence; + struct dma_fence **fences; + struct drm_sched_entity entity; +}; + +struct amdgpu_ctx { + struct kref refcount; + struct amdgpu_device *adev; + unsigned reset_counter; + unsigned reset_counter_query; + uint32_t vram_lost_counter; + spinlock_t ring_lock; + struct dma_fence **fences; + struct amdgpu_ctx_entity *entities[AMDGPU_HW_IP_NUM]; + bool preamble_presented; + enum drm_sched_priority init_priority; + enum drm_sched_priority override_priority; + struct mutex lock; + atomic_t guilty; +}; + +struct amdgpu_ctx_mgr { + struct amdgpu_device *adev; + struct mutex lock; + /* protected by lock */ + struct idr ctx_handles; +}; + +extern const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM]; + +struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id); +int amdgpu_ctx_put(struct amdgpu_ctx *ctx); + +int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance, + u32 ring, struct drm_sched_entity **entity); +void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + struct dma_fence *fence, uint64_t *seq); +struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity, + uint64_t seq); +void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx, + enum drm_sched_priority priority); + +int amdgpu_ctx_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, + struct drm_sched_entity *entity); + +void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr); +void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c index f5fb93795a69..dd9a4fb9ce39 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c @@ -826,21 +826,13 @@ int amdgpu_debugfs_regs_init(struct amdgpu_device *adev) { struct drm_minor *minor = adev->ddev->primary; struct dentry *ent, *root = minor->debugfs_root; - unsigned i, j; + unsigned int i; for (i = 0; i < ARRAY_SIZE(debugfs_regs); i++) { ent = debugfs_create_file(debugfs_regs_names[i], S_IFREG | S_IRUGO, root, adev, debugfs_regs[i]); - if (IS_ERR(ent)) { - for (j = 0; j < i; j++) { - debugfs_remove(adev->debugfs_regs[i]); - adev->debugfs_regs[i] = NULL; - } - return PTR_ERR(ent); - } - - if (!i) + if (!i && !IS_ERR_OR_NULL(ent)) i_size_write(ent->d_inode, adev->rmmio_size); adev->debugfs_regs[i] = ent; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 39bf2ce548c6..8a078f4ae73d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -59,9 +59,13 @@ #include "amdgpu_amdkfd.h" #include "amdgpu_pm.h" +#include "amdgpu_xgmi.h" + MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin"); MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin"); +MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin"); #define AMDGPU_RESUME_MS 2000 @@ -511,6 +515,7 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev) */ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) { + /* No doorbell on SI hardware generation */ if (adev->asic_type < CHIP_BONAIRE) { adev->doorbell.base = 0; @@ -523,15 +528,26 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev) if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET) return -EINVAL; + amdgpu_asic_init_doorbell_index(adev); + /* doorbell bar mapping */ adev->doorbell.base = pci_resource_start(adev->pdev, 2); adev->doorbell.size = pci_resource_len(adev->pdev, 2); adev->doorbell.num_doorbells = min_t(u32, adev->doorbell.size / sizeof(u32), - AMDGPU_DOORBELL_MAX_ASSIGNMENT+1); + adev->doorbell_index.max_assignment+1); if (adev->doorbell.num_doorbells == 0) return -EINVAL; + /* For Vega, reserve and map two pages on doorbell BAR since SDMA + * paging queue doorbell use the second page. The + * AMDGPU_DOORBELL64_MAX_ASSIGNMENT definition assumes all the + * doorbells are in the first page. So with paging queue enabled, + * the max num_doorbells should + 1 page (0x400 in dword) + */ + if (adev->asic_type >= CHIP_VEGA10) + adev->doorbell.num_doorbells += 0x400; + adev->doorbell.ptr = ioremap(adev->doorbell.base, adev->doorbell.num_doorbells * sizeof(u32)); @@ -652,71 +668,6 @@ void amdgpu_device_wb_free(struct amdgpu_device *adev, u32 wb) } /** - * amdgpu_device_vram_location - try to find VRAM location - * - * @adev: amdgpu device structure holding all necessary informations - * @mc: memory controller structure holding memory informations - * @base: base address at which to put VRAM - * - * Function will try to place VRAM at base address provided - * as parameter. - */ -void amdgpu_device_vram_location(struct amdgpu_device *adev, - struct amdgpu_gmc *mc, u64 base) -{ - uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; - - mc->vram_start = base; - mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; - if (limit && limit < mc->real_vram_size) - mc->real_vram_size = limit; - dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", - mc->mc_vram_size >> 20, mc->vram_start, - mc->vram_end, mc->real_vram_size >> 20); -} - -/** - * amdgpu_device_gart_location - try to find GART location - * - * @adev: amdgpu device structure holding all necessary informations - * @mc: memory controller structure holding memory informations - * - * Function will place try to place GART before or after VRAM. - * - * If GART size is bigger than space left then we ajust GART size. - * Thus function will never fails. - */ -void amdgpu_device_gart_location(struct amdgpu_device *adev, - struct amdgpu_gmc *mc) -{ - u64 size_af, size_bf; - - mc->gart_size += adev->pm.smu_prv_buffer_size; - - size_af = adev->gmc.mc_mask - mc->vram_end; - size_bf = mc->vram_start; - if (size_bf > size_af) { - if (mc->gart_size > size_bf) { - dev_warn(adev->dev, "limiting GART\n"); - mc->gart_size = size_bf; - } - mc->gart_start = 0; - } else { - if (mc->gart_size > size_af) { - dev_warn(adev->dev, "limiting GART\n"); - mc->gart_size = size_af; - } - /* VCE doesn't like it when BOs cross a 4GB segment, so align - * the GART base on a 4GB boundary as well. - */ - mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL); - } - mc->gart_end = mc->gart_start + mc->gart_size - 1; - dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n", - mc->gart_size >> 20, mc->gart_start, mc->gart_end); -} - -/** * amdgpu_device_resize_fb_bar - try to resize FB BAR * * @adev: amdgpu_device pointer @@ -1397,7 +1348,12 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev) chip_name = "vega12"; break; case CHIP_RAVEN: - chip_name = "raven"; + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; break; } @@ -1581,6 +1537,92 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev) return 0; } +static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.sw) + continue; + if (adev->ip_blocks[i].status.hw) + continue; + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) { + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.hw = true; + } + } + + return 0; +} + +static int amdgpu_device_ip_hw_init_phase2(struct amdgpu_device *adev) +{ + int i, r; + + for (i = 0; i < adev->num_ip_blocks; i++) { + if (!adev->ip_blocks[i].status.sw) + continue; + if (adev->ip_blocks[i].status.hw) + continue; + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + adev->ip_blocks[i].status.hw = true; + } + + return 0; +} + +static int amdgpu_device_fw_loading(struct amdgpu_device *adev) +{ + int r = 0; + int i; + + if (adev->asic_type >= CHIP_VEGA10) { + for (i = 0; i < adev->num_ip_blocks; i++) { + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) { + if (adev->in_gpu_reset || adev->in_suspend) { + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) + break; /* sriov gpu reset, psp need to do hw_init before IH because of hw limit */ + r = adev->ip_blocks[i].version->funcs->resume(adev); + if (r) { + DRM_ERROR("resume of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } else { + r = adev->ip_blocks[i].version->funcs->hw_init(adev); + if (r) { + DRM_ERROR("hw_init of IP block <%s> failed %d\n", + adev->ip_blocks[i].version->funcs->name, r); + return r; + } + } + adev->ip_blocks[i].status.hw = true; + } + } + } + + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->load_firmware) { + r = adev->powerplay.pp_funcs->load_firmware(adev->powerplay.pp_handle); + if (r) { + pr_err("firmware loading failed\n"); + return r; + } + } + + return 0; +} + /** * amdgpu_device_ip_init - run init for hardware IPs * @@ -1628,7 +1670,9 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) /* right after GMC hw init, we create CSA */ if (amdgpu_sriov_vf(adev)) { - r = amdgpu_allocate_static_csa(adev); + r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj, + AMDGPU_GEM_DOMAIN_VRAM, + AMDGPU_CSA_SIZE); if (r) { DRM_ERROR("allocate CSA failed %d\n", r); return r; @@ -1637,20 +1681,24 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev) } } - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.sw) - continue; - if (adev->ip_blocks[i].status.hw) - continue; - r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev); - if (r) { - DRM_ERROR("hw_init of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - adev->ip_blocks[i].status.hw = true; - } + r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/ + if (r) + return r; + + r = amdgpu_device_ip_hw_init_phase1(adev); + if (r) + return r; + + r = amdgpu_device_fw_loading(adev); + if (r) + return r; + + r = amdgpu_device_ip_hw_init_phase2(adev); + if (r) + return r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_add_device(adev); amdgpu_amdkfd_device_init(adev); if (amdgpu_sriov_vf(adev)) @@ -1690,25 +1738,28 @@ static bool amdgpu_device_check_vram_lost(struct amdgpu_device *adev) } /** - * amdgpu_device_ip_late_set_cg_state - late init for clockgating + * amdgpu_device_set_cg_state - set clockgating for amdgpu device * * @adev: amdgpu_device pointer * - * Late initialization pass enabling clockgating for hardware IPs. * The list of all the hardware IPs that make up the asic is walked and the - * set_clockgating_state callbacks are run. This stage is run late - * in the init process. + * set_clockgating_state callbacks are run. + * Late initialization pass enabling clockgating for hardware IPs. + * Fini or suspend, pass disabling clockgating for hardware IPs. * Returns 0 on success, negative error code on failure. */ -static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) + +static int amdgpu_device_set_cg_state(struct amdgpu_device *adev, + enum amd_clockgating_state state) { - int i = 0, r; + int i, j, r; if (amdgpu_emu_mode == 1) return 0; - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + for (j = 0; j < adev->num_ip_blocks; j++) { + i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; + if (!adev->ip_blocks[i].status.late_initialized) continue; /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && @@ -1717,7 +1768,7 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->set_clockgating_state) { /* enable clockgating to save power */ r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_GATE); + state); if (r) { DRM_ERROR("set_clockgating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -1729,15 +1780,16 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev) return 0; } -static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev) +static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_powergating_state state) { - int i = 0, r; + int i, j, r; if (amdgpu_emu_mode == 1) return 0; - for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + for (j = 0; j < adev->num_ip_blocks; j++) { + i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1; + if (!adev->ip_blocks[i].status.late_initialized) continue; /* skip CG for VCE/UVD, it's handled specially */ if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && @@ -1746,7 +1798,7 @@ static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->set_powergating_state) { /* enable powergating to save power */ r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev, - AMD_PG_STATE_GATE); + state); if (r) { DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n", adev->ip_blocks[i].version->funcs->name, r); @@ -1774,7 +1826,7 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) int i = 0, r; for (i = 0; i < adev->num_ip_blocks; i++) { - if (!adev->ip_blocks[i].status.valid) + if (!adev->ip_blocks[i].status.hw) continue; if (adev->ip_blocks[i].version->funcs->late_init) { r = adev->ip_blocks[i].version->funcs->late_init((void *)adev); @@ -1783,12 +1835,12 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev) adev->ip_blocks[i].version->funcs->name, r); return r; } - adev->ip_blocks[i].status.late_initialized = true; } + adev->ip_blocks[i].status.late_initialized = true; } - amdgpu_device_ip_late_set_cg_state(adev); - amdgpu_device_ip_late_set_pg_state(adev); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE); queue_delayed_work(system_wq, &adev->late_init_work, msecs_to_jiffies(AMDGPU_RESUME_MS)); @@ -1813,23 +1865,19 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) { int i, r; + if (adev->gmc.xgmi.num_physical_nodes > 1) + amdgpu_xgmi_remove_device(adev); + amdgpu_amdkfd_device_fini(adev); + + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); + /* need to disable SMC first */ for (i = 0; i < adev->num_ip_blocks; i++) { if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC && - adev->ip_blocks[i].version->funcs->set_clockgating_state) { - /* ungate blocks before hw fini so that we can shutdown the blocks safely */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - if (adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false); + if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_SMC) { r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); /* XXX handle errors */ if (r) { @@ -1845,20 +1893,6 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) if (!adev->ip_blocks[i].status.hw) continue; - if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD && - adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE && - adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN && - adev->ip_blocks[i].version->funcs->set_clockgating_state) { - /* ungate blocks before hw fini so that we can shutdown the blocks safely */ - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - return r; - } - } - r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev); /* XXX handle errors */ if (r) { @@ -1875,7 +1909,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC) { - amdgpu_free_static_csa(adev); + amdgpu_ucode_free_bo(adev); + amdgpu_free_static_csa(&adev->virt.csa_obj); amdgpu_device_wb_fini(adev); amdgpu_device_vram_scratch_fini(adev); } @@ -1905,14 +1940,47 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev) return 0; } +static int amdgpu_device_enable_mgpu_fan_boost(void) +{ + struct amdgpu_gpu_instance *gpu_ins; + struct amdgpu_device *adev; + int i, ret = 0; + + mutex_lock(&mgpu_info.mutex); + + /* + * MGPU fan boost feature should be enabled + * only when there are two or more dGPUs in + * the system + */ + if (mgpu_info.num_dgpu < 2) + goto out; + + for (i = 0; i < mgpu_info.num_dgpu; i++) { + gpu_ins = &(mgpu_info.gpu_ins[i]); + adev = gpu_ins->adev; + if (!(adev->flags & AMD_IS_APU) && + !gpu_ins->mgpu_fan_enabled && + adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->enable_mgpu_fan_boost) { + ret = amdgpu_dpm_enable_mgpu_fan_boost(adev); + if (ret) + break; + + gpu_ins->mgpu_fan_enabled = 1; + } + } + +out: + mutex_unlock(&mgpu_info.mutex); + + return ret; +} + /** - * amdgpu_device_ip_late_init_func_handler - work handler for clockgating - * - * @work: work_struct + * amdgpu_device_ip_late_init_func_handler - work handler for ib test * - * Work handler for amdgpu_device_ip_late_set_cg_state. We put the - * clockgating setup into a worker thread to speed up driver init and - * resume from suspend. + * @work: work_struct. */ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) { @@ -1923,6 +1991,23 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work) r = amdgpu_ib_ring_tests(adev); if (r) DRM_ERROR("ib ring test failed (%d).\n", r); + + r = amdgpu_device_enable_mgpu_fan_boost(); + if (r) + DRM_ERROR("enable mgpu fan boost failed (%d).\n", r); +} + +static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work) +{ + struct amdgpu_device *adev = + container_of(work, struct amdgpu_device, gfx.gfx_off_delay_work.work); + + mutex_lock(&adev->gfx.gfx_off_mutex); + if (!adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true)) + adev->gfx.gfx_off_state = true; + } + mutex_unlock(&adev->gfx.gfx_off_mutex); } /** @@ -1940,23 +2025,14 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_request_full_gpu(adev, false); + amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE); + amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE); for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; /* displays are handled separately */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) { - /* ungate blocks so that suspend can properly shut them down */ - if (adev->ip_blocks[i].version->funcs->set_clockgating_state) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ @@ -1967,9 +2043,6 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev) } } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, false); - return 0; } @@ -1988,36 +2061,12 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) { int i, r; - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_request_full_gpu(adev, false); - - /* ungate SMC block first */ - r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r); - } - - /* call smu to disable gfx off feature first when suspend */ - if (adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false); - for (i = adev->num_ip_blocks - 1; i >= 0; i--) { if (!adev->ip_blocks[i].status.valid) continue; /* displays are handled in phase1 */ if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) continue; - /* ungate blocks so that suspend can properly shut them down */ - if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC && - adev->ip_blocks[i].version->funcs->set_clockgating_state) { - r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev, - AMD_CG_STATE_UNGATE); - if (r) { - DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n", - adev->ip_blocks[i].version->funcs->name, r); - } - } /* XXX handle errors */ r = adev->ip_blocks[i].version->funcs->suspend(adev); /* XXX handle errors */ @@ -2027,9 +2076,6 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev) } } - if (amdgpu_sriov_vf(adev)) - amdgpu_virt_release_full_gpu(adev, false); - return 0; } @@ -2048,11 +2094,17 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev) { int r; + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_request_full_gpu(adev, false); + r = amdgpu_device_ip_suspend_phase1(adev); if (r) return r; r = amdgpu_device_ip_suspend_phase2(adev); + if (amdgpu_sriov_vf(adev)) + amdgpu_virt_release_full_gpu(adev, false); + return r; } @@ -2178,7 +2230,8 @@ static int amdgpu_device_ip_resume_phase2(struct amdgpu_device *adev) continue; if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON || adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GMC || - adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH || + adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP) continue; r = adev->ip_blocks[i].version->funcs->resume(adev); if (r) { @@ -2210,6 +2263,11 @@ static int amdgpu_device_ip_resume(struct amdgpu_device *adev) r = amdgpu_device_ip_resume_phase1(adev); if (r) return r; + + r = amdgpu_device_fw_loading(adev); + if (r) + return r; + r = amdgpu_device_ip_resume_phase2(adev); return r; @@ -2299,6 +2357,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev) return amdgpu_device_asic_has_dc_support(adev->asic_type); } + +static void amdgpu_device_xgmi_reset_func(struct work_struct *__work) +{ + struct amdgpu_device *adev = + container_of(__work, struct amdgpu_device, xgmi_reset_work); + + adev->asic_reset_res = amdgpu_asic_reset(adev); + if (adev->asic_reset_res) + DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s", + adev->asic_reset_res, adev->ddev->unique); +} + + /** * amdgpu_device_init - initialize the driver * @@ -2335,7 +2406,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, adev->mman.buffer_funcs = NULL; adev->mman.buffer_funcs_ring = NULL; adev->vm_manager.vm_pte_funcs = NULL; - adev->vm_manager.vm_pte_num_rings = 0; + adev->vm_manager.vm_pte_num_rqs = 0; adev->gmc.gmc_funcs = NULL; adev->fence_context = dma_fence_context_alloc(AMDGPU_MAX_RINGS); bitmap_zero(adev->gfx.pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); @@ -2367,6 +2438,7 @@ int amdgpu_device_init(struct amdgpu_device *adev, mutex_init(&adev->gfx.gpu_clock_mutex); mutex_init(&adev->srbm_mutex); mutex_init(&adev->gfx.pipe_reserve_mutex); + mutex_init(&adev->gfx.gfx_off_mutex); mutex_init(&adev->grbm_idx_mutex); mutex_init(&adev->mn_lock); mutex_init(&adev->virt.vf_errors.lock); @@ -2393,7 +2465,12 @@ int amdgpu_device_init(struct amdgpu_device *adev, INIT_DELAYED_WORK(&adev->late_init_work, amdgpu_device_ip_late_init_func_handler); + INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work, + amdgpu_device_delay_enable_gfx_off); + INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func); + + adev->gfx.gfx_off_req_count = 1; adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false; /* Registers mapping */ @@ -2413,9 +2490,6 @@ int amdgpu_device_init(struct amdgpu_device *adev, DRM_INFO("register mmio base: 0x%08X\n", (uint32_t)adev->rmmio_base); DRM_INFO("register mmio size: %u\n", (unsigned)adev->rmmio_size); - /* doorbell bar mapping */ - amdgpu_device_doorbell_init(adev); - /* io port mapping */ for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) { if (pci_resource_flags(adev->pdev, i) & IORESOURCE_IO) { @@ -2434,6 +2508,9 @@ int amdgpu_device_init(struct amdgpu_device *adev, if (r) return r; + /* doorbell bar mapping and doorbell index init*/ + amdgpu_device_doorbell_init(adev); + /* if we have > 1 VGA cards, then disable the amdgpu VGA resources */ /* this will fail for cards that aren't VGA class devices, just * ignore it */ @@ -2700,11 +2777,14 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon) if (dev->switch_power_state == DRM_SWITCH_POWER_OFF) return 0; + adev->in_suspend = true; drm_kms_helper_poll_disable(dev); if (fbcon) amdgpu_fbdev_set_suspend(adev, 1); + cancel_delayed_work_sync(&adev->late_init_work); + if (!amdgpu_device_has_dc_support(adev)) { /* turn off display hw */ drm_modeset_lock_all(dev); @@ -2883,6 +2963,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon) #ifdef CONFIG_PM dev->dev->power.disable_depth--; #endif + adev->in_suspend = false; + return 0; } @@ -3041,71 +3123,22 @@ static int amdgpu_device_ip_post_soft_reset(struct amdgpu_device *adev) } /** - * amdgpu_device_recover_vram_from_shadow - restore shadowed VRAM buffers - * - * @adev: amdgpu_device pointer - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: amdgpu_bo buffer whose shadow is being restored - * @fence: dma_fence associated with the operation - * - * Restores the VRAM buffer contents from the shadow in GTT. Used to - * restore things like GPUVM page tables after a GPU reset where - * the contents of VRAM might be lost. - * Returns 0 on success, negative error code on failure. - */ -static int amdgpu_device_recover_vram_from_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct dma_fence **fence) -{ - uint32_t domain; - int r; - - if (!bo->shadow) - return 0; - - r = amdgpu_bo_reserve(bo, true); - if (r) - return r; - domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type); - /* if bo has been evicted, then no need to recover */ - if (domain == AMDGPU_GEM_DOMAIN_VRAM) { - r = amdgpu_bo_validate(bo->shadow); - if (r) { - DRM_ERROR("bo validate failed!\n"); - goto err; - } - - r = amdgpu_bo_restore_from_shadow(adev, ring, bo, - NULL, fence, true); - if (r) { - DRM_ERROR("recover page table failed!\n"); - goto err; - } - } -err: - amdgpu_bo_unreserve(bo); - return r; -} - -/** - * amdgpu_device_handle_vram_lost - Handle the loss of VRAM contents + * amdgpu_device_recover_vram - Recover some VRAM contents * * @adev: amdgpu_device pointer * * Restores the contents of VRAM buffers from the shadows in GTT. Used to * restore things like GPUVM page tables after a GPU reset where * the contents of VRAM might be lost. - * Returns 0 on success, 1 on failure. + * + * Returns: + * 0 on success, negative error code on failure. */ -static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) +static int amdgpu_device_recover_vram(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; - struct amdgpu_bo *bo, *tmp; struct dma_fence *fence = NULL, *next = NULL; - long r = 1; - int i = 0; - long tmo; + struct amdgpu_bo *shadow; + long r = 1, tmo; if (amdgpu_sriov_runtime(adev)) tmo = msecs_to_jiffies(8000); @@ -3114,123 +3147,43 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev) DRM_INFO("recover vram bo from shadow start\n"); mutex_lock(&adev->shadow_list_lock); - list_for_each_entry_safe(bo, tmp, &adev->shadow_list, shadow_list) { - next = NULL; - amdgpu_device_recover_vram_from_shadow(adev, ring, bo, &next); + list_for_each_entry(shadow, &adev->shadow_list, shadow_list) { + + /* No need to recover an evicted BO */ + if (shadow->tbo.mem.mem_type != TTM_PL_TT || + shadow->parent->tbo.mem.mem_type != TTM_PL_VRAM) + continue; + + r = amdgpu_bo_restore_shadow(shadow, &next); + if (r) + break; + if (fence) { r = dma_fence_wait_timeout(fence, false, tmo); - if (r == 0) - pr_err("wait fence %p[%d] timeout\n", fence, i); - else if (r < 0) - pr_err("wait fence %p[%d] interrupted\n", fence, i); - if (r < 1) { - dma_fence_put(fence); - fence = next; + dma_fence_put(fence); + fence = next; + if (r <= 0) break; - } - i++; + } else { + fence = next; } - - dma_fence_put(fence); - fence = next; } mutex_unlock(&adev->shadow_list_lock); - if (fence) { - r = dma_fence_wait_timeout(fence, false, tmo); - if (r == 0) - pr_err("wait fence %p[%d] timeout\n", fence, i); - else if (r < 0) - pr_err("wait fence %p[%d] interrupted\n", fence, i); - - } + if (fence) + tmo = dma_fence_wait_timeout(fence, false, tmo); dma_fence_put(fence); - if (r > 0) - DRM_INFO("recover vram bo from shadow done\n"); - else + if (r <= 0 || tmo <= 0) { DRM_ERROR("recover vram bo from shadow failed\n"); - - return (r > 0) ? 0 : 1; -} - -/** - * amdgpu_device_reset - reset ASIC/GPU for bare-metal or passthrough - * - * @adev: amdgpu device pointer - * - * attempt to do soft-reset or full-reset and reinitialize Asic - * return 0 means succeeded otherwise failed - */ -static int amdgpu_device_reset(struct amdgpu_device *adev) -{ - bool need_full_reset, vram_lost = 0; - int r; - - need_full_reset = amdgpu_device_ip_need_full_reset(adev); - - if (!need_full_reset) { - amdgpu_device_ip_pre_soft_reset(adev); - r = amdgpu_device_ip_soft_reset(adev); - amdgpu_device_ip_post_soft_reset(adev); - if (r || amdgpu_device_ip_check_soft_reset(adev)) { - DRM_INFO("soft reset failed, will fallback to full reset!\n"); - need_full_reset = true; - } - } - - if (need_full_reset) { - r = amdgpu_device_ip_suspend(adev); - -retry: - r = amdgpu_asic_reset(adev); - /* post card */ - amdgpu_atom_asic_init(adev->mode_info.atom_context); - - if (!r) { - dev_info(adev->dev, "GPU reset succeeded, trying to resume\n"); - r = amdgpu_device_ip_resume_phase1(adev); - if (r) - goto out; - - vram_lost = amdgpu_device_check_vram_lost(adev); - if (vram_lost) { - DRM_ERROR("VRAM is lost!\n"); - atomic_inc(&adev->vram_lost_counter); - } - - r = amdgpu_gtt_mgr_recover( - &adev->mman.bdev.man[TTM_PL_TT]); - if (r) - goto out; - - r = amdgpu_device_ip_resume_phase2(adev); - if (r) - goto out; - - if (vram_lost) - amdgpu_device_fill_reset_magic(adev); - } + return -EIO; } -out: - if (!r) { - amdgpu_irq_gpu_reset_resume_helper(adev); - r = amdgpu_ib_ring_tests(adev); - if (r) { - dev_err(adev->dev, "ib ring test failed (%d).\n", r); - r = amdgpu_device_ip_suspend(adev); - need_full_reset = true; - goto retry; - } - } - - if (!r && ((need_full_reset && !(adev->flags & AMD_IS_APU)) || vram_lost)) - r = amdgpu_device_handle_vram_lost(adev); - - return r; + DRM_INFO("recover vram bo from shadow done\n"); + return 0; } + /** * amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf * @@ -3260,6 +3213,10 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev, /* we need recover gart prior to run SMC/CP/SDMA resume */ amdgpu_gtt_mgr_recover(&adev->mman.bdev.man[TTM_PL_TT]); + r = amdgpu_device_fw_loading(adev); + if (r) + return r; + /* now we are okay to resume SMC/CP/SDMA */ r = amdgpu_device_ip_reinit_late_sriov(adev); if (r) @@ -3272,49 +3229,67 @@ error: amdgpu_virt_release_full_gpu(adev, true); if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) { atomic_inc(&adev->vram_lost_counter); - r = amdgpu_device_handle_vram_lost(adev); + r = amdgpu_device_recover_vram(adev); } return r; } /** - * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * amdgpu_device_should_recover_gpu - check if we should try GPU recovery * * @adev: amdgpu device pointer - * @job: which job trigger hang - * @force: forces reset regardless of amdgpu_gpu_recovery * - * Attempt to reset the GPU if it has hung (all asics). - * Returns 0 for success or an error on failure. + * Check amdgpu_gpu_recovery and SRIOV status to see if we should try to recover + * a hung GPU. */ -int amdgpu_device_gpu_recover(struct amdgpu_device *adev, - struct amdgpu_job *job, bool force) +bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev) { - int i, r, resched; - - if (!force && !amdgpu_device_ip_check_soft_reset(adev)) { - DRM_INFO("No hardware hang detected. Did some blocks stall?\n"); - return 0; + if (!amdgpu_device_ip_check_soft_reset(adev)) { + DRM_INFO("Timeout, but no hardware hang detected.\n"); + return false; } - if (!force && (amdgpu_gpu_recovery == 0 || - (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev)))) { - DRM_INFO("GPU recovery disabled.\n"); - return 0; + if (amdgpu_gpu_recovery == 0) + goto disabled; + + if (amdgpu_sriov_vf(adev)) + return true; + + if (amdgpu_gpu_recovery == -1) { + switch (adev->asic_type) { + case CHIP_BONAIRE: + case CHIP_HAWAII: + case CHIP_TOPAZ: + case CHIP_TONGA: + case CHIP_FIJI: + case CHIP_POLARIS10: + case CHIP_POLARIS11: + case CHIP_POLARIS12: + case CHIP_VEGAM: + case CHIP_VEGA20: + case CHIP_VEGA10: + case CHIP_VEGA12: + break; + default: + goto disabled; + } } - dev_info(adev->dev, "GPU reset begin!\n"); + return true; - mutex_lock(&adev->lock_reset); - atomic_inc(&adev->gpu_reset_counter); - adev->in_gpu_reset = 1; +disabled: + DRM_INFO("GPU recovery disabled.\n"); + return false; +} - /* Block kfd */ - amdgpu_amdkfd_pre_reset(adev); - /* block TTM */ - resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev); +static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev, + struct amdgpu_job *job, + bool *need_full_reset_arg) +{ + int i, r = 0; + bool need_full_reset = *need_full_reset_arg; /* block all schedulers and reset given job's ring */ for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { @@ -3325,7 +3300,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, kthread_park(ring->sched.thread); - if (job && job->base.sched == &ring->sched) + if (job && job->base.sched != &ring->sched) continue; drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL); @@ -3334,10 +3309,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, amdgpu_fence_driver_force_completion(ring); } - if (amdgpu_sriov_vf(adev)) - r = amdgpu_device_reset_sriov(adev, job ? false : true); - else - r = amdgpu_device_reset(adev); + + + if (!amdgpu_sriov_vf(adev)) { + + if (!need_full_reset) + need_full_reset = amdgpu_device_ip_need_full_reset(adev); + + if (!need_full_reset) { + amdgpu_device_ip_pre_soft_reset(adev); + r = amdgpu_device_ip_soft_reset(adev); + amdgpu_device_ip_post_soft_reset(adev); + if (r || amdgpu_device_ip_check_soft_reset(adev)) { + DRM_INFO("soft reset failed, will fallback to full reset!\n"); + need_full_reset = true; + } + } + + if (need_full_reset) + r = amdgpu_device_ip_suspend(adev); + + *need_full_reset_arg = need_full_reset; + } + + return r; +} + +static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive, + struct list_head *device_list_handle, + bool *need_full_reset_arg) +{ + struct amdgpu_device *tmp_adev = NULL; + bool need_full_reset = *need_full_reset_arg, vram_lost = false; + int r = 0; + + /* + * ASIC reset has to be done on all HGMI hive nodes ASAP + * to allow proper links negotiation in FW (within 1 sec) + */ + if (need_full_reset) { + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + /* For XGMI run all resets in parallel to speed up the process */ + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work)) + r = -EALREADY; + } else + r = amdgpu_asic_reset(tmp_adev); + + if (r) { + DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s", + r, tmp_adev->ddev->unique); + break; + } + } + + /* For XGMI wait for all PSP resets to complete before proceed */ + if (!r) { + list_for_each_entry(tmp_adev, device_list_handle, + gmc.xgmi.head) { + if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) { + flush_work(&tmp_adev->xgmi_reset_work); + r = tmp_adev->asic_reset_res; + if (r) + break; + } + } + } + } + + + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + if (need_full_reset) { + /* post card */ + if (amdgpu_atom_asic_init(tmp_adev->mode_info.atom_context)) + DRM_WARN("asic atom init failed!"); + + if (!r) { + dev_info(tmp_adev->dev, "GPU reset succeeded, trying to resume\n"); + r = amdgpu_device_ip_resume_phase1(tmp_adev); + if (r) + goto out; + + vram_lost = amdgpu_device_check_vram_lost(tmp_adev); + if (vram_lost) { + DRM_ERROR("VRAM is lost!\n"); + atomic_inc(&tmp_adev->vram_lost_counter); + } + + r = amdgpu_gtt_mgr_recover( + &tmp_adev->mman.bdev.man[TTM_PL_TT]); + if (r) + goto out; + + r = amdgpu_device_fw_loading(tmp_adev); + if (r) + return r; + + r = amdgpu_device_ip_resume_phase2(tmp_adev); + if (r) + goto out; + + if (vram_lost) + amdgpu_device_fill_reset_magic(tmp_adev); + + /* Update PSP FW topology after reset */ + if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1) + r = amdgpu_xgmi_update_topology(hive, tmp_adev); + } + } + + +out: + if (!r) { + amdgpu_irq_gpu_reset_resume_helper(tmp_adev); + r = amdgpu_ib_ring_tests(tmp_adev); + if (r) { + dev_err(tmp_adev->dev, "ib ring test failed (%d).\n", r); + r = amdgpu_device_ip_suspend(tmp_adev); + need_full_reset = true; + r = -EAGAIN; + goto end; + } + } + + if (!r) + r = amdgpu_device_recover_vram(tmp_adev); + else + tmp_adev->asic_reset_res = r; + } + +end: + *need_full_reset_arg = need_full_reset; + return r; +} + +static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int i; for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { struct amdgpu_ring *ring = adev->rings[i]; @@ -3349,7 +3458,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, * or all rings (in the case @job is NULL) * after above amdgpu_reset accomplished */ - if ((!job || job->base.sched == &ring->sched) && !r) + if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res) drm_sched_job_recovery(&ring->sched); kthread_unpark(ring->sched.thread); @@ -3359,21 +3468,144 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev, drm_helper_resume_force_mode(adev->ddev); } - ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched); + adev->asic_reset_res = 0; +} - if (r) { - /* bad news, how to tell it to userspace ? */ - dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); - amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); - } else { - dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter)); - } +static void amdgpu_device_lock_adev(struct amdgpu_device *adev) +{ + mutex_lock(&adev->lock_reset); + atomic_inc(&adev->gpu_reset_counter); + adev->in_gpu_reset = 1; + /* Block kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_pre_reset(adev); +} - /*unlock kfd */ - amdgpu_amdkfd_post_reset(adev); +static void amdgpu_device_unlock_adev(struct amdgpu_device *adev) +{ + /*unlock kfd: SRIOV would do it separately */ + if (!amdgpu_sriov_vf(adev)) + amdgpu_amdkfd_post_reset(adev); amdgpu_vf_error_trans_all(adev); adev->in_gpu_reset = 0; mutex_unlock(&adev->lock_reset); +} + + +/** + * amdgpu_device_gpu_recover - reset the asic and recover scheduler + * + * @adev: amdgpu device pointer + * @job: which job trigger hang + * + * Attempt to reset the GPU if it has hung (all asics). + * Attempt to do soft-reset or full-reset and reinitialize Asic + * Returns 0 for success or an error on failure. + */ + +int amdgpu_device_gpu_recover(struct amdgpu_device *adev, + struct amdgpu_job *job) +{ + int r; + struct amdgpu_hive_info *hive = NULL; + bool need_full_reset = false; + struct amdgpu_device *tmp_adev = NULL; + struct list_head device_list, *device_list_handle = NULL; + + INIT_LIST_HEAD(&device_list); + + dev_info(adev->dev, "GPU reset begin!\n"); + + /* + * In case of XGMI hive disallow concurrent resets to be triggered + * by different nodes. No point also since the one node already executing + * reset will also reset all the other nodes in the hive. + */ + hive = amdgpu_get_xgmi_hive(adev); + if (hive && adev->gmc.xgmi.num_physical_nodes > 1 && + !mutex_trylock(&hive->hive_lock)) + return 0; + + /* Start with adev pre asic reset first for soft reset check.*/ + amdgpu_device_lock_adev(adev); + r = amdgpu_device_pre_asic_reset(adev, + job, + &need_full_reset); + if (r) { + /*TODO Should we stop ?*/ + DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", + r, adev->ddev->unique); + adev->asic_reset_res = r; + } + + /* Build list of devices to reset */ + if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) { + if (!hive) { + amdgpu_device_unlock_adev(adev); + return -ENODEV; + } + + /* + * In case we are in XGMI hive mode device reset is done for all the + * nodes in the hive to retrain all XGMI links and hence the reset + * sequence is executed in loop on all nodes. + */ + device_list_handle = &hive->device_list; + } else { + list_add_tail(&adev->gmc.xgmi.head, &device_list); + device_list_handle = &device_list; + } + +retry: /* Rest of adevs pre asic reset from XGMI hive. */ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + + if (tmp_adev == adev) + continue; + + amdgpu_device_lock_adev(tmp_adev); + r = amdgpu_device_pre_asic_reset(tmp_adev, + NULL, + &need_full_reset); + /*TODO Should we stop ?*/ + if (r) { + DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ", + r, tmp_adev->ddev->unique); + tmp_adev->asic_reset_res = r; + } + } + + /* Actual ASIC resets if needed.*/ + /* TODO Implement XGMI hive reset logic for SRIOV */ + if (amdgpu_sriov_vf(adev)) { + r = amdgpu_device_reset_sriov(adev, job ? false : true); + if (r) + adev->asic_reset_res = r; + } else { + r = amdgpu_do_asic_reset(hive, device_list_handle, &need_full_reset); + if (r && r == -EAGAIN) + goto retry; + } + + /* Post ASIC reset for all devs .*/ + list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) { + amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL); + + if (r) { + /* bad news, how to tell it to userspace ? */ + dev_info(tmp_adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter)); + amdgpu_vf_error_put(tmp_adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r); + } else { + dev_info(tmp_adev->dev, "GPU reset(%d) succeeded!\n", atomic_read(&adev->gpu_reset_counter)); + } + + amdgpu_device_unlock_adev(tmp_adev); + } + + if (hive && adev->gmc.xgmi.num_physical_nodes > 1) + mutex_unlock(&hive->hive_lock); + + if (r) + dev_info(adev->dev, "GPU reset end with ret = %d\n", r); return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c index 6748cd7fc129..15ce7e681d67 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c @@ -626,6 +626,18 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev) "dither", amdgpu_dither_enum_list, sz); + if (amdgpu_device_has_dc_support(adev)) { + adev->mode_info.max_bpc_property = + drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16); + if (!adev->mode_info.max_bpc_property) + return -ENOMEM; + adev->mode_info.abm_level_property = + drm_property_create_range(adev->ddev, 0, + "abm level", 0, 4); + if (!adev->mode_info.abm_level_property) + return -ENOMEM; + } + return 0; } @@ -850,7 +862,12 @@ int amdgpu_display_get_crtc_scanoutpos(struct drm_device *dev, /* Inside "upper part" of vblank area? Apply corrective offset if so: */ if (in_vbl && (*vpos >= vbl_start)) { vtotal = mode->crtc_vtotal; - *vpos = *vpos - vtotal; + + /* With variable refresh rate displays the vpos can exceed + * the vtotal value. Clamp to 0 to return -vbl_end instead + * of guessing the remaining number of lines until scanout. + */ + *vpos = (*vpos < vtotal) ? (*vpos - vtotal) : 0; } /* Correct for shifted end of vbl at vbl_end. */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h index f66e3e3fef0a..06b922fe0d42 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.h @@ -23,6 +23,21 @@ #ifndef __AMDGPU_DISPLAY_H__ #define __AMDGPU_DISPLAY_H__ +#define amdgpu_display_vblank_get_counter(adev, crtc) (adev)->mode_info.funcs->vblank_get_counter((adev), (crtc)) +#define amdgpu_display_backlight_set_level(adev, e, l) (adev)->mode_info.funcs->backlight_set_level((e), (l)) +#define amdgpu_display_backlight_get_level(adev, e) (adev)->mode_info.funcs->backlight_get_level((e)) +#define amdgpu_display_hpd_sense(adev, h) (adev)->mode_info.funcs->hpd_sense((adev), (h)) +#define amdgpu_display_hpd_set_polarity(adev, h) (adev)->mode_info.funcs->hpd_set_polarity((adev), (h)) +#define amdgpu_display_hpd_get_gpio_reg(adev) (adev)->mode_info.funcs->hpd_get_gpio_reg((adev)) +#define amdgpu_display_bandwidth_update(adev) (adev)->mode_info.funcs->bandwidth_update((adev)) +#define amdgpu_display_page_flip(adev, crtc, base, async) (adev)->mode_info.funcs->page_flip((adev), (crtc), (base), (async)) +#define amdgpu_display_page_flip_get_scanoutpos(adev, crtc, vbl, pos) (adev)->mode_info.funcs->page_flip_get_scanoutpos((adev), (crtc), (vbl), (pos)) +#define amdgpu_display_add_encoder(adev, e, s, c) (adev)->mode_info.funcs->add_encoder((adev), (e), (s), (c)) +#define amdgpu_display_add_connector(adev, ci, sd, ct, ib, coi, h, r) (adev)->mode_info.funcs->add_connector((adev), (ci), (sd), (ct), (ib), (coi), (h), (r)) + +int amdgpu_display_freesync_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +void amdgpu_display_update_priority(struct amdgpu_device *adev); uint32_t amdgpu_display_supported_domains(struct amdgpu_device *adev); struct drm_framebuffer * amdgpu_display_user_framebuffer_create(struct drm_device *dev, diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h new file mode 100644 index 000000000000..be620b29f4aa --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_doorbell.h @@ -0,0 +1,243 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +/* + * GPU doorbell structures, functions & helpers + */ +struct amdgpu_doorbell { + /* doorbell mmio */ + resource_size_t base; + resource_size_t size; + u32 __iomem *ptr; + u32 num_doorbells; /* Number of doorbells actually reserved for amdgpu. */ +}; + +/* Reserved doorbells for amdgpu (including multimedia). + * KFD can use all the rest in the 2M doorbell bar. + * For asic before vega10, doorbell is 32-bit, so the + * index/offset is in dword. For vega10 and after, doorbell + * can be 64-bit, so the index defined is in qword. + */ +struct amdgpu_doorbell_index { + uint32_t kiq; + uint32_t mec_ring0; + uint32_t mec_ring1; + uint32_t mec_ring2; + uint32_t mec_ring3; + uint32_t mec_ring4; + uint32_t mec_ring5; + uint32_t mec_ring6; + uint32_t mec_ring7; + uint32_t userqueue_start; + uint32_t userqueue_end; + uint32_t gfx_ring0; + uint32_t sdma_engine0; + uint32_t sdma_engine1; + uint32_t sdma_engine2; + uint32_t sdma_engine3; + uint32_t sdma_engine4; + uint32_t sdma_engine5; + uint32_t sdma_engine6; + uint32_t sdma_engine7; + uint32_t ih; + union { + struct { + uint32_t vcn_ring0_1; + uint32_t vcn_ring2_3; + uint32_t vcn_ring4_5; + uint32_t vcn_ring6_7; + } vcn; + struct { + uint32_t uvd_ring0_1; + uint32_t uvd_ring2_3; + uint32_t uvd_ring4_5; + uint32_t uvd_ring6_7; + uint32_t vce_ring0_1; + uint32_t vce_ring2_3; + uint32_t vce_ring4_5; + uint32_t vce_ring6_7; + } uvd_vce; + }; + uint32_t max_assignment; +}; + +typedef enum _AMDGPU_DOORBELL_ASSIGNMENT +{ + AMDGPU_DOORBELL_KIQ = 0x000, + AMDGPU_DOORBELL_HIQ = 0x001, + AMDGPU_DOORBELL_DIQ = 0x002, + AMDGPU_DOORBELL_MEC_RING0 = 0x010, + AMDGPU_DOORBELL_MEC_RING1 = 0x011, + AMDGPU_DOORBELL_MEC_RING2 = 0x012, + AMDGPU_DOORBELL_MEC_RING3 = 0x013, + AMDGPU_DOORBELL_MEC_RING4 = 0x014, + AMDGPU_DOORBELL_MEC_RING5 = 0x015, + AMDGPU_DOORBELL_MEC_RING6 = 0x016, + AMDGPU_DOORBELL_MEC_RING7 = 0x017, + AMDGPU_DOORBELL_GFX_RING0 = 0x020, + AMDGPU_DOORBELL_sDMA_ENGINE0 = 0x1E0, + AMDGPU_DOORBELL_sDMA_ENGINE1 = 0x1E1, + AMDGPU_DOORBELL_IH = 0x1E8, + AMDGPU_DOORBELL_MAX_ASSIGNMENT = 0x3FF, + AMDGPU_DOORBELL_INVALID = 0xFFFF +} AMDGPU_DOORBELL_ASSIGNMENT; + +typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT +{ + /* Compute + GFX: 0~255 */ + AMDGPU_VEGA20_DOORBELL_KIQ = 0x000, + AMDGPU_VEGA20_DOORBELL_HIQ = 0x001, + AMDGPU_VEGA20_DOORBELL_DIQ = 0x002, + AMDGPU_VEGA20_DOORBELL_MEC_RING0 = 0x003, + AMDGPU_VEGA20_DOORBELL_MEC_RING1 = 0x004, + AMDGPU_VEGA20_DOORBELL_MEC_RING2 = 0x005, + AMDGPU_VEGA20_DOORBELL_MEC_RING3 = 0x006, + AMDGPU_VEGA20_DOORBELL_MEC_RING4 = 0x007, + AMDGPU_VEGA20_DOORBELL_MEC_RING5 = 0x008, + AMDGPU_VEGA20_DOORBELL_MEC_RING6 = 0x009, + AMDGPU_VEGA20_DOORBELL_MEC_RING7 = 0x00A, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_START = 0x00B, + AMDGPU_VEGA20_DOORBELL_USERQUEUE_END = 0x08A, + AMDGPU_VEGA20_DOORBELL_GFX_RING0 = 0x08B, + /* SDMA:256~335*/ + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0 = 0x100, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1 = 0x10A, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2 = 0x114, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3 = 0x11E, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4 = 0x128, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5 = 0x132, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6 = 0x13C, + AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7 = 0x146, + /* IH: 376~391 */ + AMDGPU_VEGA20_DOORBELL_IH = 0x178, + /* MMSCH: 392~407 + * overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_VEGA20_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_VEGA20_DOORBELL64_VCN2_3 = 0x189, + AMDGPU_VEGA20_DOORBELL64_VCN4_5 = 0x18A, + AMDGPU_VEGA20_DOORBELL64_VCN6_7 = 0x18B, + + AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1 = 0x188, + AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3 = 0x189, + AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5 = 0x18A, + AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7 = 0x18B, + + AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1 = 0x18C, + AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D, + AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E, + AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F, + AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F, + AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF +} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT; + +/* + * 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space + */ +typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT +{ + /* + * All compute related doorbells: kiq, hiq, diq, traditional compute queue, user queue, should locate in + * a continues range so that programming CP_MEC_DOORBELL_RANGE_LOWER/UPPER can cover this range. + * Compute related doorbells are allocated from 0x00 to 0x8a + */ + + + /* kernel scheduling */ + AMDGPU_DOORBELL64_KIQ = 0x00, + + /* HSA interface queue and debug queue */ + AMDGPU_DOORBELL64_HIQ = 0x01, + AMDGPU_DOORBELL64_DIQ = 0x02, + + /* Compute engines */ + AMDGPU_DOORBELL64_MEC_RING0 = 0x03, + AMDGPU_DOORBELL64_MEC_RING1 = 0x04, + AMDGPU_DOORBELL64_MEC_RING2 = 0x05, + AMDGPU_DOORBELL64_MEC_RING3 = 0x06, + AMDGPU_DOORBELL64_MEC_RING4 = 0x07, + AMDGPU_DOORBELL64_MEC_RING5 = 0x08, + AMDGPU_DOORBELL64_MEC_RING6 = 0x09, + AMDGPU_DOORBELL64_MEC_RING7 = 0x0a, + + /* User queue doorbell range (128 doorbells) */ + AMDGPU_DOORBELL64_USERQUEUE_START = 0x0b, + AMDGPU_DOORBELL64_USERQUEUE_END = 0x8a, + + /* Graphics engine */ + AMDGPU_DOORBELL64_GFX_RING0 = 0x8b, + + /* + * Other graphics doorbells can be allocated here: from 0x8c to 0xdf + * Graphics voltage island aperture 1 + * default non-graphics QWORD index is 0xe0 - 0xFF inclusive + */ + + /* For vega10 sriov, the sdma doorbell must be fixed as follow + * to keep the same setting with host driver, or it will + * happen conflicts + */ + AMDGPU_DOORBELL64_sDMA_ENGINE0 = 0xF0, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE0 = 0xF1, + AMDGPU_DOORBELL64_sDMA_ENGINE1 = 0xF2, + AMDGPU_DOORBELL64_sDMA_HI_PRI_ENGINE1 = 0xF3, + + /* Interrupt handler */ + AMDGPU_DOORBELL64_IH = 0xF4, /* For legacy interrupt ring buffer */ + AMDGPU_DOORBELL64_IH_RING1 = 0xF5, /* For page migration request log */ + AMDGPU_DOORBELL64_IH_RING2 = 0xF6, /* For page migration translation/invalidation log */ + + /* VCN engine use 32 bits doorbell */ + AMDGPU_DOORBELL64_VCN0_1 = 0xF8, /* lower 32 bits for VNC0 and upper 32 bits for VNC1 */ + AMDGPU_DOORBELL64_VCN2_3 = 0xF9, + AMDGPU_DOORBELL64_VCN4_5 = 0xFA, + AMDGPU_DOORBELL64_VCN6_7 = 0xFB, + + /* overlap the doorbell assignment with VCN as they are mutually exclusive + * VCE engine's doorbell is 32 bit and two VCE ring share one QWORD + */ + AMDGPU_DOORBELL64_UVD_RING0_1 = 0xF8, + AMDGPU_DOORBELL64_UVD_RING2_3 = 0xF9, + AMDGPU_DOORBELL64_UVD_RING4_5 = 0xFA, + AMDGPU_DOORBELL64_UVD_RING6_7 = 0xFB, + + AMDGPU_DOORBELL64_VCE_RING0_1 = 0xFC, + AMDGPU_DOORBELL64_VCE_RING2_3 = 0xFD, + AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE, + AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF, + + AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF, + AMDGPU_DOORBELL64_INVALID = 0xFFFF +} AMDGPU_DOORBELL64_ASSIGNMENT; + +u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v); +u64 amdgpu_mm_rdoorbell64(struct amdgpu_device *adev, u32 index); +void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v); + +#define RDOORBELL32(index) amdgpu_mm_rdoorbell(adev, (index)) +#define WDOORBELL32(index, v) amdgpu_mm_wdoorbell(adev, (index), (v)) +#define RDOORBELL64(index) amdgpu_mm_rdoorbell64(adev, (index)) +#define WDOORBELL64(index, v) amdgpu_mm_wdoorbell64(adev, (index), (v)) + diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h index ff24e1cc5b65..f972cd156795 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_dpm.h @@ -278,6 +278,9 @@ enum amdgpu_pcie_gen { #define amdgpu_dpm_get_fan_speed_rpm(adev, s) \ ((adev)->powerplay.pp_funcs->get_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) +#define amdgpu_dpm_set_fan_speed_rpm(adev, s) \ + ((adev)->powerplay.pp_funcs->set_fan_speed_rpm)((adev)->powerplay.pp_handle, (s)) + #define amdgpu_dpm_get_sclk(adev, l) \ ((adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (l))) @@ -357,6 +360,10 @@ enum amdgpu_pcie_gen { ((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\ (adev)->powerplay.pp_handle, type, parameter, size)) +#define amdgpu_dpm_enable_mgpu_fan_boost(adev) \ + ((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\ + (adev)->powerplay.pp_handle)) + struct amdgpu_dpm { struct amdgpu_ps *ps; /* number of valid power states */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c index 0f41d8647376..c806f984bcc5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c @@ -36,6 +36,7 @@ #include "amdgpu.h" #include "amdgpu_irq.h" +#include "amdgpu_gem.h" #include "amdgpu_amdkfd.h" @@ -126,6 +127,12 @@ int amdgpu_compute_multipipe = -1; int amdgpu_gpu_recovery = -1; /* auto */ int amdgpu_emu_mode = 0; uint amdgpu_smu_memory_pool_size = 0; +/* FBC (bit 0) disabled by default*/ +uint amdgpu_dc_feature_mask = 0; + +struct amdgpu_mgpu_info mgpu_info = { + .mutex = __MUTEX_INITIALIZER(mgpu_info.mutex), +}; /** * DOC: vramlimit (int) @@ -447,9 +454,10 @@ module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444); /** * DOC: param_buf_per_se (int) - * Override the size of Off-Chip Pramater Cache per Shader Engine in Byte. The default is 0 (depending on gfx). + * Override the size of Off-Chip Parameter Cache per Shader Engine in Byte. + * The default is 0 (depending on gfx). */ -MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Pramater Cache per Shader Engine (default depending on gfx)"); +MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)"); module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444); /** @@ -531,6 +539,110 @@ MODULE_PARM_DESC(smu_memory_pool_size, "0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte"); module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444); +#ifdef CONFIG_HSA_AMD +/** + * DOC: sched_policy (int) + * Set scheduling policy. Default is HWS(hardware scheduling) with over-subscription. + * Setting 1 disables over-subscription. Setting 2 disables HWS and statically + * assigns queues to HQDs. + */ +int sched_policy = KFD_SCHED_POLICY_HWS; +module_param(sched_policy, int, 0444); +MODULE_PARM_DESC(sched_policy, + "Scheduling policy (0 = HWS (Default), 1 = HWS without over-subscription, 2 = Non-HWS (Used for debugging only)"); + +/** + * DOC: hws_max_conc_proc (int) + * Maximum number of processes that HWS can schedule concurrently. The maximum is the + * number of VMIDs assigned to the HWS, which is also the default. + */ +int hws_max_conc_proc = 8; +module_param(hws_max_conc_proc, int, 0444); +MODULE_PARM_DESC(hws_max_conc_proc, + "Max # processes HWS can execute concurrently when sched_policy=0 (0 = no concurrency, #VMIDs for KFD = Maximum(default))"); + +/** + * DOC: cwsr_enable (int) + * CWSR(compute wave store and resume) allows the GPU to preempt shader execution in + * the middle of a compute wave. Default is 1 to enable this feature. Setting 0 + * disables it. + */ +int cwsr_enable = 1; +module_param(cwsr_enable, int, 0444); +MODULE_PARM_DESC(cwsr_enable, "CWSR enable (0 = Off, 1 = On (Default))"); + +/** + * DOC: max_num_of_queues_per_device (int) + * Maximum number of queues per device. Valid setting is between 1 and 4096. Default + * is 4096. + */ +int max_num_of_queues_per_device = KFD_MAX_NUM_OF_QUEUES_PER_DEVICE_DEFAULT; +module_param(max_num_of_queues_per_device, int, 0444); +MODULE_PARM_DESC(max_num_of_queues_per_device, + "Maximum number of supported queues per device (1 = Minimum, 4096 = default)"); + +/** + * DOC: send_sigterm (int) + * Send sigterm to HSA process on unhandled exceptions. Default is not to send sigterm + * but just print errors on dmesg. Setting 1 enables sending sigterm. + */ +int send_sigterm; +module_param(send_sigterm, int, 0444); +MODULE_PARM_DESC(send_sigterm, + "Send sigterm to HSA process on unhandled exception (0 = disable, 1 = enable)"); + +/** + * DOC: debug_largebar (int) + * Set debug_largebar as 1 to enable simulating large-bar capability on non-large bar + * system. This limits the VRAM size reported to ROCm applications to the visible + * size, usually 256MB. + * Default value is 0, diabled. + */ +int debug_largebar; +module_param(debug_largebar, int, 0444); +MODULE_PARM_DESC(debug_largebar, + "Debug large-bar flag used to simulate large-bar capability on non-large bar machine (0 = disable, 1 = enable)"); + +/** + * DOC: ignore_crat (int) + * Ignore CRAT table during KFD initialization. By default, KFD uses the ACPI CRAT + * table to get information about AMD APUs. This option can serve as a workaround on + * systems with a broken CRAT table. + */ +int ignore_crat; +module_param(ignore_crat, int, 0444); +MODULE_PARM_DESC(ignore_crat, + "Ignore CRAT table during KFD initialization (0 = use CRAT (default), 1 = ignore CRAT)"); + +/** + * DOC: noretry (int) + * This parameter sets sh_mem_config.retry_disable. Default value, 0, enables retry. + * Setting 1 disables retry. + * Retry is needed for recoverable page faults. + */ +int noretry; +module_param(noretry, int, 0644); +MODULE_PARM_DESC(noretry, + "Set sh_mem_config.retry_disable on Vega10 (0 = retry enabled (default), 1 = retry disabled)"); + +/** + * DOC: halt_if_hws_hang (int) + * Halt if HWS hang is detected. Default value, 0, disables the halt on hang. + * Setting 1 enables halt on hang. + */ +int halt_if_hws_hang; +module_param(halt_if_hws_hang, int, 0644); +MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)"); +#endif + +/** + * DOC: dcfeaturemask (uint) + * Override display features enabled. See enum DC_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h. + * The default is the current set of stable display features. + */ +MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))"); +module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444); + static const struct pci_device_id pciidlist[] = { #ifdef CONFIG_DRM_AMDGPU_SI {0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI}, @@ -753,6 +865,7 @@ static const struct pci_device_id pciidlist[] = { /* VEGAM */ {0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, {0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, + {0x1002, 0x694F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM}, /* Vega 10 */ {0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, @@ -761,7 +874,13 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x6869, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x686a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x686b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x686d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x686e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, + {0x1002, 0x686f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, {0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10}, /* Vega 12 */ {0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, @@ -770,14 +889,16 @@ static const struct pci_device_id pciidlist[] = { {0x1002, 0x69A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, {0x1002, 0x69AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12}, /* Vega 20 */ - {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, - {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20|AMD_EXP_HW_SUPPORT}, + {0x1002, 0x66A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, + {0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20}, /* Raven */ {0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, + {0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU}, {0, 0, 0} }; @@ -786,28 +907,6 @@ MODULE_DEVICE_TABLE(pci, pciidlist); static struct drm_driver kms_driver; -static int amdgpu_kick_out_firmware_fb(struct pci_dev *pdev) -{ - struct apertures_struct *ap; - bool primary = false; - - ap = alloc_apertures(1); - if (!ap) - return -ENOMEM; - - ap->ranges[0].base = pci_resource_start(pdev, 0); - ap->ranges[0].size = pci_resource_len(pdev, 0); - -#ifdef CONFIG_X86 - primary = pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW; -#endif - drm_fb_helper_remove_conflicting_framebuffers(ap, "amdgpudrmfb", primary); - kfree(ap); - - return 0; -} - - static int amdgpu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent) { @@ -826,30 +925,18 @@ static int amdgpu_pci_probe(struct pci_dev *pdev, return -ENODEV; } - /* - * Initialize amdkfd before starting radeon. If it was not loaded yet, - * defer radeon probing - */ - ret = amdgpu_amdkfd_init(); - if (ret == -EPROBE_DEFER) - return ret; - /* Get rid of things like offb */ - ret = amdgpu_kick_out_firmware_fb(pdev); + ret = drm_fb_helper_remove_conflicting_pci_framebuffers(pdev, 0, "amdgpudrmfb"); if (ret) return ret; - /* warn the user if they mix atomic and non-atomic capable GPUs */ - if ((kms_driver.driver_features & DRIVER_ATOMIC) && !supports_atomic) - DRM_ERROR("Mixing atomic and non-atomic capable GPUs!\n"); - /* support atomic early so the atomic debugfs stuff gets created */ - if (supports_atomic) - kms_driver.driver_features |= DRIVER_ATOMIC; - dev = drm_dev_alloc(&kms_driver, &pdev->dev); if (IS_ERR(dev)) return PTR_ERR(dev); + if (!supports_atomic) + dev->driver_features &= ~DRIVER_ATOMIC; + ret = pci_enable_device(pdev); if (ret) goto err_free; @@ -882,8 +969,8 @@ amdgpu_pci_remove(struct pci_dev *pdev) { struct drm_device *dev = pci_get_drvdata(pdev); - drm_dev_unregister(dev); - drm_dev_put(dev); + DRM_ERROR("Device removal is currently not supported outside of fbcon\n"); + drm_dev_unplug(dev); pci_disable_device(pdev); pci_set_drvdata(pdev, NULL); } @@ -1101,7 +1188,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe, static struct drm_driver kms_driver = { .driver_features = - DRIVER_USE_AGP | + DRIVER_USE_AGP | DRIVER_ATOMIC | DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM | DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ, .load = amdgpu_driver_load_kms, @@ -1142,9 +1229,6 @@ static struct drm_driver kms_driver = { .patchlevel = KMS_DRIVER_PATCHLEVEL, }; -static struct drm_driver *driver; -static struct pci_driver *pdriver; - static struct pci_driver amdgpu_kms_pci_driver = { .name = DRIVER_NAME, .id_table = pciidlist, @@ -1174,12 +1258,14 @@ static int __init amdgpu_init(void) goto error_fence; DRM_INFO("amdgpu kernel modesetting enabled.\n"); - driver = &kms_driver; - pdriver = &amdgpu_kms_pci_driver; - driver->num_ioctls = amdgpu_max_kms_ioctl; + kms_driver.num_ioctls = amdgpu_max_kms_ioctl; amdgpu_register_atpx_handler(); + + /* Ignore KFD init failures. Normal when CONFIG_HSA_AMD is not set. */ + amdgpu_amdkfd_init(); + /* let modprobe override vga console setting */ - return pci_register_driver(pdriver); + return pci_register_driver(&amdgpu_kms_pci_driver); error_fence: amdgpu_sync_fini(); @@ -1191,7 +1277,7 @@ error_sync: static void __exit amdgpu_exit(void) { amdgpu_amdkfd_fini(); - pci_unregister_driver(pdriver); + pci_unregister_driver(&amdgpu_kms_pci_driver); amdgpu_unregister_atpx_handler(); amdgpu_sync_fini(); amdgpu_fence_slab_fini(); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c index ae8fac34f7a5..ec78e2b2015c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_encoders.c @@ -28,6 +28,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "atom.h" #include "atombios_encoders.h" diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c index 69c5d22f29bd..5cbde74b97dd 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c @@ -33,6 +33,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "cikd.h" +#include "amdgpu_gem.h" #include <drm/drm_fb_helper.h> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c index 7056925eb386..ee47c11e92ce 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c @@ -216,8 +216,10 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring) * Checks the current fence value and calculates the last * signalled fence value. Wakes the fence queue if the * sequence number has increased. + * + * Returns true if fence was processed */ -void amdgpu_fence_process(struct amdgpu_ring *ring) +bool amdgpu_fence_process(struct amdgpu_ring *ring) { struct amdgpu_fence_driver *drv = &ring->fence_drv; uint32_t seq, last_seq; @@ -229,11 +231,12 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq); - if (seq != ring->fence_drv.sync_seq) + if (del_timer(&ring->fence_drv.fallback_timer) && + seq != ring->fence_drv.sync_seq) amdgpu_fence_schedule_fallback(ring); if (unlikely(seq == last_seq)) - return; + return false; last_seq &= drv->num_fences_mask; seq &= drv->num_fences_mask; @@ -260,6 +263,8 @@ void amdgpu_fence_process(struct amdgpu_ring *ring) dma_fence_put(fence); } while (last_seq != seq); + + return true; } /** @@ -274,7 +279,8 @@ static void amdgpu_fence_fallback(struct timer_list *t) struct amdgpu_ring *ring = from_timer(ring, t, fence_drv.fallback_timer); - amdgpu_fence_process(ring); + if (amdgpu_fence_process(ring)) + DRM_WARN("Fence fallback timer expired on ring %s\n", ring->name); } /** @@ -392,9 +398,9 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring, ring->fence_drv.irq_type = irq_type; ring->fence_drv.initialized = true; - dev_dbg(adev->dev, "fence driver on ring %d use gpu addr 0x%016llx, " - "cpu addr 0x%p\n", ring->idx, - ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); + DRM_DEV_DEBUG(adev->dev, "fence driver on ring %s use gpu addr " + "0x%016llx, cpu addr 0x%p\n", ring->name, + ring->fence_drv.gpu_addr, ring->fence_drv.cpu_addr); return 0; } @@ -701,7 +707,7 @@ static int amdgpu_debugfs_gpu_recover(struct seq_file *m, void *data) struct amdgpu_device *adev = dev->dev_private; seq_printf(m, "gpu recover\n"); - amdgpu_device_gpu_recover(adev, NULL, true); + amdgpu_device_gpu_recover(adev, NULL); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c index a54d5655a191..6d11e1721147 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.c @@ -112,7 +112,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) { int r; - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { struct amdgpu_bo_param bp; memset(&bp, 0, sizeof(bp)); @@ -123,7 +123,7 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev) AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; bp.type = ttm_bo_type_kernel; bp.resv = NULL; - r = amdgpu_bo_create(adev, &bp, &adev->gart.robj); + r = amdgpu_bo_create(adev, &bp, &adev->gart.bo); if (r) { return r; } @@ -145,19 +145,18 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev) { int r; - r = amdgpu_bo_reserve(adev->gart.robj, false); + r = amdgpu_bo_reserve(adev->gart.bo, false); if (unlikely(r != 0)) return r; - r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM); + r = amdgpu_bo_pin(adev->gart.bo, AMDGPU_GEM_DOMAIN_VRAM); if (r) { - amdgpu_bo_unreserve(adev->gart.robj); + amdgpu_bo_unreserve(adev->gart.bo); return r; } - r = amdgpu_bo_kmap(adev->gart.robj, &adev->gart.ptr); + r = amdgpu_bo_kmap(adev->gart.bo, &adev->gart.ptr); if (r) - amdgpu_bo_unpin(adev->gart.robj); - amdgpu_bo_unreserve(adev->gart.robj); - adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj); + amdgpu_bo_unpin(adev->gart.bo); + amdgpu_bo_unreserve(adev->gart.bo); return r; } @@ -173,14 +172,14 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) { int r; - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { return; } - r = amdgpu_bo_reserve(adev->gart.robj, true); + r = amdgpu_bo_reserve(adev->gart.bo, true); if (likely(r == 0)) { - amdgpu_bo_kunmap(adev->gart.robj); - amdgpu_bo_unpin(adev->gart.robj); - amdgpu_bo_unreserve(adev->gart.robj); + amdgpu_bo_kunmap(adev->gart.bo); + amdgpu_bo_unpin(adev->gart.bo); + amdgpu_bo_unreserve(adev->gart.bo); adev->gart.ptr = NULL; } } @@ -196,10 +195,10 @@ void amdgpu_gart_table_vram_unpin(struct amdgpu_device *adev) */ void amdgpu_gart_table_vram_free(struct amdgpu_device *adev) { - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { return; } - amdgpu_bo_unref(&adev->gart.robj); + amdgpu_bo_unref(&adev->gart.bo); } /* @@ -249,7 +248,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, } mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); return 0; } @@ -260,6 +259,8 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset, * @offset: offset into the GPU's gart aperture * @pages: number of pages to bind * @dma_addr: DMA addresses of pages + * @flags: page table entry flags + * @dst: CPU address of the gart table * * Map the dma_addresses into GART entries (all asics). * Returns 0 for success, -EINVAL for failure. @@ -332,7 +333,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset, mb(); amdgpu_asic_flush_hdp(adev, NULL); - amdgpu_gmc_flush_gpu_tlb(adev, 0); + amdgpu_gmc_flush_gpu_tlb(adev, 0, 0); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h index 9f9e9dc87da1..afa2e2877d87 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gart.h @@ -40,8 +40,8 @@ struct amdgpu_bo; #define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) struct amdgpu_gart { - u64 table_addr; - struct amdgpu_bo *robj; + struct amdgpu_bo *bo; + /* CPU kmapped address of gart table */ void *ptr; unsigned num_gpu_pages; unsigned num_cpu_pages; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h index e73728d90388..ecbcefe49a98 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gds.h @@ -24,13 +24,6 @@ #ifndef __AMDGPU_GDS_H__ #define __AMDGPU_GDS_H__ -/* Because TTM request that alloacted buffer should be PAGE_SIZE aligned, - * we should report GDS/GWS/OA size as PAGE_SIZE aligned - * */ -#define AMDGPU_GDS_SHIFT 2 -#define AMDGPU_GWS_SHIFT PAGE_SHIFT -#define AMDGPU_OA_SHIFT PAGE_SHIFT - struct amdgpu_ring; struct amdgpu_bo; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c index 71792d820ae0..f4f00217546e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c @@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj, INIT_LIST_HEAD(&duplicates); tv.bo = &bo->tbo; - tv.shared = true; + tv.num_shared = 1; list_add(&tv.head, &list); amdgpu_vm_get_pd_bo(vm, &list, &vm_pd); @@ -244,16 +244,10 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, return -EINVAL; } flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - if (args->in.domains == AMDGPU_GEM_DOMAIN_GDS) - size = size << AMDGPU_GDS_SHIFT; - else if (args->in.domains == AMDGPU_GEM_DOMAIN_GWS) - size = size << AMDGPU_GWS_SHIFT; - else if (args->in.domains == AMDGPU_GEM_DOMAIN_OA) - size = size << AMDGPU_OA_SHIFT; - else - return -EINVAL; + /* GDS allocations must be DW aligned */ + if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS) + size = ALIGN(size, 4); } - size = roundup(size, PAGE_SIZE); if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) { r = amdgpu_bo_reserve(vm->root.base.bo, false); @@ -572,16 +566,16 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -EINVAL; } - if (args->va_address >= AMDGPU_VA_HOLE_START && - args->va_address < AMDGPU_VA_HOLE_END) { + if (args->va_address >= AMDGPU_GMC_HOLE_START && + args->va_address < AMDGPU_GMC_HOLE_END) { dev_dbg(&dev->pdev->dev, "va_address 0x%LX is in VA hole 0x%LX-0x%LX\n", - args->va_address, AMDGPU_VA_HOLE_START, - AMDGPU_VA_HOLE_END); + args->va_address, AMDGPU_GMC_HOLE_START, + AMDGPU_GMC_HOLE_END); return -EINVAL; } - args->va_address &= AMDGPU_VA_HOLE_MASK; + args->va_address &= AMDGPU_GMC_HOLE_MASK; if ((args->flags & ~valid_flags) && (args->flags & ~prt_flags)) { dev_dbg(&dev->pdev->dev, "invalid flags combination 0x%08X\n", @@ -610,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, return -ENOENT; abo = gem_to_amdgpu_bo(gobj); tv.bo = &abo->tbo; - tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID); + if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) + tv.num_shared = 1; + else + tv.num_shared = 0; list_add(&tv.head, &list); } else { gobj = NULL; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h new file mode 100644 index 000000000000..f1ddfc50bcc7 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h @@ -0,0 +1,94 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __AMDGPU_GEM_H__ +#define __AMDGPU_GEM_H__ + +#include <drm/amdgpu_drm.h> +#include <drm/drm_gem.h> + +/* + * GEM. + */ + +#define AMDGPU_GEM_DOMAIN_MAX 0x3 +#define gem_to_amdgpu_bo(gobj) container_of((gobj), struct amdgpu_bo, gem_base) + +void amdgpu_gem_object_free(struct drm_gem_object *obj); +int amdgpu_gem_object_open(struct drm_gem_object *obj, + struct drm_file *file_priv); +void amdgpu_gem_object_close(struct drm_gem_object *obj, + struct drm_file *file_priv); +unsigned long amdgpu_gem_timeout(uint64_t timeout_ns); +struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj); +struct drm_gem_object * +amdgpu_gem_prime_import_sg_table(struct drm_device *dev, + struct dma_buf_attachment *attach, + struct sg_table *sg); +struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, + struct drm_gem_object *gobj, + int flags); +struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, + struct dma_buf *dma_buf); +struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *); +void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj); +void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr); +int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma); + +extern const struct dma_buf_ops amdgpu_dmabuf_ops; + +/* + * GEM objects. + */ +void amdgpu_gem_force_release(struct amdgpu_device *adev); +int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size, + int alignment, u32 initial_domain, + u64 flags, enum ttm_bo_type type, + struct reservation_object *resv, + struct drm_gem_object **obj); + +int amdgpu_mode_dumb_create(struct drm_file *file_priv, + struct drm_device *dev, + struct drm_mode_create_dumb *args); +int amdgpu_mode_dumb_mmap(struct drm_file *filp, + struct drm_device *dev, + uint32_t handle, uint64_t *offset_p); + +int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_info_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_mmap_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_wait_idle_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); +int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data, + struct drm_file *filp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c index 239bf2a4b3c6..97a60da62004 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c @@ -25,10 +25,46 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_gfx.h" +#include "amdgpu_rlc.h" + +/* delay 0.1 second to enable gfx off feature */ +#define GFX_OFF_DELAY_ENABLE msecs_to_jiffies(100) /* - * GPU scratch registers helpers function. + * GPU GFX IP block helpers function. */ + +int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue) +{ + int bit = 0; + + bit += mec * adev->gfx.mec.num_pipe_per_mec + * adev->gfx.mec.num_queue_per_pipe; + bit += pipe * adev->gfx.mec.num_queue_per_pipe; + bit += queue; + + return bit; +} + +void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue) +{ + *queue = bit % adev->gfx.mec.num_queue_per_pipe; + *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) + % adev->gfx.mec.num_pipe_per_mec; + *mec = (bit / adev->gfx.mec.num_queue_per_pipe) + / adev->gfx.mec.num_pipe_per_mec; + +} + +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, + int mec, int pipe, int queue) +{ + return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), + adev->gfx.mec.queue_bitmap); +} + /** * amdgpu_gfx_scratch_get - Allocate a scratch register * @@ -214,7 +250,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, ring->adev = NULL; ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_KIQ; + ring->doorbell_index = adev->doorbell_index.kiq; r = amdgpu_gfx_kiq_acquire(adev, ring); if (r) @@ -340,3 +376,40 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev) &ring->mqd_gpu_addr, &ring->mqd_ptr); } + +/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable + * + * @adev: amdgpu_device pointer + * @bool enable true: enable gfx off feature, false: disable gfx off feature + * + * 1. gfx off feature will be enabled by gfx ip after gfx cg gp enabled. + * 2. other client can send request to disable gfx off feature, the request should be honored. + * 3. other client can cancel their request of disable gfx off feature + * 4. other client should not send request to enable gfx off feature before disable gfx off feature. + */ + +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable) +{ + if (!(adev->powerplay.pp_feature & PP_GFXOFF_MASK)) + return; + + if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu) + return; + + + mutex_lock(&adev->gfx.gfx_off_mutex); + + if (!enable) + adev->gfx.gfx_off_req_count++; + else if (adev->gfx.gfx_off_req_count > 0) + adev->gfx.gfx_off_req_count--; + + if (enable && !adev->gfx.gfx_off_state && !adev->gfx.gfx_off_req_count) { + schedule_delayed_work(&adev->gfx.gfx_off_delay_work, GFX_OFF_DELAY_ENABLE); + } else if (!enable && adev->gfx.gfx_off_state) { + if (!amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false)) + adev->gfx.gfx_off_state = false; + } + + mutex_unlock(&adev->gfx.gfx_off_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h index 1f279050d334..f790e15bcd08 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h @@ -24,28 +24,245 @@ #ifndef __AMDGPU_GFX_H__ #define __AMDGPU_GFX_H__ -int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg); -void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); +/* + * GFX stuff + */ +#include "clearstate_defs.h" +#include "amdgpu_ring.h" +#include "amdgpu_rlc.h" -void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, - unsigned max_sh); +/* GFX current status */ +#define AMDGPU_GFX_NORMAL_MODE 0x00000000L +#define AMDGPU_GFX_SAFE_MODE 0x00000001L +#define AMDGPU_GFX_PG_DISABLED_MODE 0x00000002L +#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L +#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L -void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); +#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES -int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq); +struct amdgpu_mec { + struct amdgpu_bo *hpd_eop_obj; + u64 hpd_eop_gpu_addr; + struct amdgpu_bo *mec_fw_obj; + u64 mec_fw_gpu_addr; + u32 num_mec; + u32 num_pipe_per_mec; + u32 num_queue_per_pipe; + void *mqd_backup[AMDGPU_MAX_COMPUTE_RINGS + 1]; -void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, - struct amdgpu_irq_src *irq); + /* These are the resources for which amdgpu takes ownership */ + DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); +}; -void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); -int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, - unsigned hpd_size); +struct amdgpu_kiq { + u64 eop_gpu_addr; + struct amdgpu_bo *eop_obj; + spinlock_t ring_lock; + struct amdgpu_ring ring; + struct amdgpu_irq_src irq; +}; -int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, - unsigned mqd_size); -void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); +/* + * GPU scratch registers structures, functions & helpers + */ +struct amdgpu_scratch { + unsigned num_reg; + uint32_t reg_base; + uint32_t free_mask; +}; + +/* + * GFX configurations + */ +#define AMDGPU_GFX_MAX_SE 4 +#define AMDGPU_GFX_MAX_SH_PER_SE 2 + +struct amdgpu_rb_config { + uint32_t rb_backend_disable; + uint32_t user_rb_backend_disable; + uint32_t raster_config; + uint32_t raster_config_1; +}; + +struct gb_addr_config { + uint16_t pipe_interleave_size; + uint8_t num_pipes; + uint8_t max_compress_frags; + uint8_t num_banks; + uint8_t num_se; + uint8_t num_rb_per_se; +}; + +struct amdgpu_gfx_config { + unsigned max_shader_engines; + unsigned max_tile_pipes; + unsigned max_cu_per_sh; + unsigned max_sh_per_se; + unsigned max_backends_per_se; + unsigned max_texture_channel_caches; + unsigned max_gprs; + unsigned max_gs_threads; + unsigned max_hw_contexts; + unsigned sc_prim_fifo_size_frontend; + unsigned sc_prim_fifo_size_backend; + unsigned sc_hiz_tile_fifo_size; + unsigned sc_earlyz_tile_fifo_size; + + unsigned num_tile_pipes; + unsigned backend_enable_mask; + unsigned mem_max_burst_length_bytes; + unsigned mem_row_size_in_kb; + unsigned shader_engine_tile_size; + unsigned num_gpus; + unsigned multi_gpu_tile_size; + unsigned mc_arb_ramcfg; + unsigned gb_addr_config; + unsigned num_rbs; + unsigned gs_vgt_table_depth; + unsigned gs_prim_buffer_depth; + + uint32_t tile_mode_array[32]; + uint32_t macrotile_mode_array[16]; + + struct gb_addr_config gb_addr_config_fields; + struct amdgpu_rb_config rb_config[AMDGPU_GFX_MAX_SE][AMDGPU_GFX_MAX_SH_PER_SE]; + + /* gfx configure feature */ + uint32_t double_offchip_lds_buf; + /* cached value of DB_DEBUG2 */ + uint32_t db_debug2; +}; + +struct amdgpu_cu_info { + uint32_t simd_per_cu; + uint32_t max_waves_per_simd; + uint32_t wave_front_size; + uint32_t max_scratch_slots_per_cu; + uint32_t lds_size; + + /* total active CU number */ + uint32_t number; + uint32_t ao_cu_mask; + uint32_t ao_cu_bitmap[4][4]; + uint32_t bitmap[4][4]; +}; + +struct amdgpu_gfx_funcs { + /* get the gpu clock counter */ + uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev); + void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, + u32 sh_num, u32 instance); + void (*read_wave_data)(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t *dst, int *no_fields); + void (*read_wave_vgprs)(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t thread, uint32_t start, + uint32_t size, uint32_t *dst); + void (*read_wave_sgprs)(struct amdgpu_device *adev, uint32_t simd, + uint32_t wave, uint32_t start, uint32_t size, + uint32_t *dst); + void (*select_me_pipe_q)(struct amdgpu_device *adev, u32 me, u32 pipe, + u32 queue); +}; + +struct amdgpu_ngg_buf { + struct amdgpu_bo *bo; + uint64_t gpu_addr; + uint32_t size; + uint32_t bo_size; +}; + +enum { + NGG_PRIM = 0, + NGG_POS, + NGG_CNTL, + NGG_PARAM, + NGG_BUF_MAX +}; + +struct amdgpu_ngg { + struct amdgpu_ngg_buf buf[NGG_BUF_MAX]; + uint32_t gds_reserve_addr; + uint32_t gds_reserve_size; + bool init; +}; + +struct sq_work { + struct work_struct work; + unsigned ih_data; +}; + +struct amdgpu_gfx { + struct mutex gpu_clock_mutex; + struct amdgpu_gfx_config config; + struct amdgpu_rlc rlc; + struct amdgpu_mec mec; + struct amdgpu_kiq kiq; + struct amdgpu_scratch scratch; + const struct firmware *me_fw; /* ME firmware */ + uint32_t me_fw_version; + const struct firmware *pfp_fw; /* PFP firmware */ + uint32_t pfp_fw_version; + const struct firmware *ce_fw; /* CE firmware */ + uint32_t ce_fw_version; + const struct firmware *rlc_fw; /* RLC firmware */ + uint32_t rlc_fw_version; + const struct firmware *mec_fw; /* MEC firmware */ + uint32_t mec_fw_version; + const struct firmware *mec2_fw; /* MEC2 firmware */ + uint32_t mec2_fw_version; + uint32_t me_feature_version; + uint32_t ce_feature_version; + uint32_t pfp_feature_version; + uint32_t rlc_feature_version; + uint32_t rlc_srlc_fw_version; + uint32_t rlc_srlc_feature_version; + uint32_t rlc_srlg_fw_version; + uint32_t rlc_srlg_feature_version; + uint32_t rlc_srls_fw_version; + uint32_t rlc_srls_feature_version; + uint32_t mec_feature_version; + uint32_t mec2_feature_version; + bool mec_fw_write_wait; + bool me_fw_write_wait; + struct amdgpu_ring gfx_ring[AMDGPU_MAX_GFX_RINGS]; + unsigned num_gfx_rings; + struct amdgpu_ring compute_ring[AMDGPU_MAX_COMPUTE_RINGS]; + unsigned num_compute_rings; + struct amdgpu_irq_src eop_irq; + struct amdgpu_irq_src priv_reg_irq; + struct amdgpu_irq_src priv_inst_irq; + struct amdgpu_irq_src cp_ecc_error_irq; + struct amdgpu_irq_src sq_irq; + struct sq_work sq_work; + + /* gfx status */ + uint32_t gfx_current_status; + /* ce ram size*/ + unsigned ce_ram_size; + struct amdgpu_cu_info cu_info; + const struct amdgpu_gfx_funcs *funcs; + + /* reset mask */ + uint32_t grbm_soft_reset; + uint32_t srbm_soft_reset; + + /* NGG */ + struct amdgpu_ngg ngg; + + /* gfx off */ + bool gfx_off_state; /* true: enabled, false: disabled */ + struct mutex gfx_off_mutex; + uint32_t gfx_off_req_count; /* default 1, enable gfx off: dec 1, disable gfx off: add 1 */ + struct delayed_work gfx_off_delay_work; + + /* pipe reservation */ + struct mutex pipe_reserve_mutex; + DECLARE_BITMAP (pipe_reserve_bitmap, AMDGPU_MAX_COMPUTE_QUEUES); +}; + +#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev)) +#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance)) +#define amdgpu_gfx_select_me_pipe_q(adev, me, pipe, q) (adev)->gfx.funcs->select_me_pipe_q((adev), (me), (pipe), (q)) /** * amdgpu_gfx_create_bitmask - create a bitmask @@ -60,34 +277,34 @@ static inline u32 amdgpu_gfx_create_bitmask(u32 bit_width) return (u32)((1ULL << bit_width) - 1); } -static inline int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, - int mec, int pipe, int queue) -{ - int bit = 0; +int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg); +void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg); - bit += mec * adev->gfx.mec.num_pipe_per_mec - * adev->gfx.mec.num_queue_per_pipe; - bit += pipe * adev->gfx.mec.num_queue_per_pipe; - bit += queue; +void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, + unsigned max_sh); - return bit; -} +int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev, + struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); -static inline void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, - int *mec, int *pipe, int *queue) -{ - *queue = bit % adev->gfx.mec.num_queue_per_pipe; - *pipe = (bit / adev->gfx.mec.num_queue_per_pipe) - % adev->gfx.mec.num_pipe_per_mec; - *mec = (bit / adev->gfx.mec.num_queue_per_pipe) - / adev->gfx.mec.num_pipe_per_mec; +void amdgpu_gfx_kiq_free_ring(struct amdgpu_ring *ring, + struct amdgpu_irq_src *irq); -} -static inline bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, - int mec, int pipe, int queue) -{ - return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue), - adev->gfx.mec.queue_bitmap); -} +void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev); +int amdgpu_gfx_kiq_init(struct amdgpu_device *adev, + unsigned hpd_size); + +int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev, + unsigned mqd_size); +void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev); + +void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev); +int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec, + int pipe, int queue); +void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit, + int *mec, int *pipe, int *queue); +bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec, + int pipe, int queue); +void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c new file mode 100644 index 000000000000..d73367cab4f3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c @@ -0,0 +1,215 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + */ + +#include "amdgpu.h" + +/** + * amdgpu_gmc_get_pde_for_bo - get the PDE for a BO + * + * @bo: the BO to get the PDE for + * @level: the level in the PD hirarchy + * @addr: resulting addr + * @flags: resulting flags + * + * Get the address and flags to be used for a PDE (Page Directory Entry). + */ +void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, + uint64_t *addr, uint64_t *flags) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + struct ttm_dma_tt *ttm; + + switch (bo->tbo.mem.mem_type) { + case TTM_PL_TT: + ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm); + *addr = ttm->dma_address[0]; + break; + case TTM_PL_VRAM: + *addr = amdgpu_bo_gpu_offset(bo); + break; + default: + *addr = 0; + break; + } + *flags = amdgpu_ttm_tt_pde_flags(bo->tbo.ttm, &bo->tbo.mem); + amdgpu_gmc_get_vm_pde(adev, level, addr, flags); +} + +/** + * amdgpu_gmc_pd_addr - return the address of the root directory + * + */ +uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev); + uint64_t pd_addr; + + /* TODO: move that into ASIC specific code */ + if (adev->asic_type >= CHIP_VEGA10) { + uint64_t flags = AMDGPU_PTE_VALID; + + amdgpu_gmc_get_pde_for_bo(bo, -1, &pd_addr, &flags); + pd_addr |= flags; + } else { + pd_addr = amdgpu_bo_gpu_offset(bo); + } + return pd_addr; +} + +/** + * amdgpu_gmc_agp_addr - return the address in the AGP address space + * + * @tbo: TTM BO which needs the address, must be in GTT domain + * + * Tries to figure out how to access the BO through the AGP aperture. Returns + * AMDGPU_BO_INVALID_OFFSET if that is not possible. + */ +uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo) +{ + struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev); + struct ttm_dma_tt *ttm; + + if (bo->num_pages != 1 || bo->ttm->caching_state == tt_cached) + return AMDGPU_BO_INVALID_OFFSET; + + ttm = container_of(bo->ttm, struct ttm_dma_tt, ttm); + if (ttm->dma_address[0] + PAGE_SIZE >= adev->gmc.agp_size) + return AMDGPU_BO_INVALID_OFFSET; + + return adev->gmc.agp_start + ttm->dma_address[0]; +} + +/** + * amdgpu_gmc_vram_location - try to find VRAM location + * + * @adev: amdgpu device structure holding all necessary informations + * @mc: memory controller structure holding memory informations + * @base: base address at which to put VRAM + * + * Function will try to place VRAM at base address provided + * as parameter. + */ +void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, + u64 base) +{ + uint64_t limit = (uint64_t)amdgpu_vram_limit << 20; + + mc->vram_start = base; + mc->vram_end = mc->vram_start + mc->mc_vram_size - 1; + if (limit && limit < mc->real_vram_size) + mc->real_vram_size = limit; + + if (mc->xgmi.num_physical_nodes == 0) { + mc->fb_start = mc->vram_start; + mc->fb_end = mc->vram_end; + } + dev_info(adev->dev, "VRAM: %lluM 0x%016llX - 0x%016llX (%lluM used)\n", + mc->mc_vram_size >> 20, mc->vram_start, + mc->vram_end, mc->real_vram_size >> 20); +} + +/** + * amdgpu_gmc_gart_location - try to find GART location + * + * @adev: amdgpu device structure holding all necessary informations + * @mc: memory controller structure holding memory informations + * + * Function will place try to place GART before or after VRAM. + * + * If GART size is bigger than space left then we ajust GART size. + * Thus function will never fails. + */ +void amdgpu_gmc_gart_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) +{ + const uint64_t four_gb = 0x100000000ULL; + u64 size_af, size_bf; + /*To avoid the hole, limit the max mc address to AMDGPU_GMC_HOLE_START*/ + u64 max_mc_address = min(adev->gmc.mc_mask, AMDGPU_GMC_HOLE_START - 1); + + mc->gart_size += adev->pm.smu_prv_buffer_size; + + /* VCE doesn't like it when BOs cross a 4GB segment, so align + * the GART base on a 4GB boundary as well. + */ + size_bf = mc->fb_start; + size_af = max_mc_address + 1 - ALIGN(mc->fb_end + 1, four_gb); + + if (mc->gart_size > max(size_bf, size_af)) { + dev_warn(adev->dev, "limiting GART\n"); + mc->gart_size = max(size_bf, size_af); + } + + if ((size_bf >= mc->gart_size && size_bf < size_af) || + (size_af < mc->gart_size)) + mc->gart_start = 0; + else + mc->gart_start = max_mc_address - mc->gart_size + 1; + + mc->gart_start &= ~(four_gb - 1); + mc->gart_end = mc->gart_start + mc->gart_size - 1; + dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n", + mc->gart_size >> 20, mc->gart_start, mc->gart_end); +} + +/** + * amdgpu_gmc_agp_location - try to find AGP location + * @adev: amdgpu device structure holding all necessary informations + * @mc: memory controller structure holding memory informations + * + * Function will place try to find a place for the AGP BAR in the MC address + * space. + * + * AGP BAR will be assigned the largest available hole in the address space. + * Should be called after VRAM and GART locations are setup. + */ +void amdgpu_gmc_agp_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc) +{ + const uint64_t sixteen_gb = 1ULL << 34; + const uint64_t sixteen_gb_mask = ~(sixteen_gb - 1); + u64 size_af, size_bf; + + if (mc->fb_start > mc->gart_start) { + size_bf = (mc->fb_start & sixteen_gb_mask) - + ALIGN(mc->gart_end + 1, sixteen_gb); + size_af = mc->mc_mask + 1 - ALIGN(mc->fb_end + 1, sixteen_gb); + } else { + size_bf = mc->fb_start & sixteen_gb_mask; + size_af = (mc->gart_start & sixteen_gb_mask) - + ALIGN(mc->fb_end + 1, sixteen_gb); + } + + if (size_bf > size_af) { + mc->agp_start = (mc->fb_start - size_bf) & sixteen_gb_mask; + mc->agp_size = size_bf; + } else { + mc->agp_start = ALIGN(mc->fb_end + 1, sixteen_gb); + mc->agp_size = size_af; + } + + mc->agp_end = mc->agp_start + mc->agp_size - 1; + dev_info(adev->dev, "AGP: %lluM 0x%016llX - 0x%016llX\n", + mc->agp_size >> 20, mc->agp_start, mc->agp_end); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h index bb5a47a45790..81e6070d255b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h @@ -30,6 +30,19 @@ #include "amdgpu_irq.h" +/* VA hole for 48bit addresses on Vega10 */ +#define AMDGPU_GMC_HOLE_START 0x0000800000000000ULL +#define AMDGPU_GMC_HOLE_END 0xffff800000000000ULL + +/* + * Hardware is programmed as if the hole doesn't exists with start and end + * address values. + * + * This mask is used to remove the upper 16bits of the VA and so come up with + * the linear addr value. + */ +#define AMDGPU_GMC_HOLE_MASK 0x0000ffffffffffffULL + struct firmware; /* @@ -51,7 +64,7 @@ struct amdgpu_vmhub { struct amdgpu_gmc_funcs { /* flush the vm tlb via mmio */ void (*flush_gpu_tlb)(struct amdgpu_device *adev, - uint32_t vmid); + uint32_t vmid, uint32_t flush_type); /* flush the vm tlb via ring */ uint64_t (*emit_flush_gpu_tlb)(struct amdgpu_ring *ring, unsigned vmid, uint64_t pd_addr); @@ -74,6 +87,21 @@ struct amdgpu_gmc_funcs { u64 *dst, u64 *flags); }; +struct amdgpu_xgmi { + /* from psp */ + u64 node_id; + u64 hive_id; + /* fixed per family */ + u64 node_segment_size; + /* physical node (0-3) */ + unsigned physical_node_id; + /* number of nodes (0-4) */ + unsigned num_physical_nodes; + /* gpu list in the same hive */ + struct list_head head; + bool supported; +}; + struct amdgpu_gmc { resource_size_t aper_size; resource_size_t aper_base; @@ -81,11 +109,22 @@ struct amdgpu_gmc { * about vram size near mc fb location */ u64 mc_vram_size; u64 visible_vram_size; + u64 agp_size; + u64 agp_start; + u64 agp_end; u64 gart_size; u64 gart_start; u64 gart_end; u64 vram_start; u64 vram_end; + /* FB region , it's same as local vram region in single GPU, in XGMI + * configuration, this region covers all GPUs in the same hive , + * each GPU in the hive has the same view of this FB region . + * GPU0's vram starts at offset (0 * segment size) , + * GPU1 starts at offset (1 * segment size), etc. + */ + u64 fb_start; + u64 fb_end; unsigned vram_width; u64 real_vram_size; int vram_mtrr; @@ -109,8 +148,17 @@ struct amdgpu_gmc { atomic_t vm_fault_info_updated; const struct amdgpu_gmc_funcs *gmc_funcs; + + struct amdgpu_xgmi xgmi; }; +#define amdgpu_gmc_flush_gpu_tlb(adev, vmid, type) (adev)->gmc.gmc_funcs->flush_gpu_tlb((adev), (vmid), (type)) +#define amdgpu_gmc_emit_flush_gpu_tlb(r, vmid, addr) (r)->adev->gmc.gmc_funcs->emit_flush_gpu_tlb((r), (vmid), (addr)) +#define amdgpu_gmc_emit_pasid_mapping(r, vmid, pasid) (r)->adev->gmc.gmc_funcs->emit_pasid_mapping((r), (vmid), (pasid)) +#define amdgpu_gmc_set_pte_pde(adev, pt, idx, addr, flags) (adev)->gmc.gmc_funcs->set_pte_pde((adev), (pt), (idx), (addr), (flags)) +#define amdgpu_gmc_get_vm_pde(adev, level, dst, flags) (adev)->gmc.gmc_funcs->get_vm_pde((adev), (level), (dst), (flags)) +#define amdgpu_gmc_get_pte_flags(adev, flags) (adev)->gmc.gmc_funcs->get_vm_pte_flags((adev),(flags)) + /** * amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR * @@ -126,4 +174,28 @@ static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc) return (gmc->real_vram_size == gmc->visible_vram_size); } +/** + * amdgpu_gmc_sign_extend - sign extend the given gmc address + * + * @addr: address to extend + */ +static inline uint64_t amdgpu_gmc_sign_extend(uint64_t addr) +{ + if (addr >= AMDGPU_GMC_HOLE_START) + addr |= AMDGPU_GMC_HOLE_END; + + return addr; +} + +void amdgpu_gmc_get_pde_for_bo(struct amdgpu_bo *bo, int level, + uint64_t *addr, uint64_t *flags); +uint64_t amdgpu_gmc_pd_addr(struct amdgpu_bo *bo); +uint64_t amdgpu_gmc_agp_addr(struct ttm_buffer_object *bo); +void amdgpu_gmc_vram_location(struct amdgpu_device *adev, struct amdgpu_gmc *mc, + u64 base); +void amdgpu_gmc_gart_location(struct amdgpu_device *adev, + struct amdgpu_gmc *mc); +void amdgpu_gmc_agp_location(struct amdgpu_device *adev, + struct amdgpu_gmc *mc); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c index 51b5e977ca88..c48207b377bc 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c @@ -32,6 +32,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "atom.h" +#include "amdgpu_trace.h" #define AMDGPU_IB_TEST_TIMEOUT msecs_to_jiffies(1000) @@ -145,7 +146,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, fence_ctx = 0; } - if (!ring->ready) { + if (!ring->sched.ready) { dev_err(adev->dev, "couldn't schedule ib on ring <%s>\n", ring->name); return -EINVAL; } @@ -170,6 +171,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, (amdgpu_sriov_vf(adev) && need_ctx_switch) || amdgpu_vm_need_pipeline_sync(ring, job))) { need_pipe_sync = true; + + if (tmp) + trace_amdgpu_ib_pipe_sync(job, tmp); + dma_fence_put(tmp); } @@ -216,8 +221,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs, !amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */ continue; - amdgpu_ring_emit_ib(ring, ib, job ? job->vmid : 0, - need_ctx_switch); + amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch); need_ctx_switch = false; } @@ -342,11 +346,14 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) tmo_gfx = 8 * AMDGPU_IB_TEST_TIMEOUT; } - for (i = 0; i < AMDGPU_MAX_RINGS; ++i) { + for (i = 0; i < adev->num_rings; ++i) { struct amdgpu_ring *ring = adev->rings[i]; long tmo; - if (!ring || !ring->ready) + /* KIQ rings don't have an IB test because we never submit IBs + * to them and they have no interrupt support. + */ + if (!ring->sched.ready || !ring->funcs->test_ib) continue; /* MM engine need more time */ @@ -361,20 +368,23 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev) tmo = tmo_gfx; r = amdgpu_ring_test_ib(ring, tmo); - if (r) { - ring->ready = false; - - if (ring == &adev->gfx.gfx_ring[0]) { - /* oh, oh, that's really bad */ - DRM_ERROR("amdgpu: failed testing IB on GFX ring (%d).\n", r); - adev->accel_working = false; - return r; - - } else { - /* still not good, but we can live with it */ - DRM_ERROR("amdgpu: failed testing IB on ring %d (%d).\n", i, r); - ret = r; - } + if (!r) { + DRM_DEV_DEBUG(adev->dev, "ib test on %s succeeded\n", + ring->name); + continue; + } + + ring->sched.ready = false; + DRM_DEV_ERROR(adev->dev, "IB test failed on %s (%d).\n", + ring->name, r); + + if (ring == &adev->gfx.gfx_ring[0]) { + /* oh, oh, that's really bad */ + adev->accel_working = false; + return r; + + } else { + ret = r; } } return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c index 3a072a7a39f0..df9b173c3d0b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c @@ -574,7 +574,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev) /* skip over VMID 0, since it is the system VM */ for (j = 1; j < id_mgr->num_ids; ++j) { amdgpu_vmid_reset(adev, i, j); - amdgpu_sync_create(&id_mgr->ids[i].active); + amdgpu_sync_create(&id_mgr->ids[j].active); list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c index 06373d44b3da..8af67f649660 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.c @@ -24,46 +24,21 @@ #include <drm/drmP.h> #include "amdgpu.h" #include "amdgpu_ih.h" -#include "amdgpu_amdkfd.h" - -/** - * amdgpu_ih_ring_alloc - allocate memory for the IH ring - * - * @adev: amdgpu_device pointer - * - * Allocate a ring buffer for the interrupt controller. - * Returns 0 for success, errors for failure. - */ -static int amdgpu_ih_ring_alloc(struct amdgpu_device *adev) -{ - int r; - - /* Allocate ring buffer */ - if (adev->irq.ih.ring_obj == NULL) { - r = amdgpu_bo_create_kernel(adev, adev->irq.ih.ring_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GTT, - &adev->irq.ih.ring_obj, - &adev->irq.ih.gpu_addr, - (void **)&adev->irq.ih.ring); - if (r) { - DRM_ERROR("amdgpu: failed to create ih ring buffer (%d).\n", r); - return r; - } - } - return 0; -} /** * amdgpu_ih_ring_init - initialize the IH state * * @adev: amdgpu_device pointer + * @ih: ih ring to initialize + * @ring_size: ring size to allocate + * @use_bus_addr: true when we can use dma_alloc_coherent * * Initializes the IH state and allocates a buffer * for the IH ring buffer. * Returns 0 for success, errors for failure. */ -int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, - bool use_bus_addr) +int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + unsigned ring_size, bool use_bus_addr) { u32 rb_bufsz; int r; @@ -71,70 +46,76 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, /* Align ring size */ rb_bufsz = order_base_2(ring_size / 4); ring_size = (1 << rb_bufsz) * 4; - adev->irq.ih.ring_size = ring_size; - adev->irq.ih.ptr_mask = adev->irq.ih.ring_size - 1; - adev->irq.ih.rptr = 0; - adev->irq.ih.use_bus_addr = use_bus_addr; + ih->ring_size = ring_size; + ih->ptr_mask = ih->ring_size - 1; + ih->rptr = 0; + ih->use_bus_addr = use_bus_addr; - if (adev->irq.ih.use_bus_addr) { - if (!adev->irq.ih.ring) { - /* add 8 bytes for the rptr/wptr shadows and - * add them to the end of the ring allocation. - */ - adev->irq.ih.ring = pci_alloc_consistent(adev->pdev, - adev->irq.ih.ring_size + 8, - &adev->irq.ih.rb_dma_addr); - if (adev->irq.ih.ring == NULL) - return -ENOMEM; - memset((void *)adev->irq.ih.ring, 0, adev->irq.ih.ring_size + 8); - adev->irq.ih.wptr_offs = (adev->irq.ih.ring_size / 4) + 0; - adev->irq.ih.rptr_offs = (adev->irq.ih.ring_size / 4) + 1; - } - return 0; + if (use_bus_addr) { + if (ih->ring) + return 0; + + /* add 8 bytes for the rptr/wptr shadows and + * add them to the end of the ring allocation. + */ + ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8, + &ih->rb_dma_addr, GFP_KERNEL); + if (ih->ring == NULL) + return -ENOMEM; + + memset((void *)ih->ring, 0, ih->ring_size + 8); + ih->wptr_offs = (ih->ring_size / 4) + 0; + ih->rptr_offs = (ih->ring_size / 4) + 1; } else { - r = amdgpu_device_wb_get(adev, &adev->irq.ih.wptr_offs); + r = amdgpu_device_wb_get(adev, &ih->wptr_offs); + if (r) + return r; + + r = amdgpu_device_wb_get(adev, &ih->rptr_offs); if (r) { - dev_err(adev->dev, "(%d) ih wptr_offs wb alloc failed\n", r); + amdgpu_device_wb_free(adev, ih->wptr_offs); return r; } - r = amdgpu_device_wb_get(adev, &adev->irq.ih.rptr_offs); + r = amdgpu_bo_create_kernel(adev, ih->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_GTT, + &ih->ring_obj, &ih->gpu_addr, + (void **)&ih->ring); if (r) { - amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); - dev_err(adev->dev, "(%d) ih rptr_offs wb alloc failed\n", r); + amdgpu_device_wb_free(adev, ih->rptr_offs); + amdgpu_device_wb_free(adev, ih->wptr_offs); return r; } - - return amdgpu_ih_ring_alloc(adev); } + return 0; } /** * amdgpu_ih_ring_fini - tear down the IH state * * @adev: amdgpu_device pointer + * @ih: ih ring to tear down * * Tears down the IH state and frees buffer * used for the IH ring buffer. */ -void amdgpu_ih_ring_fini(struct amdgpu_device *adev) +void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih) { - if (adev->irq.ih.use_bus_addr) { - if (adev->irq.ih.ring) { - /* add 8 bytes for the rptr/wptr shadows and - * add them to the end of the ring allocation. - */ - pci_free_consistent(adev->pdev, adev->irq.ih.ring_size + 8, - (void *)adev->irq.ih.ring, - adev->irq.ih.rb_dma_addr); - adev->irq.ih.ring = NULL; - } + if (ih->use_bus_addr) { + if (!ih->ring) + return; + + /* add 8 bytes for the rptr/wptr shadows and + * add them to the end of the ring allocation. + */ + dma_free_coherent(adev->dev, ih->ring_size + 8, + (void *)ih->ring, ih->rb_dma_addr); + ih->ring = NULL; } else { - amdgpu_bo_free_kernel(&adev->irq.ih.ring_obj, - &adev->irq.ih.gpu_addr, - (void **)&adev->irq.ih.ring); - amdgpu_device_wb_free(adev, adev->irq.ih.wptr_offs); - amdgpu_device_wb_free(adev, adev->irq.ih.rptr_offs); + amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr, + (void **)&ih->ring); + amdgpu_device_wb_free(adev, ih->wptr_offs); + amdgpu_device_wb_free(adev, ih->rptr_offs); } } @@ -142,133 +123,45 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev) * amdgpu_ih_process - interrupt handler * * @adev: amdgpu_device pointer + * @ih: ih ring to process * * Interrupt hander (VI), walk the IH ring. * Returns irq process return code. */ -int amdgpu_ih_process(struct amdgpu_device *adev) +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + void (*callback)(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih)) { - struct amdgpu_iv_entry entry; u32 wptr; - if (!adev->irq.ih.enabled || adev->shutdown) + if (!ih->enabled || adev->shutdown) return IRQ_NONE; wptr = amdgpu_ih_get_wptr(adev); restart_ih: /* is somebody else already processing irqs? */ - if (atomic_xchg(&adev->irq.ih.lock, 1)) + if (atomic_xchg(&ih->lock, 1)) return IRQ_NONE; - DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, adev->irq.ih.rptr, wptr); + DRM_DEBUG("%s: rptr %d, wptr %d\n", __func__, ih->rptr, wptr); /* Order reading of wptr vs. reading of IH ring data */ rmb(); - while (adev->irq.ih.rptr != wptr) { - u32 ring_index = adev->irq.ih.rptr >> 2; - - /* Prescreening of high-frequency interrupts */ - if (!amdgpu_ih_prescreen_iv(adev)) { - adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; - continue; - } - - /* Before dispatching irq to IP blocks, send it to amdkfd */ - amdgpu_amdkfd_interrupt(adev, - (const void *) &adev->irq.ih.ring[ring_index]); - - entry.iv_entry = (const uint32_t *) - &adev->irq.ih.ring[ring_index]; - amdgpu_ih_decode_iv(adev, &entry); - adev->irq.ih.rptr &= adev->irq.ih.ptr_mask; - - amdgpu_irq_dispatch(adev, &entry); + while (ih->rptr != wptr) { + callback(adev, ih); + ih->rptr &= ih->ptr_mask; } + amdgpu_ih_set_rptr(adev); - atomic_set(&adev->irq.ih.lock, 0); + atomic_set(&ih->lock, 0); /* make sure wptr hasn't changed while processing */ wptr = amdgpu_ih_get_wptr(adev); - if (wptr != adev->irq.ih.rptr) + if (wptr != ih->rptr) goto restart_ih; return IRQ_HANDLED; } -/** - * amdgpu_ih_add_fault - Add a page fault record - * - * @adev: amdgpu device pointer - * @key: 64-bit encoding of PASID and address - * - * This should be called when a retry page fault interrupt is - * received. If this is a new page fault, it will be added to a hash - * table. The return value indicates whether this is a new fault, or - * a fault that was already known and is already being handled. - * - * If there are too many pending page faults, this will fail. Retry - * interrupts should be ignored in this case until there is enough - * free space. - * - * Returns 0 if the fault was added, 1 if the fault was already known, - * -ENOSPC if there are too many pending faults. - */ -int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key) -{ - unsigned long flags; - int r = -ENOSPC; - - if (WARN_ON_ONCE(!adev->irq.ih.faults)) - /* Should be allocated in <IP>_ih_sw_init on GPUs that - * support retry faults and require retry filtering. - */ - return r; - - spin_lock_irqsave(&adev->irq.ih.faults->lock, flags); - - /* Only let the hash table fill up to 50% for best performance */ - if (adev->irq.ih.faults->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) - goto unlock_out; - - r = chash_table_copy_in(&adev->irq.ih.faults->hash, key, NULL); - if (!r) - adev->irq.ih.faults->count++; - - /* chash_table_copy_in should never fail unless we're losing count */ - WARN_ON_ONCE(r < 0); - -unlock_out: - spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags); - return r; -} - -/** - * amdgpu_ih_clear_fault - Remove a page fault record - * - * @adev: amdgpu device pointer - * @key: 64-bit encoding of PASID and address - * - * This should be called when a page fault has been handled. Any - * future interrupt with this key will be processed as a new - * page fault. - */ -void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key) -{ - unsigned long flags; - int r; - - if (!adev->irq.ih.faults) - return; - - spin_lock_irqsave(&adev->irq.ih.faults->lock, flags); - - r = chash_table_remove(&adev->irq.ih.faults->hash, key, NULL); - if (!WARN_ON_ONCE(r < 0)) { - adev->irq.ih.faults->count--; - WARN_ON_ONCE(adev->irq.ih.faults->count < 0); - } - - spin_unlock_irqrestore(&adev->irq.ih.faults->lock, flags); -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h index 0e01f115bbe5..f877bb78d10a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h @@ -24,20 +24,8 @@ #ifndef __AMDGPU_IH_H__ #define __AMDGPU_IH_H__ -#include <linux/chash.h> -#include "soc15_ih_clientid.h" - struct amdgpu_device; - -#define AMDGPU_IH_CLIENTID_LEGACY 0 -#define AMDGPU_IH_CLIENTID_MAX SOC15_IH_CLIENTID_MAX - -#define AMDGPU_PAGEFAULT_HASH_BITS 8 -struct amdgpu_retryfault_hashtable { - DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spinlock_t lock; - int count; -}; +struct amdgpu_iv_entry; /* * R6xx+ IH ring @@ -57,30 +45,26 @@ struct amdgpu_ih_ring { bool use_doorbell; bool use_bus_addr; dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */ - struct amdgpu_retryfault_hashtable *faults; }; -#define AMDGPU_IH_SRC_DATA_MAX_SIZE_DW 4 - -struct amdgpu_iv_entry { - unsigned client_id; - unsigned src_id; - unsigned ring_id; - unsigned vmid; - unsigned vmid_src; - uint64_t timestamp; - unsigned timestamp_src; - unsigned pasid; - unsigned pasid_src; - unsigned src_data[AMDGPU_IH_SRC_DATA_MAX_SIZE_DW]; - const uint32_t *iv_entry; +/* provided by the ih block */ +struct amdgpu_ih_funcs { + /* ring read/write ptr handling, called from interrupt context */ + u32 (*get_wptr)(struct amdgpu_device *adev); + void (*decode_iv)(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry); + void (*set_rptr)(struct amdgpu_device *adev); }; -int amdgpu_ih_ring_init(struct amdgpu_device *adev, unsigned ring_size, - bool use_bus_addr); -void amdgpu_ih_ring_fini(struct amdgpu_device *adev); -int amdgpu_ih_process(struct amdgpu_device *adev); -int amdgpu_ih_add_fault(struct amdgpu_device *adev, u64 key); -void amdgpu_ih_clear_fault(struct amdgpu_device *adev, u64 key); +#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev)) +#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv)) +#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev)) + +int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + unsigned ring_size, bool use_bus_addr); +void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih); +int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih, + void (*callback)(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih)); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c index 1abf5b5bac9e..b7968f426862 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c @@ -51,6 +51,7 @@ #include "atom.h" #include "amdgpu_connectors.h" #include "amdgpu_trace.h" +#include "amdgpu_amdkfd.h" #include <linux/pm_runtime.h> @@ -93,23 +94,6 @@ static void amdgpu_hotplug_work_func(struct work_struct *work) } /** - * amdgpu_irq_reset_work_func - execute GPU reset - * - * @work: work struct pointer - * - * Execute scheduled GPU reset (Cayman+). - * This function is called when the IRQ handler thinks we need a GPU reset. - */ -static void amdgpu_irq_reset_work_func(struct work_struct *work) -{ - struct amdgpu_device *adev = container_of(work, struct amdgpu_device, - reset_work); - - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_gpu_recover(adev, NULL, false); -} - -/** * amdgpu_irq_disable_all - disable *all* interrupts * * @adev: amdgpu device pointer @@ -123,7 +107,7 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) int r; spin_lock_irqsave(&adev->irq.lock, irqflags); - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -147,6 +131,27 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev) } /** + * amdgpu_irq_callback - callback from the IH ring + * + * @adev: amdgpu device pointer + * @ih: amdgpu ih ring + * + * Callback from IH ring processing to handle the entry at the current position + * and advance the read pointer. + */ +static void amdgpu_irq_callback(struct amdgpu_device *adev, + struct amdgpu_ih_ring *ih) +{ + u32 ring_index = ih->rptr >> 2; + struct amdgpu_iv_entry entry; + + entry.iv_entry = (const uint32_t *)&ih->ring[ring_index]; + amdgpu_ih_decode_iv(adev, &entry); + + amdgpu_irq_dispatch(adev, &entry); +} + +/** * amdgpu_irq_handler - IRQ handler * * @irq: IRQ number (unused) @@ -163,7 +168,7 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg) struct amdgpu_device *adev = dev->dev_private; irqreturn_t ret; - ret = amdgpu_ih_process(adev); + ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback); if (ret == IRQ_HANDLED) pm_runtime_mark_last_busy(dev->dev); return ret; @@ -233,15 +238,12 @@ int amdgpu_irq_init(struct amdgpu_device *adev) amdgpu_hotplug_work_func); } - INIT_WORK(&adev->reset_work, amdgpu_irq_reset_work_func); - adev->irq.installed = true; r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq); if (r) { adev->irq.installed = false; if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); return r; } adev->ddev->max_vblank_count = 0x00ffffff; @@ -270,10 +272,9 @@ void amdgpu_irq_fini(struct amdgpu_device *adev) pci_disable_msi(adev->pdev); if (!amdgpu_device_has_dc_support(adev)) flush_work(&adev->hotplug_work); - cancel_work_sync(&adev->reset_work); } - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; @@ -313,7 +314,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev, unsigned client_id, unsigned src_id, struct amdgpu_irq_src *source) { - if (client_id >= AMDGPU_IH_CLIENTID_MAX) + if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) return -EINVAL; if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) @@ -363,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev, unsigned client_id = entry->client_id; unsigned src_id = entry->src_id; struct amdgpu_irq_src *src; + bool handled = false; int r; trace_amdgpu_iv(entry); - if (client_id >= AMDGPU_IH_CLIENTID_MAX) { + if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) { DRM_DEBUG("Invalid client_id in IV: %d\n", client_id); - return; - } - if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { + } else if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) { DRM_DEBUG("Invalid src_id in IV: %d\n", src_id); - return; - } - if (adev->irq.virq[src_id]) { + } else if (adev->irq.virq[src_id]) { generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id)); - } else { - if (!adev->irq.client[client_id].sources) { - DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", - client_id, src_id); - return; - } - src = adev->irq.client[client_id].sources[src_id]; - if (!src) { - DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); - return; - } + } else if (!adev->irq.client[client_id].sources) { + DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n", + client_id, src_id); + } else if ((src = adev->irq.client[client_id].sources[src_id])) { r = src->funcs->process(adev, src, entry); - if (r) + if (r < 0) DRM_ERROR("error processing interrupt (%d)\n", r); + else if (r) + handled = true; + + } else { + DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id); } + + /* Send it to amdkfd as well if it isn't already handled */ + if (!handled) + amdgpu_amdkfd_interrupt(adev, entry->iv_entry); } /** @@ -440,7 +440,7 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev) { int i, j, k; - for (i = 0; i < AMDGPU_IH_CLIENTID_MAX; ++i) { + for (i = 0; i < AMDGPU_IRQ_CLIENTID_MAX; ++i) { if (!adev->irq.client[i].sources) continue; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h index 3375ad778edc..f6ce171cb8aa 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h @@ -25,19 +25,38 @@ #define __AMDGPU_IRQ_H__ #include <linux/irqdomain.h> +#include "soc15_ih_clientid.h" #include "amdgpu_ih.h" -#define AMDGPU_MAX_IRQ_SRC_ID 0x100 +#define AMDGPU_MAX_IRQ_SRC_ID 0x100 #define AMDGPU_MAX_IRQ_CLIENT_ID 0x100 +#define AMDGPU_IRQ_CLIENTID_LEGACY 0 +#define AMDGPU_IRQ_CLIENTID_MAX SOC15_IH_CLIENTID_MAX + +#define AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW 4 + struct amdgpu_device; -struct amdgpu_iv_entry; enum amdgpu_interrupt_state { AMDGPU_IRQ_STATE_DISABLE, AMDGPU_IRQ_STATE_ENABLE, }; +struct amdgpu_iv_entry { + unsigned client_id; + unsigned src_id; + unsigned ring_id; + unsigned vmid; + unsigned vmid_src; + uint64_t timestamp; + unsigned timestamp_src; + unsigned pasid; + unsigned pasid_src; + unsigned src_data[AMDGPU_IRQ_SRC_DATA_MAX_SIZE_DW]; + const uint32_t *iv_entry; +}; + struct amdgpu_irq_src { unsigned num_types; atomic_t *enabled_types; @@ -63,7 +82,7 @@ struct amdgpu_irq { bool installed; spinlock_t lock; /* interrupt sources */ - struct amdgpu_irq_client client[AMDGPU_IH_CLIENTID_MAX]; + struct amdgpu_irq_client client[AMDGPU_IRQ_CLIENTID_MAX]; /* status, etc. */ bool msi_enabled; /* msi enabled */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c index 391e2f7c03aa..0a17fb1af204 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c @@ -32,12 +32,25 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job) { struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); + struct amdgpu_task_info ti; + memset(&ti, 0, sizeof(struct amdgpu_task_info)); + + if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) { + DRM_ERROR("ring %s timeout, but soft recovered\n", + s_job->sched->name); + return; + } + + amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti); DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n", job->base.sched->name, atomic_read(&ring->fence_drv.last_seq), ring->fence_drv.sync_seq); + DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n", + ti.process_name, ti.tgid, ti.task_name, ti.pid); - amdgpu_device_gpu_recover(ring->adev, job, false); + if (amdgpu_device_should_recover_gpu(ring->adev)) + amdgpu_device_gpu_recover(ring->adev, job); } int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, @@ -66,6 +79,7 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs, amdgpu_sync_create(&(*job)->sync); amdgpu_sync_create(&(*job)->sched_sync); (*job)->vram_lost_counter = atomic_read(&adev->vram_lost_counter); + (*job)->vm_pd_addr = AMDGPU_BO_INVALID_OFFSET; return 0; } @@ -82,8 +96,6 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size, r = amdgpu_ib_get(adev, NULL, size, &(*job)->ibs[0]); if (r) kfree(*job); - else - (*job)->vm_pd_addr = adev->gart.table_addr; return r; } @@ -106,6 +118,8 @@ static void amdgpu_job_free_cb(struct drm_sched_job *s_job) struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched); struct amdgpu_job *job = to_amdgpu_job(s_job); + drm_sched_job_cleanup(s_job); + amdgpu_ring_priority_put(ring, s_job->s_priority); dma_fence_put(job->fence); amdgpu_sync_free(&job->sync); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h index 57cfe78a262b..e1b46a6703de 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.h @@ -33,6 +33,8 @@ #define to_amdgpu_job(sched_job) \ container_of((sched_job), struct amdgpu_job, base) +#define AMDGPU_JOB_GET_VMID(job) ((job) ? (job)->vmid : 0) + struct amdgpu_fence; struct amdgpu_job { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c index bd98cc5fb97b..bc62bf41b7e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c @@ -37,6 +37,32 @@ #include <linux/slab.h> #include <linux/pm_runtime.h> #include "amdgpu_amdkfd.h" +#include "amdgpu_gem.h" +#include "amdgpu_display.h" + +static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev) +{ + struct amdgpu_gpu_instance *gpu_instance; + int i; + + mutex_lock(&mgpu_info.mutex); + + for (i = 0; i < mgpu_info.num_gpu; i++) { + gpu_instance = &(mgpu_info.gpu_ins[i]); + if (gpu_instance->adev == adev) { + mgpu_info.gpu_ins[i] = + mgpu_info.gpu_ins[mgpu_info.num_gpu - 1]; + mgpu_info.num_gpu--; + if (adev->flags & AMD_IS_APU) + mgpu_info.num_apu--; + else + mgpu_info.num_dgpu--; + break; + } + } + + mutex_unlock(&mgpu_info.mutex); +} /** * amdgpu_driver_unload_kms - Main unload function for KMS. @@ -53,6 +79,8 @@ void amdgpu_driver_unload_kms(struct drm_device *dev) if (adev == NULL) return; + amdgpu_unregister_gpu_instance(adev); + if (adev->rmmio == NULL) goto done_free; @@ -73,6 +101,31 @@ done_free: dev->dev_private = NULL; } +static void amdgpu_register_gpu_instance(struct amdgpu_device *adev) +{ + struct amdgpu_gpu_instance *gpu_instance; + + mutex_lock(&mgpu_info.mutex); + + if (mgpu_info.num_gpu >= MAX_GPU_INSTANCE) { + DRM_ERROR("Cannot register more gpu instance\n"); + mutex_unlock(&mgpu_info.mutex); + return; + } + + gpu_instance = &(mgpu_info.gpu_ins[mgpu_info.num_gpu]); + gpu_instance->adev = adev; + gpu_instance->mgpu_fan_enabled = 0; + + mgpu_info.num_gpu++; + if (adev->flags & AMD_IS_APU) + mgpu_info.num_apu++; + else + mgpu_info.num_dgpu++; + + mutex_unlock(&mgpu_info.mutex); +} + /** * amdgpu_driver_load_kms - Main load function for KMS. * @@ -167,6 +220,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags) pm_runtime_put_autosuspend(dev->dev); } + amdgpu_register_gpu_instance(adev); out: if (r) { /* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */ @@ -255,9 +309,130 @@ static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info, fw_info->ver = adev->psp.asd_fw_version; fw_info->feature = adev->psp.asd_feature_version; break; + case AMDGPU_INFO_FW_DMCU: + fw_info->ver = adev->dm.dmcu_fw_version; + fw_info->feature = 0; + break; + default: + return -EINVAL; + } + return 0; +} + +static int amdgpu_hw_ip_info(struct amdgpu_device *adev, + struct drm_amdgpu_info *info, + struct drm_amdgpu_info_hw_ip *result) +{ + uint32_t ib_start_alignment = 0; + uint32_t ib_size_alignment = 0; + enum amd_ip_block_type type; + unsigned int num_rings = 0; + unsigned int i, j; + + if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) + return -EINVAL; + + switch (info->query_hw_ip.type) { + case AMDGPU_HW_IP_GFX: + type = AMD_IP_BLOCK_TYPE_GFX; + for (i = 0; i < adev->gfx.num_gfx_rings; i++) + if (adev->gfx.gfx_ring[i].sched.ready) + ++num_rings; + ib_start_alignment = 32; + ib_size_alignment = 32; + break; + case AMDGPU_HW_IP_COMPUTE: + type = AMD_IP_BLOCK_TYPE_GFX; + for (i = 0; i < adev->gfx.num_compute_rings; i++) + if (adev->gfx.compute_ring[i].sched.ready) + ++num_rings; + ib_start_alignment = 32; + ib_size_alignment = 32; + break; + case AMDGPU_HW_IP_DMA: + type = AMD_IP_BLOCK_TYPE_SDMA; + for (i = 0; i < adev->sdma.num_instances; i++) + if (adev->sdma.instance[i].ring.sched.ready) + ++num_rings; + ib_start_alignment = 256; + ib_size_alignment = 4; + break; + case AMDGPU_HW_IP_UVD: + type = AMD_IP_BLOCK_TYPE_UVD; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + if (adev->uvd.inst[i].ring.sched.ready) + ++num_rings; + } + ib_start_alignment = 64; + ib_size_alignment = 64; + break; + case AMDGPU_HW_IP_VCE: + type = AMD_IP_BLOCK_TYPE_VCE; + for (i = 0; i < adev->vce.num_rings; i++) + if (adev->vce.ring[i].sched.ready) + ++num_rings; + ib_start_alignment = 4; + ib_size_alignment = 1; + break; + case AMDGPU_HW_IP_UVD_ENC: + type = AMD_IP_BLOCK_TYPE_UVD; + for (i = 0; i < adev->uvd.num_uvd_inst; i++) { + if (adev->uvd.harvest_config & (1 << i)) + continue; + + for (j = 0; j < adev->uvd.num_enc_rings; j++) + if (adev->uvd.inst[i].ring_enc[j].sched.ready) + ++num_rings; + } + ib_start_alignment = 64; + ib_size_alignment = 64; + break; + case AMDGPU_HW_IP_VCN_DEC: + type = AMD_IP_BLOCK_TYPE_VCN; + if (adev->vcn.ring_dec.sched.ready) + ++num_rings; + ib_start_alignment = 16; + ib_size_alignment = 16; + break; + case AMDGPU_HW_IP_VCN_ENC: + type = AMD_IP_BLOCK_TYPE_VCN; + for (i = 0; i < adev->vcn.num_enc_rings; i++) + if (adev->vcn.ring_enc[i].sched.ready) + ++num_rings; + ib_start_alignment = 64; + ib_size_alignment = 1; + break; + case AMDGPU_HW_IP_VCN_JPEG: + type = AMD_IP_BLOCK_TYPE_VCN; + if (adev->vcn.ring_jpeg.sched.ready) + ++num_rings; + ib_start_alignment = 16; + ib_size_alignment = 16; + break; default: return -EINVAL; } + + for (i = 0; i < adev->num_ip_blocks; i++) + if (adev->ip_blocks[i].version->type == type && + adev->ip_blocks[i].status.valid) + break; + + if (i == adev->num_ip_blocks) + return 0; + + num_rings = min(amdgpu_ctx_num_entities[info->query_hw_ip.type], + num_rings); + + result->hw_ip_version_major = adev->ip_blocks[i].version->major; + result->hw_ip_version_minor = adev->ip_blocks[i].version->minor; + result->capabilities_flags = 0; + result->available_rings = (1 << num_rings) - 1; + result->ib_start_alignment = ib_start_alignment; + result->ib_size_alignment = ib_size_alignment; return 0; } @@ -286,15 +461,12 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_crtc *crtc; uint32_t ui32 = 0; uint64_t ui64 = 0; - int i, j, found; + int i, found; int ui32_size = sizeof(ui32); if (!info->return_size || !info->return_pointer) return -EINVAL; - /* Ensure IB tests are run on ring */ - flush_delayed_work(&adev->late_init_work); - switch (info->query) { case AMDGPU_INFO_ACCEL_WORKING: ui32 = adev->accel_working; @@ -316,101 +488,14 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file return copy_to_user(out, &ui32, min(size, 4u)) ? -EFAULT : 0; case AMDGPU_INFO_HW_IP_INFO: { struct drm_amdgpu_info_hw_ip ip = {}; - enum amd_ip_block_type type; - uint32_t ring_mask = 0; - uint32_t ib_start_alignment = 0; - uint32_t ib_size_alignment = 0; - - if (info->query_hw_ip.ip_instance >= AMDGPU_HW_IP_INSTANCE_MAX_COUNT) - return -EINVAL; + int ret; - switch (info->query_hw_ip.type) { - case AMDGPU_HW_IP_GFX: - type = AMD_IP_BLOCK_TYPE_GFX; - for (i = 0; i < adev->gfx.num_gfx_rings; i++) - ring_mask |= adev->gfx.gfx_ring[i].ready << i; - ib_start_alignment = 32; - ib_size_alignment = 32; - break; - case AMDGPU_HW_IP_COMPUTE: - type = AMD_IP_BLOCK_TYPE_GFX; - for (i = 0; i < adev->gfx.num_compute_rings; i++) - ring_mask |= adev->gfx.compute_ring[i].ready << i; - ib_start_alignment = 32; - ib_size_alignment = 32; - break; - case AMDGPU_HW_IP_DMA: - type = AMD_IP_BLOCK_TYPE_SDMA; - for (i = 0; i < adev->sdma.num_instances; i++) - ring_mask |= adev->sdma.instance[i].ring.ready << i; - ib_start_alignment = 256; - ib_size_alignment = 4; - break; - case AMDGPU_HW_IP_UVD: - type = AMD_IP_BLOCK_TYPE_UVD; - for (i = 0; i < adev->uvd.num_uvd_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) - continue; - ring_mask |= adev->uvd.inst[i].ring.ready; - } - ib_start_alignment = 64; - ib_size_alignment = 64; - break; - case AMDGPU_HW_IP_VCE: - type = AMD_IP_BLOCK_TYPE_VCE; - for (i = 0; i < adev->vce.num_rings; i++) - ring_mask |= adev->vce.ring[i].ready << i; - ib_start_alignment = 4; - ib_size_alignment = 1; - break; - case AMDGPU_HW_IP_UVD_ENC: - type = AMD_IP_BLOCK_TYPE_UVD; - for (i = 0; i < adev->uvd.num_uvd_inst; i++) { - if (adev->uvd.harvest_config & (1 << i)) - continue; - for (j = 0; j < adev->uvd.num_enc_rings; j++) - ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j; - } - ib_start_alignment = 64; - ib_size_alignment = 64; - break; - case AMDGPU_HW_IP_VCN_DEC: - type = AMD_IP_BLOCK_TYPE_VCN; - ring_mask = adev->vcn.ring_dec.ready; - ib_start_alignment = 16; - ib_size_alignment = 16; - break; - case AMDGPU_HW_IP_VCN_ENC: - type = AMD_IP_BLOCK_TYPE_VCN; - for (i = 0; i < adev->vcn.num_enc_rings; i++) - ring_mask |= adev->vcn.ring_enc[i].ready << i; - ib_start_alignment = 64; - ib_size_alignment = 1; - break; - case AMDGPU_HW_IP_VCN_JPEG: - type = AMD_IP_BLOCK_TYPE_VCN; - ring_mask = adev->vcn.ring_jpeg.ready; - ib_start_alignment = 16; - ib_size_alignment = 16; - break; - default: - return -EINVAL; - } + ret = amdgpu_hw_ip_info(adev, info, &ip); + if (ret) + return ret; - for (i = 0; i < adev->num_ip_blocks; i++) { - if (adev->ip_blocks[i].version->type == type && - adev->ip_blocks[i].status.valid) { - ip.hw_ip_version_major = adev->ip_blocks[i].version->major; - ip.hw_ip_version_minor = adev->ip_blocks[i].version->minor; - ip.capabilities_flags = 0; - ip.available_rings = ring_mask; - ip.ib_start_alignment = ib_start_alignment; - ip.ib_size_alignment = ib_size_alignment; - break; - } - } - return copy_to_user(out, &ip, - min((size_t)size, sizeof(ip))) ? -EFAULT : 0; + ret = copy_to_user(out, &ip, min((size_t)size, sizeof(ip))); + return ret ? -EFAULT : 0; } case AMDGPU_INFO_HW_IP_COUNT: { enum amd_ip_block_type type; @@ -492,13 +577,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file struct drm_amdgpu_info_gds gds_info; memset(&gds_info, 0, sizeof(gds_info)); - gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size >> AMDGPU_GDS_SHIFT; - gds_info.compute_partition_size = adev->gds.mem.cs_partition_size >> AMDGPU_GDS_SHIFT; - gds_info.gds_total_size = adev->gds.mem.total_size >> AMDGPU_GDS_SHIFT; - gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size >> AMDGPU_GWS_SHIFT; - gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size >> AMDGPU_GWS_SHIFT; - gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size >> AMDGPU_OA_SHIFT; - gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size >> AMDGPU_OA_SHIFT; + gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size; + gds_info.compute_partition_size = adev->gds.mem.cs_partition_size; + gds_info.gds_total_size = adev->gds.mem.total_size; + gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size; + gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size; + gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size; + gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size; return copy_to_user(out, &gds_info, min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0; } @@ -617,16 +702,17 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file vm_size -= AMDGPU_VA_RESERVED_SIZE; /* Older VCE FW versions are buggy and can handle only 40bits */ - if (adev->vce.fw_version < AMDGPU_VCE_FW_53_45) + if (adev->vce.fw_version && + adev->vce.fw_version < AMDGPU_VCE_FW_53_45) vm_size = min(vm_size, 1ULL << 40); dev_info.virtual_address_offset = AMDGPU_VA_RESERVED_SIZE; dev_info.virtual_address_max = - min(vm_size, AMDGPU_VA_HOLE_START); + min(vm_size, AMDGPU_GMC_HOLE_START); - if (vm_size > AMDGPU_VA_HOLE_START) { - dev_info.high_va_offset = AMDGPU_VA_HOLE_END; - dev_info.high_va_max = AMDGPU_VA_HOLE_END | vm_size; + if (vm_size > AMDGPU_GMC_HOLE_START) { + dev_info.high_va_offset = AMDGPU_GMC_HOLE_END; + dev_info.high_va_max = AMDGPU_GMC_HOLE_END | vm_size; } dev_info.virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE); dev_info.pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE; @@ -861,6 +947,9 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) struct amdgpu_fpriv *fpriv; int r, pasid; + /* Ensure IB tests are run on ring */ + flush_delayed_work(&adev->late_init_work); + file_priv->driver_priv = NULL; r = pm_runtime_get_sync(dev->dev); @@ -889,7 +978,10 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv) } if (amdgpu_sriov_vf(adev)) { - r = amdgpu_map_static_csa(adev, &fpriv->vm, &fpriv->csa_va); + uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK; + + r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj, + &fpriv->csa_va, csa_addr, AMDGPU_CSA_SIZE); if (r) goto error_vm; } @@ -941,10 +1033,10 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pm_runtime_get_sync(dev->dev); - if (adev->asic_type != CHIP_RAVEN) { + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_UVD) != NULL) amdgpu_uvd_free_handles(adev, file_priv); + if (amdgpu_device_ip_get_ip_block(adev, AMD_IP_BLOCK_TYPE_VCE) != NULL) amdgpu_vce_free_handles(adev, file_priv); - } amdgpu_vm_bo_rmv(adev, fpriv->prt_va); @@ -959,8 +1051,8 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev, pasid = fpriv->vm.pasid; pd = amdgpu_bo_ref(fpriv->vm.root.base.bo); - amdgpu_vm_fini(adev, &fpriv->vm); amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr); + amdgpu_vm_fini(adev, &fpriv->vm); if (pasid) amdgpu_pasid_free_delayed(pd->tbo.resv, pasid); @@ -1262,6 +1354,14 @@ static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data) seq_printf(m, "VCN feature version: %u, firmware version: 0x%08x\n", fw_info.feature, fw_info.ver); + /* DMCU */ + query_fw.fw_type = AMDGPU_INFO_FW_DMCU; + ret = amdgpu_firmware_info(&fw_info, &query_fw, adev); + if (ret) + return ret; + seq_printf(m, "DMCU feature version: %u, firmware version: 0x%08x\n", + fw_info.feature, fw_info.ver); + seq_printf(m, "VBIOS version: %s\n", ctx->vbios_version); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c index e55508b39496..3e6823fdd939 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c @@ -238,44 +238,40 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node, * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change * * @mn: our notifier - * @mm: the mm this callback is about - * @start: start of updated range - * @end: end of updated range + * @range: mmu notifier context * * Block for operations on BOs to finish and mark pages as accessed and * potentially dirty. */ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - bool blockable) + const struct mmu_notifier_range *range) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; + unsigned long end; /* notification is exclusive, but interval is inclusive */ - end -= 1; + end = range->end - 1; /* TODO we should be able to split locking for interval tree and * amdgpu_mn_invalidate_node */ - if (amdgpu_mn_read_lock(amn, blockable)) + if (amdgpu_mn_read_lock(amn, range->blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, range->start, end); while (it) { struct amdgpu_mn_node *node; - if (!blockable) { + if (!range->blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); + it = interval_tree_iter_next(it, range->start, end); - amdgpu_mn_invalidate_node(node, start, end); + amdgpu_mn_invalidate_node(node, range->start, end); } return 0; @@ -294,39 +290,38 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn, * are restorted in amdgpu_mn_invalidate_range_end_hsa. */ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end, - bool blockable) + const struct mmu_notifier_range *range) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); struct interval_tree_node *it; + unsigned long end; /* notification is exclusive, but interval is inclusive */ - end -= 1; + end = range->end - 1; - if (amdgpu_mn_read_lock(amn, blockable)) + if (amdgpu_mn_read_lock(amn, range->blockable)) return -EAGAIN; - it = interval_tree_iter_first(&amn->objects, start, end); + it = interval_tree_iter_first(&amn->objects, range->start, end); while (it) { struct amdgpu_mn_node *node; struct amdgpu_bo *bo; - if (!blockable) { + if (!range->blockable) { amdgpu_mn_read_unlock(amn); return -EAGAIN; } node = container_of(it, struct amdgpu_mn_node, it); - it = interval_tree_iter_next(it, start, end); + it = interval_tree_iter_next(it, range->start, end); list_for_each_entry(bo, &node->bos, mn_list) { struct kgd_mem *mem = bo->kfd_bo; if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm, - start, end)) - amdgpu_amdkfd_evict_userptr(mem, mm); + range->start, + end)) + amdgpu_amdkfd_evict_userptr(mem, range->mm); } } @@ -344,9 +339,7 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn, * Release the lock again to allow new command submissions. */ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) + const struct mmu_notifier_range *range) { struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h index b9e9e8b02fb7..aadd0fa42e43 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mode.h @@ -38,7 +38,6 @@ #include <drm/drm_crtc_helper.h> #include <drm/drm_fb_helper.h> #include <drm/drm_plane_helper.h> -#include <drm/drm_fb_helper.h> #include <linux/i2c.h> #include <linux/i2c-algo-bit.h> #include <linux/hrtimer.h> @@ -57,7 +56,6 @@ struct amdgpu_hpd; #define to_amdgpu_connector(x) container_of(x, struct amdgpu_connector, base) #define to_amdgpu_encoder(x) container_of(x, struct amdgpu_encoder, base) #define to_amdgpu_framebuffer(x) container_of(x, struct amdgpu_framebuffer, base) -#define to_amdgpu_plane(x) container_of(x, struct amdgpu_plane, base) #define to_dm_plane_state(x) container_of(x, struct dm_plane_state, base); @@ -295,13 +293,6 @@ struct amdgpu_display_funcs { uint16_t connector_object_id, struct amdgpu_hpd *hpd, struct amdgpu_router *router); - /* it is used to enter or exit into free sync mode */ - int (*notify_freesync)(struct drm_device *dev, void *data, - struct drm_file *filp); - /* it is used to allow enablement of freesync mode */ - int (*set_freesync_property)(struct drm_connector *connector, - struct drm_property *property, - uint64_t val); }; @@ -325,7 +316,7 @@ struct amdgpu_mode_info { struct card_info *atom_card_info; bool mode_config_initialized; struct amdgpu_crtc *crtcs[AMDGPU_MAX_CRTCS]; - struct amdgpu_plane *planes[AMDGPU_MAX_PLANES]; + struct drm_plane *planes[AMDGPU_MAX_PLANES]; struct amdgpu_afmt *afmt[AMDGPU_MAX_AFMT_BLOCKS]; /* DVI-I properties */ struct drm_property *coherent_mode_property; @@ -339,6 +330,10 @@ struct amdgpu_mode_info { struct drm_property *audio_property; /* FMT dithering */ struct drm_property *dither_property; + /* maximum number of bits per channel for monitor color */ + struct drm_property *max_bpc_property; + /* Adaptive Backlight Modulation (power feature) */ + struct drm_property *abm_level_property; /* hardcoded DFP edid from BIOS */ struct edid *bios_hardcoded_edid; int bios_hardcoded_edid_size; @@ -434,11 +429,6 @@ struct amdgpu_crtc { struct drm_pending_vblank_event *event; }; -struct amdgpu_plane { - struct drm_plane base; - enum drm_plane_type plane_type; -}; - struct amdgpu_encoder_atom_dig { bool linkb; /* atom dig */ diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c index b0e14a3d54ef..728e15e5d68a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c @@ -51,18 +51,6 @@ * */ -static bool amdgpu_bo_need_backup(struct amdgpu_device *adev) -{ - if (adev->flags & AMD_IS_APU) - return false; - - if (amdgpu_gpu_recovery == 0 || - (amdgpu_gpu_recovery == -1 && !amdgpu_sriov_vf(adev))) - return false; - - return true; -} - /** * amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting * @@ -93,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo) amdgpu_bo_subtract_pin_size(bo); if (bo->kfd_bo) - amdgpu_amdkfd_unreserve_system_memory_limit(bo); + amdgpu_amdkfd_unreserve_memory_limit(bo); amdgpu_bo_kunmap(bo); @@ -163,10 +151,7 @@ void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain) if (domain & AMDGPU_GEM_DOMAIN_GTT) { places[c].fpfn = 0; - if (flags & AMDGPU_GEM_CREATE_SHADOW) - places[c].lpfn = adev->gmc.gart_size >> PAGE_SHIFT; - else - places[c].lpfn = 0; + places[c].lpfn = 0; places[c].flags = TTM_PL_FLAG_TT; if (flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC) places[c].flags |= TTM_PL_FLAG_WC | @@ -253,6 +238,11 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev, bool free = false; int r; + if (!size) { + amdgpu_bo_unref(bo_ptr); + return 0; + } + memset(&bp, 0, sizeof(bp)); bp.size = size; bp.byte_align = align; @@ -346,7 +336,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev, if (r) return r; - amdgpu_bo_unreserve(*bo_ptr); + if (*bo_ptr) + amdgpu_bo_unreserve(*bo_ptr); return 0; } @@ -436,7 +427,11 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, int r; page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT; - size = ALIGN(size, PAGE_SIZE); + if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS | + AMDGPU_GEM_DOMAIN_OA)) + size <<= PAGE_SHIFT; + else + size = ALIGN(size, PAGE_SIZE); if (!amdgpu_bo_validate_size(adev, size, bp->domain)) return -ENOMEM; @@ -451,7 +446,7 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev, return -ENOMEM; drm_gem_private_object_init(adev->ddev, &bo->gem_base, size); INIT_LIST_HEAD(&bo->shadow_list); - INIT_LIST_HEAD(&bo->va); + bo->vm_bo = NULL; bo->preferred_domains = bp->preferred_domain ? bp->preferred_domain : bp->domain; bo->allowed_domains = bo->preferred_domains; @@ -541,7 +536,7 @@ fail_unreserve: } static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, - unsigned long size, int byte_align, + unsigned long size, struct amdgpu_bo *bo) { struct amdgpu_bo_param bp; @@ -552,7 +547,6 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, memset(&bp, 0, sizeof(bp)); bp.size = size; - bp.byte_align = byte_align; bp.domain = AMDGPU_GEM_DOMAIN_GTT; bp.flags = AMDGPU_GEM_CREATE_CPU_GTT_USWC | AMDGPU_GEM_CREATE_SHADOW; @@ -563,7 +557,7 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev, if (!r) { bo->shadow->parent = amdgpu_bo_ref(bo); mutex_lock(&adev->shadow_list_lock); - list_add_tail(&bo->shadow_list, &adev->shadow_list); + list_add_tail(&bo->shadow->shadow_list, &adev->shadow_list); mutex_unlock(&adev->shadow_list_lock); } @@ -596,12 +590,12 @@ int amdgpu_bo_create(struct amdgpu_device *adev, if (r) return r; - if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) { + if ((flags & AMDGPU_GEM_CREATE_SHADOW) && !(adev->flags & AMD_IS_APU)) { if (!bp->resv) WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv, NULL)); - r = amdgpu_bo_create_shadow(adev, bp->size, bp->byte_align, (*bo_ptr)); + r = amdgpu_bo_create_shadow(adev, bp->size, *bo_ptr); if (!bp->resv) reservation_object_unlock((*bo_ptr)->tbo.resv); @@ -614,53 +608,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev, } /** - * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: &amdgpu_bo buffer to be backed up - * @resv: reservation object with embedded fence - * @fence: dma_fence associated with the operation - * @direct: whether to submit the job directly - * - * Copies an &amdgpu_bo buffer object to its shadow object. - * Not used for now. - * - * Returns: - * 0 for success or a negative error code on failure. - */ -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct) - -{ - struct amdgpu_bo *shadow = bo->shadow; - uint64_t bo_addr, shadow_addr; - int r; - - if (!shadow) - return -EINVAL; - - bo_addr = amdgpu_bo_gpu_offset(bo); - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); - - r = reservation_object_reserve_shared(bo->tbo.resv); - if (r) - goto err; - - r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr, - amdgpu_bo_size(bo), resv, fence, - direct, false); - if (!r) - amdgpu_bo_fence(bo, *fence, true); - -err: - return r; -} - -/** * amdgpu_bo_validate - validate an &amdgpu_bo buffer object * @bo: pointer to the buffer object * @@ -695,13 +642,10 @@ retry: } /** - * amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object - * @adev: amdgpu device object - * @ring: amdgpu_ring for the engine handling the buffer operations - * @bo: &amdgpu_bo buffer to be restored - * @resv: reservation object with embedded fence + * amdgpu_bo_restore_shadow - restore an &amdgpu_bo shadow + * + * @shadow: &amdgpu_bo shadow to be restored * @fence: dma_fence associated with the operation - * @direct: whether to submit the job directly * * Copies a buffer object's shadow content back to the object. * This is used for recovering a buffer from its shadow in case of a gpu @@ -710,36 +654,19 @@ retry: * Returns: * 0 for success or a negative error code on failure. */ -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct) +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, struct dma_fence **fence) { - struct amdgpu_bo *shadow = bo->shadow; - uint64_t bo_addr, shadow_addr; - int r; - - if (!shadow) - return -EINVAL; + struct amdgpu_device *adev = amdgpu_ttm_adev(shadow->tbo.bdev); + struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring; + uint64_t shadow_addr, parent_addr; - bo_addr = amdgpu_bo_gpu_offset(bo); - shadow_addr = amdgpu_bo_gpu_offset(bo->shadow); + shadow_addr = amdgpu_bo_gpu_offset(shadow); + parent_addr = amdgpu_bo_gpu_offset(shadow->parent); - r = reservation_object_reserve_shared(bo->tbo.resv); - if (r) - goto err; - - r = amdgpu_copy_buffer(ring, shadow_addr, bo_addr, - amdgpu_bo_size(bo), resv, fence, - direct, false); - if (!r) - amdgpu_bo_fence(bo, *fence, true); - -err: - return r; + return amdgpu_copy_buffer(ring, shadow_addr, parent_addr, + amdgpu_bo_size(shadow), NULL, fence, + true, false); } /** @@ -985,7 +912,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) struct ttm_operation_ctx ctx = { false, false }; int r, i; - if (!bo->pin_count) { + if (WARN_ON_ONCE(!bo->pin_count)) { dev_warn(adev->dev, "%p unpin not necessary\n", bo); return 0; } @@ -1019,10 +946,12 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo) int amdgpu_bo_evict_vram(struct amdgpu_device *adev) { /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */ - if (0 && (adev->flags & AMD_IS_APU)) { +#ifndef CONFIG_HIBERNATION + if (adev->flags & AMD_IS_APU) { /* Useless to evict on IGP chips */ return 0; } +#endif return ttm_bo_evict_mm(&adev->mman.bdev, TTM_PL_VRAM); } @@ -1360,15 +1289,13 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo) { WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_SYSTEM); - WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem)); WARN_ON_ONCE(!ww_mutex_is_locked(&bo->tbo.resv->lock) && - !bo->pin_count); + !bo->pin_count && bo->tbo.type != ttm_bo_type_kernel); WARN_ON_ONCE(bo->tbo.mem.start == AMDGPU_BO_INVALID_OFFSET); WARN_ON_ONCE(bo->tbo.mem.mem_type == TTM_PL_VRAM && !(bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)); - return bo->tbo.offset; + return amdgpu_gmc_sign_extend(bo->tbo.offset); } /** diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h index 18945dd6982d..9291c2f837e9 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h @@ -89,8 +89,8 @@ struct amdgpu_bo { void *metadata; u32 metadata_size; unsigned prime_shared_count; - /* list of all virtual address to which this bo is associated to */ - struct list_head va; + /* per VM structure for page tables and with virtual addresses */ + struct amdgpu_vm_bo_base *vm_bo; /* Constant after initialization */ struct drm_gem_object gem_base; struct amdgpu_bo *parent; @@ -194,19 +194,6 @@ static inline u64 amdgpu_bo_mmap_offset(struct amdgpu_bo *bo) } /** - * amdgpu_bo_gpu_accessible - return whether the bo is currently in memory that - * is accessible to the GPU. - */ -static inline bool amdgpu_bo_gpu_accessible(struct amdgpu_bo *bo) -{ - switch (bo->tbo.mem.mem_type) { - case TTM_PL_TT: return amdgpu_gtt_mgr_has_gart_addr(&bo->tbo.mem); - case TTM_PL_VRAM: return true; - default: return false; - } -} - -/** * amdgpu_bo_in_cpu_visible_vram - check if BO is (partly) in visible VRAM */ static inline bool amdgpu_bo_in_cpu_visible_vram(struct amdgpu_bo *bo) @@ -280,18 +267,9 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo); void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence, bool shared); u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo); -int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, bool direct); int amdgpu_bo_validate(struct amdgpu_bo *bo); -int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev, - struct amdgpu_ring *ring, - struct amdgpu_bo *bo, - struct reservation_object *resv, - struct dma_fence **fence, - bool direct); +int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow, + struct dma_fence **fence); uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev, uint32_t domain); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c index 7b4e657a95c7..1f61ed95727c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c @@ -27,11 +27,14 @@ #include "amdgpu_drv.h" #include "amdgpu_pm.h" #include "amdgpu_dpm.h" +#include "amdgpu_display.h" #include "atom.h" #include <linux/power_supply.h> #include <linux/hwmon.h> #include <linux/hwmon-sysfs.h> #include <linux/nospec.h> +#include "hwmgr.h" +#define WIDTH_4K 3840 static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev); @@ -473,6 +476,8 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * in each power level within a power state. The pp_od_clk_voltage is used for * this. * + * < For Vega10 and previous ASICs > + * * Reading the file will display: * * - a list of engine clock levels and voltages labeled OD_SCLK @@ -490,6 +495,44 @@ static ssize_t amdgpu_set_pp_table(struct device *dev, * "c" (commit) to the file to commit your changes. If you want to reset to the * default power levels, write "r" (reset) to the file to reset them. * + * + * < For Vega20 > + * + * Reading the file will display: + * + * - minimum and maximum engine clock labeled OD_SCLK + * + * - maximum memory clock labeled OD_MCLK + * + * - three <frequency, voltage> points labeled OD_VDDC_CURVE. + * They can be used to calibrate the sclk voltage curve. + * + * - a list of valid ranges for sclk, mclk, and voltage curve points + * labeled OD_RANGE + * + * To manually adjust these settings: + * + * - First select manual using power_dpm_force_performance_level + * + * - For clock frequency setting, enter a new value by writing a + * string that contains "s/m index clock" to the file. The index + * should be 0 if to set minimum clock. And 1 if to set maximum + * clock. E.g., "s 0 500" will update minimum sclk to be 500 MHz. + * "m 1 800" will update maximum mclk to be 800Mhz. + * + * For sclk voltage curve, enter the new values by writing a + * string that contains "vc point clock voltage" to the file. The + * points are indexed by 0, 1 and 2. E.g., "vc 0 300 600" will + * update point1 with clock set as 300Mhz and voltage as + * 600mV. "vc 2 1000 1000" will update point3 with clock set + * as 1000Mhz and voltage 1000mV. + * + * - When you have edited all of the states as needed, write "c" (commit) + * to the file to commit your changes + * + * - If you want to reset to the default power levels, write "r" (reset) + * to the file to reset them + * */ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, @@ -519,6 +562,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, type = PP_OD_RESTORE_DEFAULT_TABLE; else if (*buf == 'c') type = PP_OD_COMMIT_DPM_TABLE; + else if (!strncmp(buf, "vc", 2)) + type = PP_OD_EDIT_VDDC_CURVE; else return -EINVAL; @@ -526,6 +571,8 @@ static ssize_t amdgpu_set_pp_od_clk_voltage(struct device *dev, tmp_str = buf_cpy; + if (type == PP_OD_EDIT_VDDC_CURVE) + tmp_str++; while (isspace(*++tmp_str)); while (tmp_str[0]) { @@ -569,6 +616,7 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev, if (adev->powerplay.pp_funcs->print_clock_levels) { size = amdgpu_dpm_print_clock_levels(adev, OD_SCLK, buf); size += amdgpu_dpm_print_clock_levels(adev, OD_MCLK, buf+size); + size += amdgpu_dpm_print_clock_levels(adev, OD_VDDC_CURVE, buf+size); size += amdgpu_dpm_print_clock_levels(adev, OD_RANGE, buf+size); return size; } else { @@ -658,7 +706,10 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask); + + if (ret) + return -EINVAL; return count; } @@ -691,7 +742,10 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask); + + if (ret) + return -EINVAL; return count; } @@ -724,7 +778,10 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev, return ret; if (adev->powerplay.pp_funcs->force_clock_level) - amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + ret = amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask); + + if (ret) + return -EINVAL; return count; } @@ -1074,12 +1131,19 @@ static ssize_t amdgpu_hwmon_set_pwm1(struct device *dev, struct amdgpu_device *adev = dev_get_drvdata(dev); int err; u32 value; + u32 pwm_mode; /* Can't adjust fan when the card is off */ if ((adev->flags & AMD_IS_PX) && (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) return -EINVAL; + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) { + pr_info("manual fan speed control should be enabled first\n"); + return -EINVAL; + } + err = kstrtou32(buf, 10, &value); if (err) return err; @@ -1141,6 +1205,148 @@ static ssize_t amdgpu_hwmon_get_fan1_input(struct device *dev, return sprintf(buf, "%i\n", speed); } +static ssize_t amdgpu_hwmon_get_fan1_min(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + u32 min_rpm = 0; + u32 size = sizeof(min_rpm); + int r; + + if (!adev->powerplay.pp_funcs->read_sensor) + return -EINVAL; + + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MIN_FAN_RPM, + (void *)&min_rpm, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", min_rpm); +} + +static ssize_t amdgpu_hwmon_get_fan1_max(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + u32 max_rpm = 0; + u32 size = sizeof(max_rpm); + int r; + + if (!adev->powerplay.pp_funcs->read_sensor) + return -EINVAL; + + r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_MAX_FAN_RPM, + (void *)&max_rpm, &size); + if (r) + return r; + + return snprintf(buf, PAGE_SIZE, "%d\n", max_rpm); +} + +static ssize_t amdgpu_hwmon_get_fan1_target(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int err; + u32 rpm = 0; + + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + if (adev->powerplay.pp_funcs->get_fan_speed_rpm) { + err = amdgpu_dpm_get_fan_speed_rpm(adev, &rpm); + if (err) + return err; + } + + return sprintf(buf, "%i\n", rpm); +} + +static ssize_t amdgpu_hwmon_set_fan1_target(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int err; + u32 value; + u32 pwm_mode; + + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + if (pwm_mode != AMD_FAN_CTRL_MANUAL) + return -ENODATA; + + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + err = kstrtou32(buf, 10, &value); + if (err) + return err; + + if (adev->powerplay.pp_funcs->set_fan_speed_rpm) { + err = amdgpu_dpm_set_fan_speed_rpm(adev, value); + if (err) + return err; + } + + return count; +} + +static ssize_t amdgpu_hwmon_get_fan1_enable(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + u32 pwm_mode = 0; + + if (!adev->powerplay.pp_funcs->get_fan_control_mode) + return -EINVAL; + + pwm_mode = amdgpu_dpm_get_fan_control_mode(adev); + + return sprintf(buf, "%i\n", pwm_mode == AMD_FAN_CTRL_AUTO ? 0 : 1); +} + +static ssize_t amdgpu_hwmon_set_fan1_enable(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + struct amdgpu_device *adev = dev_get_drvdata(dev); + int err; + int value; + u32 pwm_mode; + + /* Can't adjust fan when the card is off */ + if ((adev->flags & AMD_IS_PX) && + (adev->ddev->switch_power_state != DRM_SWITCH_POWER_ON)) + return -EINVAL; + + if (!adev->powerplay.pp_funcs->set_fan_control_mode) + return -EINVAL; + + err = kstrtoint(buf, 10, &value); + if (err) + return err; + + if (value == 0) + pwm_mode = AMD_FAN_CTRL_AUTO; + else if (value == 1) + pwm_mode = AMD_FAN_CTRL_MANUAL; + else + return -EINVAL; + + amdgpu_dpm_set_fan_control_mode(adev, pwm_mode); + + return count; +} + static ssize_t amdgpu_hwmon_show_vddgfx(struct device *dev, struct device_attribute *attr, char *buf) @@ -1360,8 +1566,16 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev, * * - pwm1_max: pulse width modulation fan control maximum level (255) * + * - fan1_min: an minimum value Unit: revolution/min (RPM) + * + * - fan1_max: an maxmum value Unit: revolution/max (RPM) + * * - fan1_input: fan speed in RPM * + * - fan[1-*]_target: Desired fan speed Unit: revolution/min (RPM) + * + * - fan[1-*]_enable: Enable or disable the sensors.1: Enable 0: Disable + * * You can use hwmon tools like sensors to view this information on your system. * */ @@ -1374,6 +1588,10 @@ static SENSOR_DEVICE_ATTR(pwm1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_pwm1_ static SENSOR_DEVICE_ATTR(pwm1_min, S_IRUGO, amdgpu_hwmon_get_pwm1_min, NULL, 0); static SENSOR_DEVICE_ATTR(pwm1_max, S_IRUGO, amdgpu_hwmon_get_pwm1_max, NULL, 0); static SENSOR_DEVICE_ATTR(fan1_input, S_IRUGO, amdgpu_hwmon_get_fan1_input, NULL, 0); +static SENSOR_DEVICE_ATTR(fan1_min, S_IRUGO, amdgpu_hwmon_get_fan1_min, NULL, 0); +static SENSOR_DEVICE_ATTR(fan1_max, S_IRUGO, amdgpu_hwmon_get_fan1_max, NULL, 0); +static SENSOR_DEVICE_ATTR(fan1_target, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_target, amdgpu_hwmon_set_fan1_target, 0); +static SENSOR_DEVICE_ATTR(fan1_enable, S_IRUGO | S_IWUSR, amdgpu_hwmon_get_fan1_enable, amdgpu_hwmon_set_fan1_enable, 0); static SENSOR_DEVICE_ATTR(in0_input, S_IRUGO, amdgpu_hwmon_show_vddgfx, NULL, 0); static SENSOR_DEVICE_ATTR(in0_label, S_IRUGO, amdgpu_hwmon_show_vddgfx_label, NULL, 0); static SENSOR_DEVICE_ATTR(in1_input, S_IRUGO, amdgpu_hwmon_show_vddnb, NULL, 0); @@ -1392,6 +1610,10 @@ static struct attribute *hwmon_attributes[] = { &sensor_dev_attr_pwm1_min.dev_attr.attr, &sensor_dev_attr_pwm1_max.dev_attr.attr, &sensor_dev_attr_fan1_input.dev_attr.attr, + &sensor_dev_attr_fan1_min.dev_attr.attr, + &sensor_dev_attr_fan1_max.dev_attr.attr, + &sensor_dev_attr_fan1_target.dev_attr.attr, + &sensor_dev_attr_fan1_enable.dev_attr.attr, &sensor_dev_attr_in0_input.dev_attr.attr, &sensor_dev_attr_in0_label.dev_attr.attr, &sensor_dev_attr_in1_input.dev_attr.attr, @@ -1410,13 +1632,29 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, struct amdgpu_device *adev = dev_get_drvdata(dev); umode_t effective_mode = attr->mode; - /* Skip fan attributes if fan is not present */ if (adev->pm.no_fan && (attr == &sensor_dev_attr_pwm1.dev_attr.attr || attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || - attr == &sensor_dev_attr_fan1_input.dev_attr.attr)) + attr == &sensor_dev_attr_fan1_input.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_target.dev_attr.attr || + attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) + return 0; + + /* Skip fan attributes on APU */ + if ((adev->flags & AMD_IS_APU) && + (attr == &sensor_dev_attr_pwm1.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_target.dev_attr.attr || + attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; /* Skip limit attributes if DPM is not enabled */ @@ -1426,7 +1664,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, attr == &sensor_dev_attr_pwm1.dev_attr.attr || attr == &sensor_dev_attr_pwm1_enable.dev_attr.attr || attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || - attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) + attr == &sensor_dev_attr_pwm1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_input.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr || + attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_target.dev_attr.attr || + attr == &sensor_dev_attr_fan1_enable.dev_attr.attr)) return 0; /* mask fan attributes if we have no bindings for this asic to expose */ @@ -1451,10 +1694,18 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj, /* hide max/min values if we can't both query and manage the fan */ if ((!adev->powerplay.pp_funcs->set_fan_speed_percent && !adev->powerplay.pp_funcs->get_fan_speed_percent) && + (!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && (attr == &sensor_dev_attr_pwm1_max.dev_attr.attr || attr == &sensor_dev_attr_pwm1_min.dev_attr.attr)) return 0; + if ((!adev->powerplay.pp_funcs->set_fan_speed_rpm && + !adev->powerplay.pp_funcs->get_fan_speed_rpm) && + (attr == &sensor_dev_attr_fan1_max.dev_attr.attr || + attr == &sensor_dev_attr_fan1_min.dev_attr.attr)) + return 0; + /* only APUs have vddnb */ if (!(adev->flags & AMD_IS_APU) && (attr == &sensor_dev_attr_in1_input.dev_attr.attr || @@ -1719,18 +1970,17 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable) mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable); mutex_unlock(&adev->pm.mutex); - } else { - if (enable) { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.uvd_active = true; - adev->pm.dpm.state = POWER_STATE_TYPE_INTERNAL_UVD; - mutex_unlock(&adev->pm.mutex); - } else { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.uvd_active = false; - mutex_unlock(&adev->pm.mutex); - } - amdgpu_pm_compute_clocks(adev); + } + /* enable/disable Low Memory PState for UVD (4k videos) */ + if (adev->asic_type == CHIP_STONEY && + adev->uvd.decode_image_width >= WIDTH_4K) { + struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle; + + if (hwmgr && hwmgr->hwmgr_func && + hwmgr->hwmgr_func->update_nbdpm_pstate) + hwmgr->hwmgr_func->update_nbdpm_pstate(hwmgr, + !enable, + true); } } @@ -1741,29 +1991,6 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable) mutex_lock(&adev->pm.mutex); amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable); mutex_unlock(&adev->pm.mutex); - } else { - if (enable) { - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.vce_active = true; - /* XXX select vce level based on ring/task */ - adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL; - mutex_unlock(&adev->pm.mutex); - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_UNGATE); - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_UNGATE); - amdgpu_pm_compute_clocks(adev); - } else { - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_PG_STATE_GATE); - amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE, - AMD_CG_STATE_GATE); - mutex_lock(&adev->pm.mutex); - adev->pm.dpm.vce_active = false; - mutex_unlock(&adev->pm.mutex); - amdgpu_pm_compute_clocks(adev); - } - } } @@ -1928,7 +2155,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; i++) { struct amdgpu_ring *ring = adev->rings[i]; - if (ring && ring->ready) + if (ring && ring->sched.ready) amdgpu_fence_wait_empty(ring); } @@ -1965,6 +2192,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev) static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *adev) { uint32_t value; + uint64_t value64; uint32_t query = 0; int size; @@ -2003,6 +2231,10 @@ static int amdgpu_debugfs_pm_info_pp(struct seq_file *m, struct amdgpu_device *a seq_printf(m, "GPU Load: %u %%\n", value); seq_printf(m, "\n"); + /* SMC feature mask */ + if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_ENABLED_SMC_FEATURES_MASK, (void *)&value64, &size)) + seq_printf(m, "SMC Feature Mask: 0x%016llx\n", value64); + /* UVD clocks */ if (!amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_UVD_POWER, (void *)&value, &size)) { if (!value) { diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c index 1c5d97f4b4dd..71913a18d142 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c @@ -35,18 +35,17 @@ #include "amdgpu.h" #include "amdgpu_display.h" +#include "amdgpu_gem.h" #include <drm/amdgpu_drm.h> #include <linux/dma-buf.h> -static const struct dma_buf_ops amdgpu_dmabuf_ops; - /** * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table * implementation - * @obj: GEM buffer object + * @obj: GEM buffer object (BO) * * Returns: - * A scatter/gather table for the pinned pages of the buffer object's memory. + * A scatter/gather table for the pinned pages of the BO's memory. */ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) { @@ -58,9 +57,9 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj) /** * amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation - * @obj: GEM buffer object + * @obj: GEM BO * - * Sets up an in-kernel virtual mapping of the buffer object's memory. + * Sets up an in-kernel virtual mapping of the BO's memory. * * Returns: * The virtual address of the mapping or an error pointer. @@ -80,10 +79,10 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj) /** * amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation - * @obj: GEM buffer object - * @vaddr: virtual address (unused) + * @obj: GEM BO + * @vaddr: Virtual address (unused) * - * Tears down the in-kernel virtual mapping of the buffer object's memory. + * Tears down the in-kernel virtual mapping of the BO's memory. */ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) { @@ -94,14 +93,14 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr) /** * amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation - * @obj: GEM buffer object - * @vma: virtual memory area + * @obj: GEM BO + * @vma: Virtual memory area * - * Sets up a userspace mapping of the buffer object's memory in the given + * Sets up a userspace mapping of the BO's memory in the given * virtual memory area. * * Returns: - * 0 on success or negative error code. + * 0 on success or a negative error code on failure. */ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma) { @@ -144,10 +143,10 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma * @attach: DMA-buf attachment * @sg: Scatter/gather table * - * Import shared DMA buffer memory exported by another device. + * Imports shared DMA buffer memory exported by another device. * * Returns: - * A new GEM buffer object of the given DRM device, representing the memory + * A new GEM BO of the given DRM device, representing the memory * described by the given DMA-buf attachment and scatter/gather table. */ struct drm_gem_object * @@ -190,7 +189,7 @@ error: /** * amdgpu_gem_map_attach - &dma_buf_ops.attach implementation - * @dma_buf: shared DMA buffer + * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * * Makes sure that the shared DMA buffer can be accessed by the target device. @@ -198,7 +197,7 @@ error: * all DMA devices. * * Returns: - * 0 on success or negative error code. + * 0 on success or a negative error code on failure. */ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) @@ -250,11 +249,11 @@ error_detach: /** * amdgpu_gem_map_detach - &dma_buf_ops.detach implementation - * @dma_buf: shared DMA buffer + * @dma_buf: Shared DMA buffer * @attach: DMA-buf attachment * * This is called when a shared DMA buffer no longer needs to be accessible by - * the other device. For now, simply unpins the buffer from GTT. + * another device. For now, simply unpins the buffer from GTT. */ static void amdgpu_gem_map_detach(struct dma_buf *dma_buf, struct dma_buf_attachment *attach) @@ -279,10 +278,10 @@ error: /** * amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation - * @obj: GEM buffer object + * @obj: GEM BO * * Returns: - * The buffer object's reservation object. + * The BO's reservation object. */ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) { @@ -293,15 +292,15 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj) /** * amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation - * @dma_buf: shared DMA buffer - * @direction: direction of DMA transfer + * @dma_buf: Shared DMA buffer + * @direction: Direction of DMA transfer * * This is called before CPU access to the shared DMA buffer's memory. If it's * a read access, the buffer is moved to the GTT domain if possible, for optimal * CPU read performance. * * Returns: - * 0 on success or negative error code. + * 0 on success or a negative error code on failure. */ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, enum dma_data_direction direction) @@ -331,15 +330,13 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf, return ret; } -static const struct dma_buf_ops amdgpu_dmabuf_ops = { +const struct dma_buf_ops amdgpu_dmabuf_ops = { .attach = amdgpu_gem_map_attach, .detach = amdgpu_gem_map_detach, .map_dma_buf = drm_gem_map_dma_buf, .unmap_dma_buf = drm_gem_unmap_dma_buf, .release = drm_gem_dmabuf_release, .begin_cpu_access = amdgpu_gem_begin_cpu_access, - .map = drm_gem_dmabuf_kmap, - .unmap = drm_gem_dmabuf_kunmap, .mmap = drm_gem_dmabuf_mmap, .vmap = drm_gem_dmabuf_vmap, .vunmap = drm_gem_dmabuf_vunmap, @@ -348,14 +345,14 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = { /** * amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation * @dev: DRM device - * @gobj: GEM buffer object - * @flags: flags like DRM_CLOEXEC and DRM_RDWR + * @gobj: GEM BO + * @flags: Flags such as DRM_CLOEXEC and DRM_RDWR. * * The main work is done by the &drm_gem_prime_export helper, which in turn * uses &amdgpu_gem_prime_res_obj. * * Returns: - * Shared DMA buffer representing the GEM buffer object from the given device. + * Shared DMA buffer representing the GEM BO from the given device. */ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, struct drm_gem_object *gobj, @@ -386,7 +383,7 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev, * uses &amdgpu_gem_prime_import_sg_table. * * Returns: - * GEM buffer object representing the shared DMA buffer for the given device. + * GEM BO representing the shared DMA buffer for the given device. */ struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev, struct dma_buf *dma_buf) diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c index 5b39d1399630..8fab0d637ee5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c @@ -31,6 +31,7 @@ #include "soc15_common.h" #include "psp_v3_1.h" #include "psp_v10_0.h" +#include "psp_v11_0.h" static void psp_set_funcs(struct amdgpu_device *adev); @@ -52,12 +53,14 @@ static int psp_sw_init(void *handle) switch (adev->asic_type) { case CHIP_VEGA10: case CHIP_VEGA12: - case CHIP_VEGA20: psp_v3_1_set_psp_funcs(psp); break; case CHIP_RAVEN: psp_v10_0_set_psp_funcs(psp); break; + case CHIP_VEGA20: + psp_v11_0_set_psp_funcs(psp); + break; default: return -EINVAL; } @@ -87,6 +90,8 @@ static int psp_sw_fini(void *handle) adev->psp.sos_fw = NULL; release_firmware(adev->psp.asd_fw); adev->psp.asd_fw = NULL; + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; return 0; } @@ -115,20 +120,31 @@ int psp_wait_for(struct psp_context *psp, uint32_t reg_index, static int psp_cmd_submit_buf(struct psp_context *psp, struct amdgpu_firmware_info *ucode, - struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr, - int index) + struct psp_gfx_cmd_resp *cmd, uint64_t fence_mc_addr) { int ret; + int index; memset(psp->cmd_buf_mem, 0, PSP_CMD_BUFFER_SIZE); memcpy(psp->cmd_buf_mem, cmd, sizeof(struct psp_gfx_cmd_resp)); + index = atomic_inc_return(&psp->fence_value); ret = psp_cmd_submit(psp, ucode, psp->cmd_buf_mc_addr, fence_mc_addr, index); + if (ret) { + atomic_dec(&psp->fence_value); + return ret; + } - while (*((unsigned int *)psp->fence_buf) != index) { + while (*((unsigned int *)psp->fence_buf) != index) msleep(1); + + /* the status field must be 0 after FW is loaded */ + if (ucode && psp->cmd_buf_mem->resp.status) { + DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n", + psp->cmd_buf_mem->resp.status, ucode->ucode_id); + return -EINVAL; } if (ucode) { @@ -139,10 +155,14 @@ psp_cmd_submit_buf(struct psp_context *psp, return ret; } -static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd, +static void psp_prep_tmr_cmd_buf(struct psp_context *psp, + struct psp_gfx_cmd_resp *cmd, uint64_t tmr_mc, uint32_t size) { - cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; + if (psp_support_vmr_ring(psp)) + cmd->cmd_id = GFX_CMD_ID_SETUP_VMR; + else + cmd->cmd_id = GFX_CMD_ID_SETUP_TMR; cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc); cmd->cmd.cmd_setup_tmr.buf_size = size; @@ -160,7 +180,7 @@ static int psp_tmr_init(struct psp_context *psp) * Note: this memory need be reserved till the driver * uninitializes. */ - ret = amdgpu_bo_create_kernel(psp->adev, 0x300000, 0x100000, + ret = amdgpu_bo_create_kernel(psp->adev, PSP_TMR_SIZE, 0x100000, AMDGPU_GEM_DOMAIN_VRAM, &psp->tmr_bo, &psp->tmr_mc_addr, &psp->tmr_buf); @@ -176,10 +196,12 @@ static int psp_tmr_load(struct psp_context *psp) if (!cmd) return -ENOMEM; - psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, 0x300000); + psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE); + DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n", + PSP_TMR_SIZE, psp->tmr_mc_addr); ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr, 1); + psp->fence_buf_mc_addr); if (ret) goto failed; @@ -246,13 +268,194 @@ static int psp_asd_load(struct psp_context *psp) psp->asd_ucode_size, PSP_ASD_SHARED_MEM_SIZE); ret = psp_cmd_submit_buf(psp, NULL, cmd, - psp->fence_buf_mc_addr, 2); + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_xgmi_ta_load_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint64_t xgmi_ta_mc, uint64_t xgmi_mc_shared, + uint32_t xgmi_ta_size, uint32_t shared_size) +{ + cmd->cmd_id = GFX_CMD_ID_LOAD_TA; + cmd->cmd.cmd_load_ta.app_phy_addr_lo = lower_32_bits(xgmi_ta_mc); + cmd->cmd.cmd_load_ta.app_phy_addr_hi = upper_32_bits(xgmi_ta_mc); + cmd->cmd.cmd_load_ta.app_len = xgmi_ta_size; + + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_lo = lower_32_bits(xgmi_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_phy_addr_hi = upper_32_bits(xgmi_mc_shared); + cmd->cmd.cmd_load_ta.cmd_buf_len = shared_size; +} + +static int psp_xgmi_init_shared_buf(struct psp_context *psp) +{ + int ret; + + /* + * Allocate 16k memory aligned to 4k from Frame Buffer (local + * physical) for xgmi ta <-> Driver + */ + ret = amdgpu_bo_create_kernel(psp->adev, PSP_XGMI_SHARED_MEM_SIZE, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &psp->xgmi_context.xgmi_shared_bo, + &psp->xgmi_context.xgmi_shared_mc_addr, + &psp->xgmi_context.xgmi_shared_buf); + + return ret; +} + +static int psp_xgmi_load(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + memcpy(psp->fw_pri_buf, psp->ta_xgmi_start_addr, psp->ta_xgmi_ucode_size); + + psp_prep_xgmi_ta_load_cmd_buf(cmd, psp->fw_pri_mc_addr, + psp->xgmi_context.xgmi_shared_mc_addr, + psp->ta_xgmi_ucode_size, PSP_XGMI_SHARED_MEM_SIZE); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + if (!ret) { + psp->xgmi_context.initialized = 1; + psp->xgmi_context.session_id = cmd->resp.session_id; + } kfree(cmd); return ret; } +static void psp_prep_xgmi_ta_unload_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t xgmi_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_UNLOAD_TA; + cmd->cmd.cmd_unload_ta.session_id = xgmi_session_id; +} + +static int psp_xgmi_unload(struct psp_context *psp) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the unloading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_xgmi_ta_unload_cmd_buf(cmd, psp->xgmi_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static void psp_prep_xgmi_ta_invoke_cmd_buf(struct psp_gfx_cmd_resp *cmd, + uint32_t ta_cmd_id, + uint32_t xgmi_session_id) +{ + cmd->cmd_id = GFX_CMD_ID_INVOKE_CMD; + cmd->cmd.cmd_invoke_cmd.session_id = xgmi_session_id; + cmd->cmd.cmd_invoke_cmd.ta_cmd_id = ta_cmd_id; + /* Note: cmd_invoke_cmd.buf is not used for now */ +} + +int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id) +{ + int ret; + struct psp_gfx_cmd_resp *cmd; + + /* + * TODO: bypass the loading in sriov for now + */ + if (amdgpu_sriov_vf(psp->adev)) + return 0; + + cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + psp_prep_xgmi_ta_invoke_cmd_buf(cmd, ta_cmd_id, + psp->xgmi_context.session_id); + + ret = psp_cmd_submit_buf(psp, NULL, cmd, + psp->fence_buf_mc_addr); + + kfree(cmd); + + return ret; +} + +static int psp_xgmi_terminate(struct psp_context *psp) +{ + int ret; + + if (!psp->xgmi_context.initialized) + return 0; + + ret = psp_xgmi_unload(psp); + if (ret) + return ret; + + psp->xgmi_context.initialized = 0; + + /* free xgmi shared memory */ + amdgpu_bo_free_kernel(&psp->xgmi_context.xgmi_shared_bo, + &psp->xgmi_context.xgmi_shared_mc_addr, + &psp->xgmi_context.xgmi_shared_buf); + + return 0; +} + +static int psp_xgmi_initialize(struct psp_context *psp) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + if (!psp->xgmi_context.initialized) { + ret = psp_xgmi_init_shared_buf(psp); + if (ret) + return ret; + } + + /* Load XGMI TA */ + ret = psp_xgmi_load(psp); + if (ret) + return ret; + + /* Initialize XGMI session */ + xgmi_cmd = (struct ta_xgmi_shared_memory *)(psp->xgmi_context.xgmi_shared_buf); + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__INITIALIZE; + + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + + return ret; +} + static int psp_hw_start(struct psp_context *psp) { struct amdgpu_device *adev = psp->adev; @@ -280,6 +483,15 @@ static int psp_hw_start(struct psp_context *psp) if (ret) return ret; + if (adev->gmc.xgmi.num_physical_nodes > 1) { + ret = psp_xgmi_initialize(psp); + /* Warning the XGMI seesion initialize failure + * Instead of stop driver initialization + */ + if (ret) + dev_err(psp->adev->dev, + "XGMI: Failed to initialize XGMI session\n"); + } return 0; } @@ -309,7 +521,7 @@ static int psp_np_fw_load(struct psp_context *psp) return ret; ret = psp_cmd_submit_buf(psp, ucode, psp->cmd, - psp->fence_buf_mc_addr, i + 3); + psp->fence_buf_mc_addr); if (ret) return ret; @@ -328,8 +540,10 @@ static int psp_load_fw(struct amdgpu_device *adev) int ret; struct psp_context *psp = &adev->psp; - if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0) + if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) { + psp_ring_destroy(psp, PSP_RING_TYPE__KM); goto skip_memalloc; + } psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL); if (!psp->cmd) @@ -440,7 +654,9 @@ static int psp_hw_fini(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; - amdgpu_ucode_fini_bo(adev); + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) + psp_xgmi_terminate(psp); psp_ring_destroy(psp, PSP_RING_TYPE__KM); @@ -469,6 +685,15 @@ static int psp_suspend(void *handle) if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) return 0; + if (adev->gmc.xgmi.num_physical_nodes > 1 && + psp->xgmi_context.initialized == 1) { + ret = psp_xgmi_terminate(psp); + if (ret) { + DRM_ERROR("Failed to terminate xgmi ta\n"); + return ret; + } + } + ret = psp_ring_stop(psp, PSP_RING_TYPE__KM); if (ret) { DRM_ERROR("PSP ring stop failed\n"); @@ -594,3 +819,12 @@ const struct amdgpu_ip_block_version psp_v10_0_ip_block = .rev = 0, .funcs = &psp_ip_funcs, }; + +const struct amdgpu_ip_block_version psp_v11_0_ip_block = +{ + .type = AMD_IP_BLOCK_TYPE_PSP, + .major = 11, + .minor = 0, + .rev = 0, + .funcs = &psp_ip_funcs, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h index 129209686848..3ee573b4016e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h @@ -27,13 +27,18 @@ #include "amdgpu.h" #include "psp_gfx_if.h" +#include "ta_xgmi_if.h" #define PSP_FENCE_BUFFER_SIZE 0x1000 #define PSP_CMD_BUFFER_SIZE 0x1000 -#define PSP_ASD_SHARED_MEM_SIZE 0x4000 +#define PSP_ASD_SHARED_MEM_SIZE 0x4000 +#define PSP_XGMI_SHARED_MEM_SIZE 0x4000 #define PSP_1_MEG 0x100000 +#define PSP_TMR_SIZE 0x400000 struct psp_context; +struct psp_xgmi_node_info; +struct psp_xgmi_topology_info; enum psp_ring_type { @@ -63,18 +68,36 @@ struct psp_funcs int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode, struct psp_gfx_cmd_resp *cmd); int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type); - int (*ring_create)(struct psp_context *psp, enum psp_ring_type ring_type); + int (*ring_create)(struct psp_context *psp, + enum psp_ring_type ring_type); int (*ring_stop)(struct psp_context *psp, enum psp_ring_type ring_type); int (*ring_destroy)(struct psp_context *psp, enum psp_ring_type ring_type); - int (*cmd_submit)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, - uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, int index); + int (*cmd_submit)(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index); bool (*compare_sram_data)(struct psp_context *psp, struct amdgpu_firmware_info *ucode, enum AMDGPU_UCODE_ID ucode_type); bool (*smu_reload_quirk)(struct psp_context *psp); int (*mode1_reset)(struct psp_context *psp); + int (*xgmi_get_node_id)(struct psp_context *psp, uint64_t *node_id); + int (*xgmi_get_hive_id)(struct psp_context *psp, uint64_t *hive_id); + int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices, + struct psp_xgmi_topology_info *topology); + int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices, + struct psp_xgmi_topology_info *topology); + bool (*support_vmr_ring)(struct psp_context *psp); +}; + +struct psp_xgmi_context { + uint8_t initialized; + uint32_t session_id; + struct amdgpu_bo *xgmi_shared_bo; + uint64_t xgmi_shared_mc_addr; + void *xgmi_shared_buf; }; struct psp_context @@ -83,11 +106,11 @@ struct psp_context struct psp_ring km_ring; struct psp_gfx_cmd_resp *cmd; - const struct psp_funcs *funcs; + const struct psp_funcs *funcs; - /* fence buffer */ - struct amdgpu_bo *fw_pri_bo; - uint64_t fw_pri_mc_addr; + /* firmware buffer */ + struct amdgpu_bo *fw_pri_bo; + uint64_t fw_pri_mc_addr; void *fw_pri_buf; /* sos firmware */ @@ -100,8 +123,8 @@ struct psp_context uint8_t *sos_start_addr; /* tmr buffer */ - struct amdgpu_bo *tmr_bo; - uint64_t tmr_mc_addr; + struct amdgpu_bo *tmr_bo; + uint64_t tmr_mc_addr; void *tmr_buf; /* asd firmware and buffer */ @@ -110,19 +133,29 @@ struct psp_context uint32_t asd_feature_version; uint32_t asd_ucode_size; uint8_t *asd_start_addr; - struct amdgpu_bo *asd_shared_bo; - uint64_t asd_shared_mc_addr; + struct amdgpu_bo *asd_shared_bo; + uint64_t asd_shared_mc_addr; void *asd_shared_buf; /* fence buffer */ - struct amdgpu_bo *fence_buf_bo; - uint64_t fence_buf_mc_addr; + struct amdgpu_bo *fence_buf_bo; + uint64_t fence_buf_mc_addr; void *fence_buf; /* cmd buffer */ struct amdgpu_bo *cmd_buf_bo; uint64_t cmd_buf_mc_addr; struct psp_gfx_cmd_resp *cmd_buf_mem; + + /* fence value associated with cmd buffer */ + atomic_t fence_value; + + /* xgmi ta firmware and buffer */ + const struct firmware *ta_fw; + uint32_t ta_xgmi_ucode_version; + uint32_t ta_xgmi_ucode_size; + uint8_t *ta_xgmi_start_addr; + struct psp_xgmi_context xgmi_context; }; struct amdgpu_psp_funcs { @@ -130,6 +163,19 @@ struct amdgpu_psp_funcs { enum AMDGPU_UCODE_ID); }; +#define AMDGPU_XGMI_MAX_CONNECTED_NODES 64 +struct psp_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct psp_xgmi_topology_info { + uint32_t num_nodes; + struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES]; +}; + #define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type)) #define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type)) #define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type)) @@ -147,8 +193,22 @@ struct amdgpu_psp_funcs { ((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0) #define psp_smu_reload_quirk(psp) \ ((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false) +#define psp_support_vmr_ring(psp) \ + ((psp)->funcs->support_vmr_ring ? (psp)->funcs->support_vmr_ring((psp)) : false) #define psp_mode1_reset(psp) \ ((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false) +#define psp_xgmi_get_node_id(psp, node_id) \ + ((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp), (node_id)) : -EINVAL) +#define psp_xgmi_get_hive_id(psp, hive_id) \ + ((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp), (hive_id)) : -EINVAL) +#define psp_xgmi_get_topology_info(psp, num_device, topology) \ + ((psp)->funcs->xgmi_get_topology_info ? \ + (psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL) +#define psp_xgmi_set_topology_info(psp, num_device, topology) \ + ((psp)->funcs->xgmi_set_topology_info ? \ + (psp)->funcs->xgmi_set_topology_info((psp), (num_device), (topology)) : -EINVAL) + +#define amdgpu_psp_check_fw_loading_status(adev, i) (adev)->firmware.funcs->check_fw_loading_status((adev), (i)) extern const struct amd_ip_funcs psp_ip_funcs; @@ -159,5 +219,7 @@ extern int psp_wait_for(struct psp_context *psp, uint32_t reg_index, extern const struct amdgpu_ip_block_version psp_v10_0_ip_block; int psp_gpu_reset(struct amdgpu_device *adev); +int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id); +extern const struct amdgpu_ip_block_version psp_v11_0_ip_block; #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c deleted file mode 100644 index a172bba32b45..000000000000 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_queue_mgr.c +++ /dev/null @@ -1,316 +0,0 @@ -/* - * Copyright 2017 Valve Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in - * all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR - * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, - * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR - * OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: Andres Rodriguez - */ - -#include "amdgpu.h" -#include "amdgpu_ring.h" - -static int amdgpu_queue_mapper_init(struct amdgpu_queue_mapper *mapper, - int hw_ip) -{ - if (!mapper) - return -EINVAL; - - if (hw_ip > AMDGPU_MAX_IP_NUM) - return -EINVAL; - - mapper->hw_ip = hw_ip; - mutex_init(&mapper->lock); - - memset(mapper->queue_map, 0, sizeof(mapper->queue_map)); - - return 0; -} - -static struct amdgpu_ring *amdgpu_get_cached_map(struct amdgpu_queue_mapper *mapper, - int ring) -{ - return mapper->queue_map[ring]; -} - -static int amdgpu_update_cached_map(struct amdgpu_queue_mapper *mapper, - int ring, struct amdgpu_ring *pring) -{ - if (WARN_ON(mapper->queue_map[ring])) { - DRM_ERROR("Un-expected ring re-map\n"); - return -EINVAL; - } - - mapper->queue_map[ring] = pring; - - return 0; -} - -static int amdgpu_identity_map(struct amdgpu_device *adev, - struct amdgpu_queue_mapper *mapper, - u32 ring, - struct amdgpu_ring **out_ring) -{ - switch (mapper->hw_ip) { - case AMDGPU_HW_IP_GFX: - *out_ring = &adev->gfx.gfx_ring[ring]; - break; - case AMDGPU_HW_IP_COMPUTE: - *out_ring = &adev->gfx.compute_ring[ring]; - break; - case AMDGPU_HW_IP_DMA: - *out_ring = &adev->sdma.instance[ring].ring; - break; - case AMDGPU_HW_IP_UVD: - *out_ring = &adev->uvd.inst[0].ring; - break; - case AMDGPU_HW_IP_VCE: - *out_ring = &adev->vce.ring[ring]; - break; - case AMDGPU_HW_IP_UVD_ENC: - *out_ring = &adev->uvd.inst[0].ring_enc[ring]; - break; - case AMDGPU_HW_IP_VCN_DEC: - *out_ring = &adev->vcn.ring_dec; - break; - case AMDGPU_HW_IP_VCN_ENC: - *out_ring = &adev->vcn.ring_enc[ring]; - break; - case AMDGPU_HW_IP_VCN_JPEG: - *out_ring = &adev->vcn.ring_jpeg; - break; - default: - *out_ring = NULL; - DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip); - return -EINVAL; - } - - return amdgpu_update_cached_map(mapper, ring, *out_ring); -} - -static enum amdgpu_ring_type amdgpu_hw_ip_to_ring_type(int hw_ip) -{ - switch (hw_ip) { - case AMDGPU_HW_IP_GFX: - return AMDGPU_RING_TYPE_GFX; - case AMDGPU_HW_IP_COMPUTE: - return AMDGPU_RING_TYPE_COMPUTE; - case AMDGPU_HW_IP_DMA: - return AMDGPU_RING_TYPE_SDMA; - case AMDGPU_HW_IP_UVD: - return AMDGPU_RING_TYPE_UVD; - case AMDGPU_HW_IP_VCE: - return AMDGPU_RING_TYPE_VCE; - default: - DRM_ERROR("Invalid HW IP specified %d\n", hw_ip); - return -1; - } -} - -static int amdgpu_lru_map(struct amdgpu_device *adev, - struct amdgpu_queue_mapper *mapper, - u32 user_ring, bool lru_pipe_order, - struct amdgpu_ring **out_ring) -{ - int r, i, j; - int ring_type = amdgpu_hw_ip_to_ring_type(mapper->hw_ip); - int ring_blacklist[AMDGPU_MAX_RINGS]; - struct amdgpu_ring *ring; - - /* 0 is a valid ring index, so initialize to -1 */ - memset(ring_blacklist, 0xff, sizeof(ring_blacklist)); - - for (i = 0, j = 0; i < AMDGPU_MAX_RINGS; i++) { - ring = mapper->queue_map[i]; - if (ring) - ring_blacklist[j++] = ring->idx; - } - - r = amdgpu_ring_lru_get(adev, ring_type, ring_blacklist, - j, lru_pipe_order, out_ring); - if (r) - return r; - - return amdgpu_update_cached_map(mapper, user_ring, *out_ring); -} - -/** - * amdgpu_queue_mgr_init - init an amdgpu_queue_mgr struct - * - * @adev: amdgpu_device pointer - * @mgr: amdgpu_queue_mgr structure holding queue information - * - * Initialize the the selected @mgr (all asics). - * - * Returns 0 on success, error on failure. - */ -int amdgpu_queue_mgr_init(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr) -{ - int i, r; - - if (!adev || !mgr) - return -EINVAL; - - memset(mgr, 0, sizeof(*mgr)); - - for (i = 0; i < AMDGPU_MAX_IP_NUM; ++i) { - r = amdgpu_queue_mapper_init(&mgr->mapper[i], i); - if (r) - return r; - } - - return 0; -} - -/** - * amdgpu_queue_mgr_fini - de-initialize an amdgpu_queue_mgr struct - * - * @adev: amdgpu_device pointer - * @mgr: amdgpu_queue_mgr structure holding queue information - * - * De-initialize the the selected @mgr (all asics). - * - * Returns 0 on success, error on failure. - */ -int amdgpu_queue_mgr_fini(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr) -{ - return 0; -} - -/** - * amdgpu_queue_mgr_map - Map a userspace ring id to an amdgpu_ring - * - * @adev: amdgpu_device pointer - * @mgr: amdgpu_queue_mgr structure holding queue information - * @hw_ip: HW IP enum - * @instance: HW instance - * @ring: user ring id - * @our_ring: pointer to mapped amdgpu_ring - * - * Map a userspace ring id to an appropriate kernel ring. Different - * policies are configurable at a HW IP level. - * - * Returns 0 on success, error on failure. - */ -int amdgpu_queue_mgr_map(struct amdgpu_device *adev, - struct amdgpu_queue_mgr *mgr, - u32 hw_ip, u32 instance, u32 ring, - struct amdgpu_ring **out_ring) -{ - int i, r, ip_num_rings = 0; - struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip]; - - if (!adev || !mgr || !out_ring) - return -EINVAL; - - if (hw_ip >= AMDGPU_MAX_IP_NUM) - return -EINVAL; - - if (ring >= AMDGPU_MAX_RINGS) - return -EINVAL; - - /* Right now all IPs have only one instance - multiple rings. */ - if (instance != 0) { - DRM_DEBUG("invalid ip instance: %d\n", instance); - return -EINVAL; - } - - switch (hw_ip) { - case AMDGPU_HW_IP_GFX: - ip_num_rings = adev->gfx.num_gfx_rings; - break; - case AMDGPU_HW_IP_COMPUTE: - ip_num_rings = adev->gfx.num_compute_rings; - break; - case AMDGPU_HW_IP_DMA: - ip_num_rings = adev->sdma.num_instances; - break; - case AMDGPU_HW_IP_UVD: - for (i = 0; i < adev->uvd.num_uvd_inst; i++) { - if (!(adev->uvd.harvest_config & (1 << i))) - ip_num_rings++; - } - break; - case AMDGPU_HW_IP_VCE: - ip_num_rings = adev->vce.num_rings; - break; - case AMDGPU_HW_IP_UVD_ENC: - for (i = 0; i < adev->uvd.num_uvd_inst; i++) { - if (!(adev->uvd.harvest_config & (1 << i))) - ip_num_rings++; - } - ip_num_rings = - adev->uvd.num_enc_rings * ip_num_rings; - break; - case AMDGPU_HW_IP_VCN_DEC: - ip_num_rings = 1; - break; - case AMDGPU_HW_IP_VCN_ENC: - ip_num_rings = adev->vcn.num_enc_rings; - break; - case AMDGPU_HW_IP_VCN_JPEG: - ip_num_rings = 1; - break; - default: - DRM_DEBUG("unknown ip type: %d\n", hw_ip); - return -EINVAL; - } - - if (ring >= ip_num_rings) { - DRM_DEBUG("Ring index:%d exceeds maximum:%d for ip:%d\n", - ring, ip_num_rings, hw_ip); - return -EINVAL; - } - - mutex_lock(&mapper->lock); - - *out_ring = amdgpu_get_cached_map(mapper, ring); - if (*out_ring) { - /* cache hit */ - r = 0; - goto out_unlock; - } - - switch (mapper->hw_ip) { - case AMDGPU_HW_IP_GFX: - case AMDGPU_HW_IP_UVD: - case AMDGPU_HW_IP_VCE: - case AMDGPU_HW_IP_UVD_ENC: - case AMDGPU_HW_IP_VCN_DEC: - case AMDGPU_HW_IP_VCN_ENC: - case AMDGPU_HW_IP_VCN_JPEG: - r = amdgpu_identity_map(adev, mapper, ring, out_ring); - break; - case AMDGPU_HW_IP_DMA: - r = amdgpu_lru_map(adev, mapper, ring, false, out_ring); - break; - case AMDGPU_HW_IP_COMPUTE: - r = amdgpu_lru_map(adev, mapper, ring, true, out_ring); - break; - default: - *out_ring = NULL; - r = -EINVAL; - DRM_DEBUG("unknown HW IP type: %d\n", mapper->hw_ip); - } - -out_unlock: - mutex_unlock(&mapper->lock); - return r; -} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c index 93794a85f83d..335a0edf114b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c @@ -135,9 +135,6 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring) if (ring->funcs->end_use) ring->funcs->end_use(ring); - - if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) - amdgpu_ring_lru_touch(ring->adev, ring); } /** @@ -320,8 +317,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, ring->max_dw = max_dw; ring->priority = DRM_SCHED_PRIORITY_NORMAL; mutex_init(&ring->priority_mutex); - INIT_LIST_HEAD(&ring->lru_list); - amdgpu_ring_lru_touch(adev, ring); for (i = 0; i < DRM_SCHED_PRIORITY_MAX; ++i) atomic_set(&ring->num_jobs[i], 0); @@ -343,7 +338,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, */ void amdgpu_ring_fini(struct amdgpu_ring *ring) { - ring->ready = false; + ring->sched.ready = false; /* Not to finish a ring which is not initialized */ if (!(ring->adev) || !(ring->adev->rings[ring->idx])) @@ -368,99 +363,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring) ring->adev->rings[ring->idx] = NULL; } -static void amdgpu_ring_lru_touch_locked(struct amdgpu_device *adev, - struct amdgpu_ring *ring) -{ - /* list_move_tail handles the case where ring isn't part of the list */ - list_move_tail(&ring->lru_list, &adev->ring_lru_list); -} - -static bool amdgpu_ring_is_blacklisted(struct amdgpu_ring *ring, - int *blacklist, int num_blacklist) -{ - int i; - - for (i = 0; i < num_blacklist; i++) { - if (ring->idx == blacklist[i]) - return true; - } - - return false; -} - -/** - * amdgpu_ring_lru_get - get the least recently used ring for a HW IP block - * - * @adev: amdgpu_device pointer - * @type: amdgpu_ring_type enum - * @blacklist: blacklisted ring ids array - * @num_blacklist: number of entries in @blacklist - * @lru_pipe_order: find a ring from the least recently used pipe - * @ring: output ring - * - * Retrieve the amdgpu_ring structure for the least recently used ring of - * a specific IP block (all asics). - * Returns 0 on success, error on failure. - */ -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, - int *blacklist, int num_blacklist, - bool lru_pipe_order, struct amdgpu_ring **ring) -{ - struct amdgpu_ring *entry; - - /* List is sorted in LRU order, find first entry corresponding - * to the desired HW IP */ - *ring = NULL; - spin_lock(&adev->ring_lru_list_lock); - list_for_each_entry(entry, &adev->ring_lru_list, lru_list) { - if (entry->funcs->type != type) - continue; - - if (amdgpu_ring_is_blacklisted(entry, blacklist, num_blacklist)) - continue; - - if (!*ring) { - *ring = entry; - - /* We are done for ring LRU */ - if (!lru_pipe_order) - break; - } - - /* Move all rings on the same pipe to the end of the list */ - if (entry->pipe == (*ring)->pipe) - amdgpu_ring_lru_touch_locked(adev, entry); - } - - /* Move the ring we found to the end of the list */ - if (*ring) - amdgpu_ring_lru_touch_locked(adev, *ring); - - spin_unlock(&adev->ring_lru_list_lock); - - if (!*ring) { - DRM_ERROR("Ring LRU contains no entries for ring type:%d\n", type); - return -EINVAL; - } - - return 0; -} - -/** - * amdgpu_ring_lru_touch - mark a ring as recently being used - * - * @adev: amdgpu_device pointer - * @ring: ring to touch - * - * Move @ring to the tail of the lru list - */ -void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring) -{ - spin_lock(&adev->ring_lru_list_lock); - amdgpu_ring_lru_touch_locked(adev, ring); - spin_unlock(&adev->ring_lru_list_lock); -} - /** * amdgpu_ring_emit_reg_write_reg_wait_helper - ring helper * @@ -481,6 +383,31 @@ void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, amdgpu_ring_emit_reg_wait(ring, reg1, mask, mask); } +/** + * amdgpu_ring_soft_recovery - try to soft recover a ring lockup + * + * @ring: ring to try the recovery on + * @vmid: VMID we try to get going again + * @fence: timedout fence + * + * Tries to get a ring proceeding again when it is stuck. + */ +bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, + struct dma_fence *fence) +{ + ktime_t deadline = ktime_add_us(ktime_get(), 10000); + + if (!ring->funcs->soft_recovery || !fence) + return false; + + atomic_inc(&ring->adev->gpu_reset_counter); + while (!dma_fence_is_signaled(fence) && + ktime_to_ns(ktime_sub(deadline, ktime_get())) > 0) + ring->funcs->soft_recovery(ring, vmid); + + return dma_fence_is_signaled(fence); +} + /* * Debugfs info */ @@ -573,3 +500,29 @@ static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring) debugfs_remove(ring->ent); #endif } + +/** + * amdgpu_ring_test_helper - tests ring and set sched readiness status + * + * @ring: ring to try the recovery on + * + * Tests ring and set sched readiness status + * + * Returns 0 on success, error on failure. + */ +int amdgpu_ring_test_helper(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int r; + + r = amdgpu_ring_test_ring(ring); + if (r) + DRM_DEV_ERROR(adev->dev, "ring %s test failed (%d)\n", + ring->name, r); + else + DRM_DEV_DEBUG(adev->dev, "ring test on %s succeeded\n", + ring->name); + + ring->sched.ready = !r; + return r; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h index d242b9a51e90..d87e828a084b 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h @@ -29,7 +29,7 @@ #include <drm/drm_print.h> /* max number of rings */ -#define AMDGPU_MAX_RINGS 21 +#define AMDGPU_MAX_RINGS 23 #define AMDGPU_MAX_GFX_RINGS 1 #define AMDGPU_MAX_COMPUTE_RINGS 8 #define AMDGPU_MAX_VCE_RINGS 3 @@ -97,7 +97,7 @@ void amdgpu_fence_driver_resume(struct amdgpu_device *adev); int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence, unsigned flags); int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s); -void amdgpu_fence_process(struct amdgpu_ring *ring); +bool amdgpu_fence_process(struct amdgpu_ring *ring); int amdgpu_fence_wait_empty(struct amdgpu_ring *ring); signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring, uint32_t wait_seq, @@ -129,8 +129,9 @@ struct amdgpu_ring_funcs { unsigned emit_ib_size; /* command emit functions */ void (*emit_ib)(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch); + bool ctx_switch); void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr, uint64_t seq, unsigned flags); void (*emit_pipeline_sync)(struct amdgpu_ring *ring); @@ -168,6 +169,8 @@ struct amdgpu_ring_funcs { /* priority functions */ void (*set_priority) (struct amdgpu_ring *ring, enum drm_sched_priority priority); + /* Try to soft recover the ring to make the fence signal */ + void (*soft_recovery)(struct amdgpu_ring *ring, unsigned vmid); }; struct amdgpu_ring { @@ -175,7 +178,6 @@ struct amdgpu_ring { const struct amdgpu_ring_funcs *funcs; struct amdgpu_fence_driver fence_drv; struct drm_gpu_scheduler sched; - struct list_head lru_list; struct amdgpu_bo *ring_obj; volatile uint32_t *ring; @@ -188,7 +190,6 @@ struct amdgpu_ring { uint64_t gpu_addr; uint64_t ptr_mask; uint32_t buf_mask; - bool ready; u32 idx; u32 me; u32 pipe; @@ -221,6 +222,30 @@ struct amdgpu_ring { #endif }; +#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib))) +#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib))) +#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r)) +#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t)) +#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r)) +#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r)) +#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r)) +#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c))) +#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r)) +#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr)) +#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags)) +#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as)) +#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r)) +#define amdgpu_ring_emit_switch_buffer(r) (r)->funcs->emit_switch_buffer((r)) +#define amdgpu_ring_emit_cntxcntl(r, d) (r)->funcs->emit_cntxcntl((r), (d)) +#define amdgpu_ring_emit_rreg(r, d) (r)->funcs->emit_rreg((r), (d)) +#define amdgpu_ring_emit_wreg(r, d, v) (r)->funcs->emit_wreg((r), (d), (v)) +#define amdgpu_ring_emit_reg_wait(r, d, v, m) (r)->funcs->emit_reg_wait((r), (d), (v), (m)) +#define amdgpu_ring_emit_reg_write_reg_wait(r, d0, d1, v, m) (r)->funcs->emit_reg_write_reg_wait((r), (d0), (d1), (v), (m)) +#define amdgpu_ring_emit_tmz(r, b) (r)->funcs->emit_tmz((r), (b)) +#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib))) +#define amdgpu_ring_init_cond_exec(r) (r)->funcs->init_cond_exec((r)) +#define amdgpu_ring_patch_cond_exec(r,o) (r)->funcs->patch_cond_exec((r),(o)) + int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw); void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count); void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib); @@ -234,13 +259,11 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring, unsigned ring_size, struct amdgpu_irq_src *irq_src, unsigned irq_type); void amdgpu_ring_fini(struct amdgpu_ring *ring); -int amdgpu_ring_lru_get(struct amdgpu_device *adev, int type, - int *blacklist, int num_blacklist, - bool lru_pipe_order, struct amdgpu_ring **ring); -void amdgpu_ring_lru_touch(struct amdgpu_device *adev, struct amdgpu_ring *ring); void amdgpu_ring_emit_reg_write_reg_wait_helper(struct amdgpu_ring *ring, uint32_t reg0, uint32_t val0, uint32_t reg1, uint32_t val1); +bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid, + struct dma_fence *fence); static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring) { @@ -290,4 +313,6 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring, ring->count_dw -= count_dw; } +int amdgpu_ring_test_helper(struct amdgpu_ring *ring); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c new file mode 100644 index 000000000000..c8793e6cc3c5 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.c @@ -0,0 +1,282 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * Copyright 2008 Red Hat Inc. + * Copyright 2009 Jerome Glisse. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_gfx.h" +#include "amdgpu_rlc.h" + +/** + * amdgpu_gfx_rlc_enter_safe_mode - Set RLC into safe mode + * + * @adev: amdgpu_device pointer + * + * Set RLC enter into safe mode if RLC is enabled and haven't in safe mode. + */ +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev) +{ + if (adev->gfx.rlc.in_safe_mode) + return; + + /* if RLC is not enabled, do nothing */ + if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + adev->gfx.rlc.funcs->set_safe_mode(adev); + adev->gfx.rlc.in_safe_mode = true; + } +} + +/** + * amdgpu_gfx_rlc_exit_safe_mode - Set RLC out of safe mode + * + * @adev: amdgpu_device pointer + * + * Set RLC exit safe mode if RLC is enabled and have entered into safe mode. + */ +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev) +{ + if (!(adev->gfx.rlc.in_safe_mode)) + return; + + /* if RLC is not enabled, do nothing */ + if (!adev->gfx.rlc.funcs->is_rlc_enabled(adev)) + return; + + if (adev->cg_flags & + (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_3D_CGCG)) { + adev->gfx.rlc.funcs->unset_safe_mode(adev); + adev->gfx.rlc.in_safe_mode = false; + } +} + +/** + * amdgpu_gfx_rlc_init_sr - Init save restore block + * + * @adev: amdgpu_device pointer + * @dws: the size of save restore block + * + * Allocate and setup value to save restore block of rlc. + * Returns 0 on succeess or negative error code if allocate failed. + */ +int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws) +{ + const u32 *src_ptr; + volatile u32 *dst_ptr; + u32 i; + int r; + + /* allocate save restore block */ + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); + if (r) { + dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* write the sr buffer */ + src_ptr = adev->gfx.rlc.reg_list; + dst_ptr = adev->gfx.rlc.sr_ptr; + for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) + dst_ptr[i] = cpu_to_le32(src_ptr[i]); + amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_init_csb - Init clear state block + * + * @adev: amdgpu_device pointer + * + * Allocate and setup value to clear state block of rlc. + * Returns 0 on succeess or negative error code if allocate failed. + */ +int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev) +{ + volatile u32 *dst_ptr; + u32 dws; + int r; + + /* allocate clear state block */ + adev->gfx.rlc.clear_state_size = dws = adev->gfx.rlc.funcs->get_csb_size(adev); + r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* set up the cs buffer */ + dst_ptr = adev->gfx.rlc.cs_ptr; + adev->gfx.rlc.funcs->get_csb_buffer(adev, dst_ptr); + amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_init_cpt - Init cp table + * + * @adev: amdgpu_device pointer + * + * Allocate and setup value to cp table of rlc. + * Returns 0 on succeess or negative error code if allocate failed. + */ +int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev) +{ + int r; + + r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, + &adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); + if (r) { + dev_err(adev->dev, "(%d) failed to create cp table bo\n", r); + amdgpu_gfx_rlc_fini(adev); + return r; + } + + /* set up the cp table */ + amdgpu_gfx_rlc_setup_cp_table(adev); + amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); + amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + + return 0; +} + +/** + * amdgpu_gfx_rlc_setup_cp_table - setup cp the buffer of cp table + * + * @adev: amdgpu_device pointer + * + * Write cp firmware data into cp table. + */ +void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev) +{ + const __le32 *fw_data; + volatile u32 *dst_ptr; + int me, i, max_me; + u32 bo_offset = 0; + u32 table_offset, table_size; + + max_me = adev->gfx.rlc.funcs->get_cp_table_num(adev); + + /* write the cp table buffer */ + dst_ptr = adev->gfx.rlc.cp_table_ptr; + for (me = 0; me < max_me; me++) { + if (me == 0) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; + fw_data = (const __le32 *) + (adev->gfx.ce_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 1) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; + fw_data = (const __le32 *) + (adev->gfx.pfp_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 2) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; + fw_data = (const __le32 *) + (adev->gfx.me_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 3) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } else if (me == 4) { + const struct gfx_firmware_header_v1_0 *hdr = + (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; + fw_data = (const __le32 *) + (adev->gfx.mec2_fw->data + + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); + table_offset = le32_to_cpu(hdr->jt_offset); + table_size = le32_to_cpu(hdr->jt_size); + } + + for (i = 0; i < table_size; i ++) { + dst_ptr[bo_offset + i] = + cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); + } + + bo_offset += table_size; + } +} + +/** + * amdgpu_gfx_rlc_fini - Free BO which used for RLC + * + * @adev: amdgpu_device pointer + * + * Free three BO which is used for rlc_save_restore_block, rlc_clear_state_block + * and rlc_jump_table_block. + */ +void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev) +{ + /* save restore block */ + if (adev->gfx.rlc.save_restore_obj) { + amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, + &adev->gfx.rlc.save_restore_gpu_addr, + (void **)&adev->gfx.rlc.sr_ptr); + } + + /* clear state block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, + &adev->gfx.rlc.clear_state_gpu_addr, + (void **)&adev->gfx.rlc.cs_ptr); + + /* jump table block */ + amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, + &adev->gfx.rlc.cp_table_gpu_addr, + (void **)&adev->gfx.rlc.cp_table_ptr); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h new file mode 100644 index 000000000000..49a8ab52113b --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_rlc.h @@ -0,0 +1,98 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_RLC_H__ +#define __AMDGPU_RLC_H__ + +#include "clearstate_defs.h" + +struct amdgpu_rlc_funcs { + bool (*is_rlc_enabled)(struct amdgpu_device *adev); + void (*set_safe_mode)(struct amdgpu_device *adev); + void (*unset_safe_mode)(struct amdgpu_device *adev); + int (*init)(struct amdgpu_device *adev); + u32 (*get_csb_size)(struct amdgpu_device *adev); + void (*get_csb_buffer)(struct amdgpu_device *adev, volatile u32 *buffer); + int (*get_cp_table_num)(struct amdgpu_device *adev); + int (*resume)(struct amdgpu_device *adev); + void (*stop)(struct amdgpu_device *adev); + void (*reset)(struct amdgpu_device *adev); + void (*start)(struct amdgpu_device *adev); +}; + +struct amdgpu_rlc { + /* for power gating */ + struct amdgpu_bo *save_restore_obj; + uint64_t save_restore_gpu_addr; + volatile uint32_t *sr_ptr; + const u32 *reg_list; + u32 reg_list_size; + /* for clear state */ + struct amdgpu_bo *clear_state_obj; + uint64_t clear_state_gpu_addr; + volatile uint32_t *cs_ptr; + const struct cs_section_def *cs_data; + u32 clear_state_size; + /* for cp tables */ + struct amdgpu_bo *cp_table_obj; + uint64_t cp_table_gpu_addr; + volatile uint32_t *cp_table_ptr; + u32 cp_table_size; + + /* safe mode for updating CG/PG state */ + bool in_safe_mode; + const struct amdgpu_rlc_funcs *funcs; + + /* for firmware data */ + u32 save_and_restore_offset; + u32 clear_state_descriptor_offset; + u32 avail_scratch_ram_locations; + u32 reg_restore_list_size; + u32 reg_list_format_start; + u32 reg_list_format_separate_start; + u32 starting_offsets_start; + u32 reg_list_format_size_bytes; + u32 reg_list_size_bytes; + u32 reg_list_format_direct_reg_list_length; + u32 save_restore_list_cntl_size_bytes; + u32 save_restore_list_gpm_size_bytes; + u32 save_restore_list_srm_size_bytes; + + u32 *register_list_format; + u32 *register_restore; + u8 *save_restore_list_cntl; + u8 *save_restore_list_gpm; + u8 *save_restore_list_srm; + + bool is_rlc_v2_1; +}; + +void amdgpu_gfx_rlc_enter_safe_mode(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_exit_safe_mode(struct amdgpu_device *adev); +int amdgpu_gfx_rlc_init_sr(struct amdgpu_device *adev, u32 dws); +int amdgpu_gfx_rlc_init_csb(struct amdgpu_device *adev); +int amdgpu_gfx_rlc_init_cpt(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_setup_cp_table(struct amdgpu_device *adev); +void amdgpu_gfx_rlc_fini(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c index fb1667b35daa..12f2bf97611f 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c @@ -226,6 +226,8 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager, for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { struct amdgpu_sa_bo *sa_bo; + fences[i] = NULL; + if (list_empty(&sa_manager->flist[i])) continue; @@ -296,10 +298,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager, spin_lock(&sa_manager->wq.lock); do { - for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) { - fences[i] = NULL; + for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) tries[i] = 0; - } do { amdgpu_sa_bo_try_free(sa_manager); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c new file mode 100644 index 000000000000..115bb0c99b0f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c @@ -0,0 +1,58 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include <drm/drmP.h> +#include "amdgpu.h" +#include "amdgpu_sdma.h" + +/* + * GPU SDMA IP block helpers function. + */ + +struct amdgpu_sdma_instance *amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) + return &adev->sdma.instance[i]; + + return NULL; +} + +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index) +{ + struct amdgpu_device *adev = ring->adev; + int i; + + for (i = 0; i < adev->sdma.num_instances; i++) { + if (ring == &adev->sdma.instance[i].ring || + ring == &adev->sdma.instance[i].page) { + *index = i; + return 0; + } + } + + return -EINVAL; +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h new file mode 100644 index 000000000000..16b1a6ae5ba6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h @@ -0,0 +1,100 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __AMDGPU_SDMA_H__ +#define __AMDGPU_SDMA_H__ + +/* max number of IP instances */ +#define AMDGPU_MAX_SDMA_INSTANCES 2 + +enum amdgpu_sdma_irq { + AMDGPU_SDMA_IRQ_TRAP0 = 0, + AMDGPU_SDMA_IRQ_TRAP1, + + AMDGPU_SDMA_IRQ_LAST +}; + +struct amdgpu_sdma_instance { + /* SDMA firmware */ + const struct firmware *fw; + uint32_t fw_version; + uint32_t feature_version; + + struct amdgpu_ring ring; + struct amdgpu_ring page; + bool burst_nop; +}; + +struct amdgpu_sdma { + struct amdgpu_sdma_instance instance[AMDGPU_MAX_SDMA_INSTANCES]; + struct amdgpu_irq_src trap_irq; + struct amdgpu_irq_src illegal_inst_irq; + int num_instances; + uint32_t srbm_soft_reset; + bool has_page_queue; +}; + +/* + * Provided by hw blocks that can move/clear data. e.g., gfx or sdma + * But currently, we use sdma to move data. + */ +struct amdgpu_buffer_funcs { + /* maximum bytes in a single operation */ + uint32_t copy_max_bytes; + + /* number of dw to reserve per operation */ + unsigned copy_num_dw; + + /* used for buffer migration */ + void (*emit_copy_buffer)(struct amdgpu_ib *ib, + /* src addr in bytes */ + uint64_t src_offset, + /* dst addr in bytes */ + uint64_t dst_offset, + /* number of byte to transfer */ + uint32_t byte_count); + + /* maximum bytes in a single operation */ + uint32_t fill_max_bytes; + + /* number of dw to reserve per operation */ + unsigned fill_num_dw; + + /* used for buffer clearing */ + void (*emit_fill_buffer)(struct amdgpu_ib *ib, + /* value to write to memory */ + uint32_t src_data, + /* dst addr in bytes */ + uint64_t dst_offset, + /* number of byte to fill */ + uint32_t byte_count); +}; + +#define amdgpu_emit_copy_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_copy_buffer((ib), (s), (d), (b)) +#define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b)) + +struct amdgpu_sdma_instance * +amdgpu_sdma_get_instance_from_ring(struct amdgpu_ring *ring); +int amdgpu_sdma_get_index_from_ring(struct amdgpu_ring *ring, uint32_t *index); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h index 7206a0025b17..626abca770a0 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h @@ -103,7 +103,7 @@ TRACE_EVENT(amdgpu_iv, __entry->src_data[2] = iv->src_data[2]; __entry->src_data[3] = iv->src_data[3]; ), - TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x\n", + TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x", __entry->client_id, __entry->src_id, __entry->ring_id, __entry->vmid, __entry->timestamp, __entry->pasid, @@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs, TP_fast_assign( __entry->bo_list = p->bo_list; - __entry->ring = p->ring->idx; + __entry->ring = to_amdgpu_ring(p->entity->rq->sched)->idx; __entry->dw = p->job->ibs[i].length_dw; __entry->fences = amdgpu_fence_count_emitted( - p->ring); + to_amdgpu_ring(p->entity->rq->sched)); ), TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u", __entry->bo_list, __entry->ring, __entry->dw, @@ -218,6 +218,7 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_ARGS(vm, ring, job), TP_STRUCT__entry( __field(u32, pasid) + __string(ring, ring->name) __field(u32, ring) __field(u32, vmid) __field(u32, vm_hub) @@ -227,14 +228,14 @@ TRACE_EVENT(amdgpu_vm_grab_id, TP_fast_assign( __entry->pasid = vm->pasid; - __entry->ring = ring->idx; + __assign_str(ring, ring->name) __entry->vmid = job->vmid; __entry->vm_hub = ring->funcs->vmhub, __entry->pd_addr = job->vm_pd_addr; __entry->needs_flush = job->vm_needs_flush; ), - TP_printk("pasid=%d, ring=%u, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", - __entry->pasid, __entry->ring, __entry->vmid, + TP_printk("pasid=%d, ring=%s, id=%u, hub=%u, pd_addr=%010Lx needs_flush=%u", + __entry->pasid, __get_str(ring), __entry->vmid, __entry->vm_hub, __entry->pd_addr, __entry->needs_flush) ); @@ -366,20 +367,20 @@ TRACE_EVENT(amdgpu_vm_flush, uint64_t pd_addr), TP_ARGS(ring, vmid, pd_addr), TP_STRUCT__entry( - __field(u32, ring) + __string(ring, ring->name) __field(u32, vmid) __field(u32, vm_hub) __field(u64, pd_addr) ), TP_fast_assign( - __entry->ring = ring->idx; + __assign_str(ring, ring->name) __entry->vmid = vmid; __entry->vm_hub = ring->funcs->vmhub; __entry->pd_addr = pd_addr; ), - TP_printk("ring=%u, id=%u, hub=%u, pd_addr=%010Lx", - __entry->ring, __entry->vmid, + TP_printk("ring=%s, id=%u, hub=%u, pd_addr=%010Lx", + __get_str(ring), __entry->vmid, __entry->vm_hub,__entry->pd_addr) ); @@ -462,6 +463,30 @@ TRACE_EVENT(amdgpu_bo_move, __entry->new_placement, __entry->bo_size) ); +TRACE_EVENT(amdgpu_ib_pipe_sync, + TP_PROTO(struct amdgpu_job *sched_job, struct dma_fence *fence), + TP_ARGS(sched_job, fence), + TP_STRUCT__entry( + __field(const char *,name) + __field(uint64_t, id) + __field(struct dma_fence *, fence) + __field(uint64_t, ctx) + __field(unsigned, seqno) + ), + + TP_fast_assign( + __entry->name = sched_job->base.sched->name; + __entry->id = sched_job->base.id; + __entry->fence = fence; + __entry->ctx = fence->context; + __entry->seqno = fence->seqno; + ), + TP_printk("job ring=%s, id=%llu, need pipe sync to fence=%p, context=%llu, seq=%u", + __entry->name, __entry->id, + __entry->fence, __entry->ctx, + __entry->seqno) +); + #undef AMDGPU_JOB_GET_TIMELINE_NAME #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c index b160b958e5fe..f212402570a5 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace_points.c @@ -1,4 +1,4 @@ -// SPDX-License-Identifier: GPL-2.0 +// SPDX-License-Identifier: MIT /* Copyright Red Hat Inc 2010. * * Permission is hereby granted, free of charge, to any person obtaining a diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c index fcf421263fd9..c91ec3101d00 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c @@ -47,6 +47,7 @@ #include "amdgpu_object.h" #include "amdgpu_trace.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_sdma.h" #include "bif/bif_4_1_d.h" #define DRM_FILE_PAGE_OFFSET (0x100000000ULL >> PAGE_SHIFT) @@ -60,100 +61,6 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, static int amdgpu_ttm_debugfs_init(struct amdgpu_device *adev); static void amdgpu_ttm_debugfs_fini(struct amdgpu_device *adev); -/* - * Global memory. - */ - -/** - * amdgpu_ttm_mem_global_init - Initialize and acquire reference to - * memory object - * - * @ref: Object for initialization. - * - * This is called by drm_global_item_ref() when an object is being - * initialized. - */ -static int amdgpu_ttm_mem_global_init(struct drm_global_reference *ref) -{ - return ttm_mem_global_init(ref->object); -} - -/** - * amdgpu_ttm_mem_global_release - Drop reference to a memory object - * - * @ref: Object being removed - * - * This is called by drm_global_item_unref() when an object is being - * released. - */ -static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref) -{ - ttm_mem_global_release(ref->object); -} - -/** - * amdgpu_ttm_global_init - Initialize global TTM memory reference structures. - * - * @adev: AMDGPU device for which the global structures need to be registered. - * - * This is called as part of the AMDGPU ttm init from amdgpu_ttm_init() - * during bring up. - */ -static int amdgpu_ttm_global_init(struct amdgpu_device *adev) -{ - struct drm_global_reference *global_ref; - int r; - - /* ensure reference is false in case init fails */ - adev->mman.mem_global_referenced = false; - - global_ref = &adev->mman.mem_global_ref; - global_ref->global_type = DRM_GLOBAL_TTM_MEM; - global_ref->size = sizeof(struct ttm_mem_global); - global_ref->init = &amdgpu_ttm_mem_global_init; - global_ref->release = &amdgpu_ttm_mem_global_release; - r = drm_global_item_ref(global_ref); - if (r) { - DRM_ERROR("Failed setting up TTM memory accounting " - "subsystem.\n"); - goto error_mem; - } - - adev->mman.bo_global_ref.mem_glob = - adev->mman.mem_global_ref.object; - global_ref = &adev->mman.bo_global_ref.ref; - global_ref->global_type = DRM_GLOBAL_TTM_BO; - global_ref->size = sizeof(struct ttm_bo_global); - global_ref->init = &ttm_bo_global_init; - global_ref->release = &ttm_bo_global_release; - r = drm_global_item_ref(global_ref); - if (r) { - DRM_ERROR("Failed setting up TTM BO subsystem.\n"); - goto error_bo; - } - - mutex_init(&adev->mman.gtt_window_lock); - - adev->mman.mem_global_referenced = true; - - return 0; - -error_bo: - drm_global_item_unref(&adev->mman.mem_global_ref); -error_mem: - return r; -} - -static void amdgpu_ttm_global_fini(struct amdgpu_device *adev) -{ - if (adev->mman.mem_global_referenced) { - mutex_destroy(&adev->mman.gtt_window_lock); - drm_global_item_unref(&adev->mman.bo_global_ref.ref); - drm_global_item_unref(&adev->mman.mem_global_ref); - adev->mman.mem_global_referenced = false; - } -} - static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags) { return 0; @@ -255,6 +162,13 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, abo = ttm_to_amdgpu_bo(bo); switch (bo->mem.mem_type) { + case AMDGPU_PL_GDS: + case AMDGPU_PL_GWS: + case AMDGPU_PL_OA: + placement->num_placement = 0; + placement->num_busy_placement = 0; + return; + case TTM_PL_VRAM: if (!adev->mman.buffer_funcs_enabled) { /* Move to system memory */ @@ -282,6 +196,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo, case TTM_PL_TT: default: amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU); + break; } *placement = abo->placement; } @@ -344,7 +259,7 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo, { uint64_t addr = 0; - if (mem->mem_type != TTM_PL_TT || amdgpu_gtt_mgr_has_gart_addr(mem)) { + if (mm_node->start != AMDGPU_BO_INVALID_OFFSET) { addr = mm_node->start << PAGE_SHIFT; addr += bo->bdev->man[mem->mem_type].gpu_offset; } @@ -432,8 +347,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, /* Map only what needs to be accessed. Map src to window 0 and * dst to window 1 */ - if (src->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_has_gart_addr(src->mem)) { + if (src->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(src->bo, src->mem, PFN_UP(cur_size + src_page_offset), src_node_start, 0, ring, @@ -446,8 +360,7 @@ int amdgpu_ttm_copy_mem_to_mem(struct amdgpu_device *adev, from += src_page_offset; } - if (dst->mem->mem_type == TTM_PL_TT && - !amdgpu_gtt_mgr_has_gart_addr(dst->mem)) { + if (dst->mem->start == AMDGPU_BO_INVALID_OFFSET) { r = amdgpu_map_buffer(dst->bo, dst->mem, PFN_UP(cur_size + dst_page_offset), dst_node_start, 1, ring, @@ -525,7 +438,11 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo, if (r) goto error; - r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); + /* Always block for VM page tables before committing the new location */ + if (bo->type == ttm_bo_type_kernel) + r = ttm_bo_move_accel_cleanup(bo, fence, true, new_mem); + else + r = ttm_bo_pipeline_move(bo, fence, evict, new_mem); dma_fence_put(fence); return r; @@ -676,6 +593,16 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict, amdgpu_move_null(bo, new_mem); return 0; } + if (old_mem->mem_type == AMDGPU_PL_GDS || + old_mem->mem_type == AMDGPU_PL_GWS || + old_mem->mem_type == AMDGPU_PL_OA || + new_mem->mem_type == AMDGPU_PL_GDS || + new_mem->mem_type == AMDGPU_PL_GWS || + new_mem->mem_type == AMDGPU_PL_OA) { + /* Nothing to save here */ + amdgpu_move_null(bo, new_mem); + return 0; + } if (!adev->mman.buffer_funcs_enabled) goto memcpy; @@ -1082,42 +1009,48 @@ int amdgpu_ttm_alloc_gart(struct ttm_buffer_object *bo) struct ttm_mem_reg tmp; struct ttm_placement placement; struct ttm_place placements; - uint64_t flags; + uint64_t addr, flags; int r; - if (bo->mem.mem_type != TTM_PL_TT || - amdgpu_gtt_mgr_has_gart_addr(&bo->mem)) + if (bo->mem.start != AMDGPU_BO_INVALID_OFFSET) return 0; - /* allocate GTT space */ - tmp = bo->mem; - tmp.mm_node = NULL; - placement.num_placement = 1; - placement.placement = &placements; - placement.num_busy_placement = 1; - placement.busy_placement = &placements; - placements.fpfn = 0; - placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; - placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | - TTM_PL_FLAG_TT; + addr = amdgpu_gmc_agp_addr(bo); + if (addr != AMDGPU_BO_INVALID_OFFSET) { + bo->mem.start = addr >> PAGE_SHIFT; + } else { - r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); - if (unlikely(r)) - return r; + /* allocate GART space */ + tmp = bo->mem; + tmp.mm_node = NULL; + placement.num_placement = 1; + placement.placement = &placements; + placement.num_busy_placement = 1; + placement.busy_placement = &placements; + placements.fpfn = 0; + placements.lpfn = adev->gmc.gart_size >> PAGE_SHIFT; + placements.flags = (bo->mem.placement & ~TTM_PL_MASK_MEM) | + TTM_PL_FLAG_TT; + + r = ttm_bo_mem_space(bo, &placement, &tmp, &ctx); + if (unlikely(r)) + return r; - /* compute PTE flags for this buffer object */ - flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); + /* compute PTE flags for this buffer object */ + flags = amdgpu_ttm_tt_pte_flags(adev, bo->ttm, &tmp); - /* Bind pages */ - gtt->offset = (u64)tmp.start << PAGE_SHIFT; - r = amdgpu_ttm_gart_bind(adev, bo, flags); - if (unlikely(r)) { - ttm_bo_mem_put(bo, &tmp); - return r; + /* Bind pages */ + gtt->offset = (u64)tmp.start << PAGE_SHIFT; + r = amdgpu_ttm_gart_bind(adev, bo, flags); + if (unlikely(r)) { + ttm_bo_mem_put(bo, &tmp); + return r; + } + + ttm_bo_mem_put(bo, &bo->mem); + bo->mem = tmp; } - ttm_bo_mem_put(bo, &bo->mem); - bo->mem = tmp; bo->offset = (bo->mem.start << PAGE_SHIFT) + bo->bdev->man[bo->mem.mem_type].gpu_offset; @@ -1427,13 +1360,14 @@ bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm) } /** - * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object + * amdgpu_ttm_tt_pde_flags - Compute PDE flags for ttm_tt object * * @ttm: The ttm_tt object to compute the flags for * @mem: The memory registry backing this ttm_tt object + * + * Figure out the flags to use for a VM PDE (Page Directory Entry). */ -uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, - struct ttm_mem_reg *mem) +uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem) { uint64_t flags = 0; @@ -1447,6 +1381,22 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, flags |= AMDGPU_PTE_SNOOPED; } + return flags; +} + +/** + * amdgpu_ttm_tt_pte_flags - Compute PTE flags for ttm_tt object + * + * @ttm: The ttm_tt object to compute the flags for + * @mem: The memory registry backing this ttm_tt object + + * Figure out the flags to use for a VM PTE (Page Table Entry). + */ +uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, + struct ttm_mem_reg *mem) +{ + uint64_t flags = amdgpu_ttm_tt_pde_flags(ttm, mem); + flags |= adev->gart.gart_pte_flags; flags |= AMDGPU_PTE_READABLE; @@ -1714,14 +1664,10 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) int r; u64 vis_vram_limit; - /* initialize global references for vram/gtt */ - r = amdgpu_ttm_global_init(adev); - if (r) { - return r; - } + mutex_init(&adev->mman.gtt_window_lock); + /* No others user of address space so set it to 0 */ r = ttm_bo_device_init(&adev->mman.bdev, - adev->mman.bo_global_ref.ref.object, &amdgpu_bo_driver, adev->ddev->anon_inode->i_mapping, DRM_FILE_PAGE_OFFSET, @@ -1769,14 +1715,12 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) * This is used for VGA emulation and pre-OS scanout buffers to * avoid display artifacts while transitioning between pre-OS * and driver. */ - if (adev->gmc.stolen_size) { - r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->stolen_vga_memory, - NULL, NULL); - if (r) - return r; - } + r = amdgpu_bo_create_kernel(adev, adev->gmc.stolen_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->stolen_vga_memory, + NULL, NULL); + if (r) + return r; DRM_INFO("amdgpu: %uM of VRAM memory ready\n", (unsigned) (adev->gmc.real_vram_size / (1024 * 1024))); @@ -1803,45 +1747,45 @@ int amdgpu_ttm_init(struct amdgpu_device *adev) (unsigned)(gtt_size / (1024 * 1024))); /* Initialize various on-chip memory pools */ - adev->gds.mem.total_size = adev->gds.mem.total_size << AMDGPU_GDS_SHIFT; - adev->gds.mem.gfx_partition_size = adev->gds.mem.gfx_partition_size << AMDGPU_GDS_SHIFT; - adev->gds.mem.cs_partition_size = adev->gds.mem.cs_partition_size << AMDGPU_GDS_SHIFT; - adev->gds.gws.total_size = adev->gds.gws.total_size << AMDGPU_GWS_SHIFT; - adev->gds.gws.gfx_partition_size = adev->gds.gws.gfx_partition_size << AMDGPU_GWS_SHIFT; - adev->gds.gws.cs_partition_size = adev->gds.gws.cs_partition_size << AMDGPU_GWS_SHIFT; - adev->gds.oa.total_size = adev->gds.oa.total_size << AMDGPU_OA_SHIFT; - adev->gds.oa.gfx_partition_size = adev->gds.oa.gfx_partition_size << AMDGPU_OA_SHIFT; - adev->gds.oa.cs_partition_size = adev->gds.oa.cs_partition_size << AMDGPU_OA_SHIFT; - /* GDS Memory */ - if (adev->gds.mem.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, - adev->gds.mem.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing GDS heap.\n"); - return r; - } + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GDS, + adev->gds.mem.total_size); + if (r) { + DRM_ERROR("Failed initializing GDS heap.\n"); + return r; } - /* GWS */ - if (adev->gds.gws.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, - adev->gds.gws.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing gws heap.\n"); - return r; - } + r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, + &adev->gds.gds_gfx_bo, NULL, NULL); + if (r) + return r; + + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_GWS, + adev->gds.gws.total_size); + if (r) { + DRM_ERROR("Failed initializing gws heap.\n"); + return r; } - /* OA */ - if (adev->gds.oa.total_size) { - r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, - adev->gds.oa.total_size >> PAGE_SHIFT); - if (r) { - DRM_ERROR("Failed initializing oa heap.\n"); - return r; - } + r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, + &adev->gds.gws_gfx_bo, NULL, NULL); + if (r) + return r; + + r = ttm_bo_init_mm(&adev->mman.bdev, AMDGPU_PL_OA, + adev->gds.oa.total_size); + if (r) { + DRM_ERROR("Failed initializing oa heap.\n"); + return r; } + r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, + PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, + &adev->gds.oa_gfx_bo, NULL, NULL); + if (r) + return r; + /* Register debugfs entries for amdgpu_ttm */ r = amdgpu_ttm_debugfs_init(adev); if (r) { @@ -1876,14 +1820,10 @@ void amdgpu_ttm_fini(struct amdgpu_device *adev) ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_VRAM); ttm_bo_clean_mm(&adev->mman.bdev, TTM_PL_TT); - if (adev->gds.mem.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); - if (adev->gds.gws.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); - if (adev->gds.oa.total_size) - ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GDS); + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_GWS); + ttm_bo_clean_mm(&adev->mman.bdev, AMDGPU_PL_OA); ttm_bo_device_release(&adev->mman.bdev); - amdgpu_ttm_global_fini(adev); adev->mman.initialized = false; DRM_INFO("amdgpu: ttm finalized\n"); } @@ -1987,7 +1927,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo, src_addr = num_dw * 4; src_addr += job->ibs[0].gpu_addr; - dst_addr = adev->gart.table_addr; + dst_addr = amdgpu_bo_gpu_offset(adev->gart.bo); dst_addr += window * AMDGPU_GTT_MAX_TRANSFER_SIZE * 8; amdgpu_emit_copy_buffer(adev, &job->ibs[0], src_addr, dst_addr, num_bytes); @@ -2030,7 +1970,7 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, unsigned i; int r; - if (direct_submit && !ring->ready) { + if (direct_submit && !ring->sched.ready) { DRM_ERROR("Trying to move memory with ring turned off.\n"); return -EINVAL; } @@ -2047,7 +1987,10 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset, if (r) return r; - job->vm_needs_flush = vm_needs_flush; + if (vm_needs_flush) { + job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo); + job->vm_needs_flush = true; + } if (resv) { r = amdgpu_sync_resv(adev, &job->sync, resv, AMDGPU_FENCE_OWNER_UNDEFINED, @@ -2183,7 +2126,7 @@ error_free: static int amdgpu_mm_dump_table(struct seq_file *m, void *data) { struct drm_info_node *node = (struct drm_info_node *)m->private; - unsigned ttm_pl = *(int *)node->info_ent->data; + unsigned ttm_pl = (uintptr_t)node->info_ent->data; struct drm_device *dev = node->minor->dev; struct amdgpu_device *adev = dev->dev_private; struct ttm_mem_type_manager *man = &adev->mman.bdev.man[ttm_pl]; @@ -2193,12 +2136,12 @@ static int amdgpu_mm_dump_table(struct seq_file *m, void *data) return 0; } -static int ttm_pl_vram = TTM_PL_VRAM; -static int ttm_pl_tt = TTM_PL_TT; - static const struct drm_info_list amdgpu_ttm_debugfs_list[] = { - {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, &ttm_pl_vram}, - {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, &ttm_pl_tt}, + {"amdgpu_vram_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_VRAM}, + {"amdgpu_gtt_mm", amdgpu_mm_dump_table, 0, (void *)TTM_PL_TT}, + {"amdgpu_gds_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GDS}, + {"amdgpu_gws_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_GWS}, + {"amdgpu_oa_mm", amdgpu_mm_dump_table, 0, (void *)AMDGPU_PL_OA}, {"ttm_page_pool", ttm_page_alloc_debugfs, 0, NULL}, #ifdef CONFIG_SWIOTLB {"ttm_dma_page_pool", ttm_dma_page_alloc_debugfs, 0, NULL} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h index 8b3cc6687769..b5b2d101f7db 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h @@ -39,8 +39,6 @@ #define AMDGPU_GTT_NUM_TRANSFER_WINDOWS 2 struct amdgpu_mman { - struct ttm_bo_global_ref bo_global_ref; - struct drm_global_reference mem_global_ref; struct ttm_bo_device bdev; bool mem_global_referenced; bool initialized; @@ -116,6 +114,7 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm, int *last_invalidated); bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm); bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm); +uint64_t amdgpu_ttm_tt_pde_flags(struct ttm_tt *ttm, struct ttm_mem_reg *mem); uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm, struct ttm_mem_reg *mem); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c index f55f72a37ca8..7b33867036e7 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.c @@ -277,6 +277,7 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_PITCAIRN: case CHIP_VERDE: case CHIP_OLAND: + case CHIP_HAINAN: return AMDGPU_FW_LOAD_DIRECT; #endif #ifdef CONFIG_DRM_AMDGPU_CIK @@ -296,19 +297,15 @@ amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type) case CHIP_POLARIS11: case CHIP_POLARIS12: case CHIP_VEGAM: - if (!load_type) - return AMDGPU_FW_LOAD_DIRECT; - else - return AMDGPU_FW_LOAD_SMU; + return AMDGPU_FW_LOAD_SMU; case CHIP_VEGA10: case CHIP_RAVEN: case CHIP_VEGA12: + case CHIP_VEGA20: if (!load_type) return AMDGPU_FW_LOAD_DIRECT; else return AMDGPU_FW_LOAD_PSP; - case CHIP_VEGA20: - return AMDGPU_FW_LOAD_DIRECT; default: DRM_ERROR("Unknown firmware load type\n"); } @@ -322,6 +319,7 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, { const struct common_firmware_header *header = NULL; const struct gfx_firmware_header_v1_0 *cp_hdr = NULL; + const struct dmcu_firmware_header_v1_0 *dmcu_hdr = NULL; if (NULL == ucode->fw) return 0; @@ -333,8 +331,8 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, return 0; header = (const struct common_firmware_header *)ucode->fw->data; - cp_hdr = (const struct gfx_firmware_header_v1_0 *)ucode->fw->data; + dmcu_hdr = (const struct dmcu_firmware_header_v1_0 *)ucode->fw->data; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP || (ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC1 && @@ -343,7 +341,9 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, ucode->ucode_id != AMDGPU_UCODE_ID_CP_MEC2_JT && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL && ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM && - ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM)) { + ucode->ucode_id != AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM && + ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_ERAM && + ucode->ucode_id != AMDGPU_UCODE_ID_DMCU_INTV)) { ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes); memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + @@ -365,6 +365,20 @@ static int amdgpu_ucode_init_single_fw(struct amdgpu_device *adev, le32_to_cpu(header->ucode_array_offset_bytes) + le32_to_cpu(cp_hdr->jt_offset) * 4), ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_ERAM) { + ucode->ucode_size = le32_to_cpu(header->ucode_size_bytes) - + le32_to_cpu(dmcu_hdr->intv_size_bytes); + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes)), + ucode->ucode_size); + } else if (ucode->ucode_id == AMDGPU_UCODE_ID_DMCU_INTV) { + ucode->ucode_size = le32_to_cpu(dmcu_hdr->intv_size_bytes); + + memcpy(ucode->kaddr, (void *)((uint8_t *)ucode->fw->data + + le32_to_cpu(header->ucode_array_offset_bytes) + + le32_to_cpu(dmcu_hdr->intv_offset_bytes)), + ucode->ucode_size); } else if (ucode->ucode_id == AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL) { ucode->ucode_size = adev->gfx.rlc.save_restore_list_cntl_size_bytes; memcpy(ucode->kaddr, adev->gfx.rlc.save_restore_list_cntl, @@ -406,32 +420,41 @@ static int amdgpu_ucode_patch_jt(struct amdgpu_firmware_info *ucode, return 0; } -int amdgpu_ucode_init_bo(struct amdgpu_device *adev) +int amdgpu_ucode_create_bo(struct amdgpu_device *adev) { - uint64_t fw_offset = 0; - int i, err; - struct amdgpu_firmware_info *ucode = NULL; - const struct common_firmware_header *header = NULL; - - if (!adev->firmware.fw_size) { - dev_warn(adev->dev, "No ip firmware need to load\n"); - return 0; - } - - if (!adev->in_gpu_reset) { - err = amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, - amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, - &adev->firmware.fw_buf, - &adev->firmware.fw_buf_mc, - &adev->firmware.fw_buf_ptr); - if (err) { + if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) { + amdgpu_bo_create_kernel(adev, adev->firmware.fw_size, PAGE_SIZE, + amdgpu_sriov_vf(adev) ? AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT, + &adev->firmware.fw_buf, + &adev->firmware.fw_buf_mc, + &adev->firmware.fw_buf_ptr); + if (!adev->firmware.fw_buf) { dev_err(adev->dev, "failed to create kernel buffer for firmware.fw_buf\n"); - goto failed; + return -ENOMEM; + } else if (amdgpu_sriov_vf(adev)) { + memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size); } } + return 0; +} - memset(adev->firmware.fw_buf_ptr, 0, adev->firmware.fw_size); +void amdgpu_ucode_free_bo(struct amdgpu_device *adev) +{ + if (adev->firmware.load_type != AMDGPU_FW_LOAD_DIRECT) + amdgpu_bo_free_kernel(&adev->firmware.fw_buf, + &adev->firmware.fw_buf_mc, + &adev->firmware.fw_buf_ptr); +} + +int amdgpu_ucode_init_bo(struct amdgpu_device *adev) +{ + uint64_t fw_offset = 0; + int i; + struct amdgpu_firmware_info *ucode = NULL; + /* for baremetal, the ucode is allocated in gtt, so don't need to fill the bo when reset/suspend */ + if (!amdgpu_sriov_vf(adev) && (adev->in_gpu_reset || adev->in_suspend)) + return 0; /* * if SMU loaded firmware, it needn't add SMC, UVD, and VCE * ucode info here @@ -448,7 +471,6 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) for (i = 0; i < adev->firmware.max_ucodes; i++) { ucode = &adev->firmware.ucode[i]; if (ucode->fw) { - header = (const struct common_firmware_header *)ucode->fw->data; amdgpu_ucode_init_single_fw(adev, ucode, adev->firmware.fw_buf_mc + fw_offset, adev->firmware.fw_buf_ptr + fw_offset); if (i == AMDGPU_UCODE_ID_CP_MEC1 && @@ -463,33 +485,4 @@ int amdgpu_ucode_init_bo(struct amdgpu_device *adev) } } return 0; - -failed: - if (err) - adev->firmware.load_type = AMDGPU_FW_LOAD_DIRECT; - - return err; -} - -int amdgpu_ucode_fini_bo(struct amdgpu_device *adev) -{ - int i; - struct amdgpu_firmware_info *ucode = NULL; - - if (!adev->firmware.fw_size) - return 0; - - for (i = 0; i < adev->firmware.max_ucodes; i++) { - ucode = &adev->firmware.ucode[i]; - if (ucode->fw) { - ucode->mc_addr = 0; - ucode->kaddr = NULL; - } - } - - amdgpu_bo_free_kernel(&adev->firmware.fw_buf, - &adev->firmware.fw_buf_mc, - &adev->firmware.fw_buf_ptr); - - return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h index bdc472b6e641..7ac25a1c7853 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h @@ -58,6 +58,17 @@ struct psp_firmware_header_v1_0 { }; /* version_major=1, version_minor=0 */ +struct ta_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t ta_xgmi_ucode_version; + uint32_t ta_xgmi_offset_bytes; + uint32_t ta_xgmi_size_bytes; + uint32_t ta_ras_ucode_version; + uint32_t ta_ras_offset_bytes; + uint32_t ta_ras_size_bytes; +}; + +/* version_major=1, version_minor=0 */ struct gfx_firmware_header_v1_0 { struct common_firmware_header header; uint32_t ucode_feature_version; @@ -157,12 +168,20 @@ struct gpu_info_firmware_header_v1_0 { uint16_t version_minor; /* version */ }; +/* version_major=1, version_minor=0 */ +struct dmcu_firmware_header_v1_0 { + struct common_firmware_header header; + uint32_t intv_offset_bytes; /* interrupt vectors offset from end of header, in bytes */ + uint32_t intv_size_bytes; /* size of interrupt vectors, in bytes */ +}; + /* header is fixed size */ union amdgpu_firmware_header { struct common_firmware_header common; struct mc_firmware_header_v1_0 mc; struct smc_firmware_header_v1_0 smc; struct psp_firmware_header_v1_0 psp; + struct ta_firmware_header_v1_0 ta; struct gfx_firmware_header_v1_0 gfx; struct rlc_firmware_header_v1_0 rlc; struct rlc_firmware_header_v2_0 rlc_v2_0; @@ -170,6 +189,7 @@ union amdgpu_firmware_header { struct sdma_firmware_header_v1_0 sdma; struct sdma_firmware_header_v1_1 sdma_v1_1; struct gpu_info_firmware_header_v1_0 gpu_info; + struct dmcu_firmware_header_v1_0 dmcu; uint8_t raw[0x100]; }; @@ -193,8 +213,11 @@ enum AMDGPU_UCODE_ID { AMDGPU_UCODE_ID_STORAGE, AMDGPU_UCODE_ID_SMC, AMDGPU_UCODE_ID_UVD, + AMDGPU_UCODE_ID_UVD1, AMDGPU_UCODE_ID_VCE, AMDGPU_UCODE_ID_VCN, + AMDGPU_UCODE_ID_DMCU_ERAM, + AMDGPU_UCODE_ID_DMCU_INTV, AMDGPU_UCODE_ID_MAXIMUM, }; @@ -205,6 +228,12 @@ enum AMDGPU_UCODE_STATUS { AMDGPU_UCODE_STATUS_LOADED, }; +enum amdgpu_firmware_load_type { + AMDGPU_FW_LOAD_DIRECT = 0, + AMDGPU_FW_LOAD_SMU, + AMDGPU_FW_LOAD_PSP, +}; + /* conform to smu_ucode_xfer_cz.h */ #define AMDGPU_SDMA0_UCODE_LOADED 0x00000001 #define AMDGPU_SDMA1_UCODE_LOADED 0x00000002 @@ -232,6 +261,24 @@ struct amdgpu_firmware_info { uint32_t tmr_mc_addr_hi; }; +struct amdgpu_firmware { + struct amdgpu_firmware_info ucode[AMDGPU_UCODE_ID_MAXIMUM]; + enum amdgpu_firmware_load_type load_type; + struct amdgpu_bo *fw_buf; + unsigned int fw_size; + unsigned int max_ucodes; + /* firmwares are loaded by psp instead of smu from vega10 */ + const struct amdgpu_psp_funcs *funcs; + struct amdgpu_bo *rbuf; + struct mutex mutex; + + /* gpu info firmware data pointer */ + const struct firmware *gpu_info_fw; + + void *fw_buf_ptr; + uint64_t fw_buf_mc; +}; + void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_smc_hdr(const struct common_firmware_header *hdr); void amdgpu_ucode_print_gfx_hdr(const struct common_firmware_header *hdr); @@ -241,8 +288,10 @@ void amdgpu_ucode_print_gpu_info_hdr(const struct common_firmware_header *hdr); int amdgpu_ucode_validate(const struct firmware *fw); bool amdgpu_ucode_hdr_version(union amdgpu_firmware_header *hdr, uint16_t hdr_major, uint16_t hdr_minor); + int amdgpu_ucode_init_bo(struct amdgpu_device *adev); -int amdgpu_ucode_fini_bo(struct amdgpu_device *adev); +int amdgpu_ucode_create_bo(struct amdgpu_device *adev); +void amdgpu_ucode_free_bo(struct amdgpu_device *adev); enum amdgpu_firmware_load_type amdgpu_ucode_get_load_type(struct amdgpu_device *adev, int load_type); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c index e5a6db6beab7..4e5d13e41f6a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c @@ -692,6 +692,8 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg, buf_sizes[0x1] = dpb_size; buf_sizes[0x2] = image_size; buf_sizes[0x4] = min_ctx_size; + /* store image width to adjust nb memory pstate */ + adev->uvd.decode_image_width = width; return 0; } @@ -1243,30 +1245,20 @@ int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout) { struct dma_fence *fence; long r; - uint32_t ip_instance = ring->me; r = amdgpu_uvd_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ip_instance, r); + if (r) goto error; - } r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ip_instance, r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ip_instance); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ip_instance, r); - } else { - DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ip_instance, ring->idx); + else if (r > 0) r = 0; - } dma_fence_put(fence); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h index a3ab1a41060f..5eb63288d157 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h @@ -65,6 +65,8 @@ struct amdgpu_uvd { struct drm_sched_entity entity; struct delayed_work idle_work; unsigned harvest_config; + /* store image width to adjust nb memory state */ + unsigned decode_image_width; }; int amdgpu_uvd_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c index 5f3f54073818..98a1b2ce2b9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c @@ -1032,8 +1032,10 @@ out: * @ib: the IB to execute * */ -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { amdgpu_ring_write(ring, VCE_CMD_IB); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1079,11 +1081,9 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) return 0; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: vce failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, VCE_CMD_END); amdgpu_ring_commit(ring); @@ -1093,14 +1093,8 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= timeout) r = -ETIMEDOUT; - } return r; } @@ -1121,27 +1115,19 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout) return 0; r = amdgpu_vce_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h index a1f209eed4c4..50293652af14 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h @@ -65,8 +65,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp); int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx); int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx); -void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch); +void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, bool ctx_switch); void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq, unsigned flags); int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c index 400fc74bbae2..ecf6f96df2ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c @@ -36,14 +36,19 @@ #include "soc15_common.h" #include "vcn/vcn_1_0_offset.h" +#include "vcn/vcn_1_0_sh_mask.h" /* 1 second timeout */ #define VCN_IDLE_TIMEOUT msecs_to_jiffies(1000) /* Firmware Names */ #define FIRMWARE_RAVEN "amdgpu/raven_vcn.bin" +#define FIRMWARE_PICASSO "amdgpu/picasso_vcn.bin" +#define FIRMWARE_RAVEN2 "amdgpu/raven2_vcn.bin" MODULE_FIRMWARE(FIRMWARE_RAVEN); +MODULE_FIRMWARE(FIRMWARE_PICASSO); +MODULE_FIRMWARE(FIRMWARE_RAVEN2); static void amdgpu_vcn_idle_work_handler(struct work_struct *work); @@ -59,7 +64,12 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - fw_name = FIRMWARE_RAVEN; + if (adev->rev_id >= 8) + fw_name = FIRMWARE_RAVEN2; + else if (adev->pdev->device == 0x15d8) + fw_name = FIRMWARE_PICASSO; + else + fw_name = FIRMWARE_RAVEN; break; default: return -EINVAL; @@ -111,8 +121,7 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev) version_major, version_minor, family_id); } - bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE - + AMDGPU_VCN_SESSION_SIZE * 40; + bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE; if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8); r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE, @@ -203,20 +212,164 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev) return 0; } +static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev, + struct dpg_pause_state *new_state) +{ + int ret_code; + uint32_t reg_data = 0; + uint32_t reg_data2 = 0; + struct amdgpu_ring *ring; + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.fw_based != new_state->fw_based) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK); + + if (new_state->fw_based == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* pause DPG non-jpeg */ + reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_enc[0]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_enc[1]; + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_LO2, ring->gpu_addr); + WREG32_SOC15(UVD, 0, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_SIZE2, ring->ring_size / 4); + WREG32_SOC15(UVD, 0, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr)); + WREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr)); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg non-jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.fw_based = new_state->fw_based; + } + + /* pause/unpause if state is changed */ + if (adev->vcn.pause_state.jpeg != new_state->jpeg) { + DRM_DEBUG("dpg pause state changed %d:%d -> %d:%d", + adev->vcn.pause_state.fw_based, adev->vcn.pause_state.jpeg, + new_state->fw_based, new_state->jpeg); + + reg_data = RREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE) & + (~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK); + + if (new_state->jpeg == VCN_DPG_STATE__PAUSE) { + ret_code = 0; + + if (!(reg_data & UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK)) + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + if (!ret_code) { + /* Make sure JPRG Snoop is disabled before sending the pause */ + reg_data2 = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + reg_data2 |= UVD_POWER_STATUS__JRBC_SNOOP_DIS_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, reg_data2); + + /* pause DPG jpeg */ + reg_data |= UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_DPG_PAUSE, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, + UVD_DPG_PAUSE__JPEG_PAUSE_DPG_ACK_MASK, ret_code); + + /* Restore */ + ring = &adev->vcn.ring_jpeg; + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, ring->wptr); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); + + ring = &adev->vcn.ring_dec; + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + } + } else { + /* unpause dpg jpeg, no need to wait */ + reg_data &= ~UVD_DPG_PAUSE__JPEG_PAUSE_DPG_REQ_MASK; + WREG32_SOC15(UVD, 0, mmUVD_DPG_PAUSE, reg_data); + } + adev->vcn.pause_state.jpeg = new_state->jpeg; + } + + return 0; +} + static void amdgpu_vcn_idle_work_handler(struct work_struct *work) { struct amdgpu_device *adev = container_of(work, struct amdgpu_device, vcn.idle_work.work); - unsigned fences = amdgpu_fence_count_emitted(&adev->vcn.ring_dec); - unsigned i; + unsigned int fences = 0; + unsigned int i; for (i = 0; i < adev->vcn.num_enc_rings; ++i) { fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); } + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + amdgpu_vcn_pause_dpg_mode(adev, &new_state); + } + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg); + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_dec); if (fences == 0) { + amdgpu_gfx_off_ctrl(adev, true); if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, false); else @@ -233,12 +386,39 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring) bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work); if (set_clocks) { + amdgpu_gfx_off_ctrl(adev, false); if (adev->pm.dpm_enabled) amdgpu_dpm_enable_uvd(adev, true); else amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN, AMD_PG_STATE_UNGATE); } + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) { + struct dpg_pause_state new_state; + unsigned int fences = 0; + unsigned int i; + + for (i = 0; i < adev->vcn.num_enc_rings; ++i) { + fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]); + } + if (fences) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else + new_state.fw_based = VCN_DPG_STATE__UNPAUSE; + + if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg)) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + else + new_state.jpeg = VCN_DPG_STATE__UNPAUSE; + + if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) + new_state.fw_based = VCN_DPG_STATE__PAUSE; + else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG) + new_state.jpeg = VCN_DPG_STATE__PAUSE; + + amdgpu_vcn_pause_dpg_mode(adev, &new_state); + } } void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring) @@ -253,32 +433,25 @@ int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, - PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0)); + PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -400,30 +573,20 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_vcn_dec_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } dma_fence_put(fence); - error: return r; } @@ -436,11 +599,9 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: vcn enc failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, VCN_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -450,14 +611,8 @@ int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -572,27 +727,19 @@ int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_vcn_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; @@ -605,35 +752,26 @@ int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring) unsigned i; int r; - WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD); + WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, - PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0)); + PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; return r; } @@ -654,7 +792,7 @@ static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle, ib = &job->ibs[0]; - ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0); + ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9), 0, 0, PACKETJ_TYPE0); ib->ptr[1] = 0xDEADBEEF; for (i = 2; i < 16; i += 2) { ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6); @@ -686,38 +824,30 @@ int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r = 0; r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; goto error; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto error; - } else + } else { r = 0; + } for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH)); + tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_SCRATCH9)); if (tmp == 0xDEADBEEF) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); - else { - DRM_ERROR("ib test failed (0x%08X)\n", tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; dma_fence_put(fence); - error: return r; } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h index 0b0b8638d73f..a0ad19af9080 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.h @@ -24,9 +24,9 @@ #ifndef __AMDGPU_VCN_H__ #define __AMDGPU_VCN_H__ -#define AMDGPU_VCN_STACK_SIZE (200*1024) -#define AMDGPU_VCN_HEAP_SIZE (256*1024) -#define AMDGPU_VCN_SESSION_SIZE (50*1024) +#define AMDGPU_VCN_STACK_SIZE (128*1024) +#define AMDGPU_VCN_CONTEXT_SIZE (512*1024) + #define AMDGPU_VCN_FIRMWARE_OFFSET 256 #define AMDGPU_VCN_MAX_ENC_RINGS 3 @@ -56,6 +56,16 @@ enum engine_status_constants { UVD_STATUS__RBC_BUSY = 0x1, }; +enum internal_dpg_state { + VCN_DPG_STATE__UNPAUSE = 0, + VCN_DPG_STATE__PAUSE, +}; + +struct dpg_pause_state { + enum internal_dpg_state fw_based; + enum internal_dpg_state jpeg; +}; + struct amdgpu_vcn { struct amdgpu_bo *vcpu_bo; void *cpu_addr; @@ -69,6 +79,8 @@ struct amdgpu_vcn { struct amdgpu_ring ring_jpeg; struct amdgpu_irq_src irq; unsigned num_enc_rings; + enum amd_powergating_state cur_state; + struct dpg_pause_state pause_state; }; int amdgpu_vcn_sw_init(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c index 21adb1b6e5cb..462a04e0f5e6 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c @@ -22,21 +22,6 @@ */ #include "amdgpu.h" -#define MAX_KIQ_REG_WAIT 5000 /* in usecs, 5ms */ -#define MAX_KIQ_REG_BAILOUT_INTERVAL 5 /* in msecs, 5ms */ -#define MAX_KIQ_REG_TRY 20 - -uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev) -{ - uint64_t addr = adev->vm_manager.max_pfn << AMDGPU_GPU_PAGE_SHIFT; - - addr -= AMDGPU_VA_RESERVED_SIZE; - - if (addr >= AMDGPU_VA_HOLE_START) - addr |= AMDGPU_VA_HOLE_END; - - return addr; -} bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) { @@ -46,88 +31,6 @@ bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev) return RREG32_NO_KIQ(0xc040) == 0xffffffff; } -int amdgpu_allocate_static_csa(struct amdgpu_device *adev) -{ - int r; - void *ptr; - - r = amdgpu_bo_create_kernel(adev, AMDGPU_CSA_SIZE, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, &adev->virt.csa_obj, - &adev->virt.csa_vmid0_addr, &ptr); - if (r) - return r; - - memset(ptr, 0, AMDGPU_CSA_SIZE); - return 0; -} - -void amdgpu_free_static_csa(struct amdgpu_device *adev) { - amdgpu_bo_free_kernel(&adev->virt.csa_obj, - &adev->virt.csa_vmid0_addr, - NULL); -} - -/* - * amdgpu_map_static_csa should be called during amdgpu_vm_init - * it maps virtual address amdgpu_csa_vaddr() to this VM, and each command - * submission of GFX should use this virtual address within META_DATA init - * package to support SRIOV gfx preemption. - */ -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va) -{ - uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_VA_HOLE_MASK; - struct ww_acquire_ctx ticket; - struct list_head list; - struct amdgpu_bo_list_entry pd; - struct ttm_validate_buffer csa_tv; - int r; - - INIT_LIST_HEAD(&list); - INIT_LIST_HEAD(&csa_tv.head); - csa_tv.bo = &adev->virt.csa_obj->tbo; - csa_tv.shared = true; - - list_add(&csa_tv.head, &list); - amdgpu_vm_get_pd_bo(vm, &list, &pd); - - r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL); - if (r) { - DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r); - return r; - } - - *bo_va = amdgpu_vm_bo_add(adev, vm, adev->virt.csa_obj); - if (!*bo_va) { - ttm_eu_backoff_reservation(&ticket, &list); - DRM_ERROR("failed to create bo_va for static CSA\n"); - return -ENOMEM; - } - - r = amdgpu_vm_alloc_pts(adev, (*bo_va)->base.vm, csa_addr, - AMDGPU_CSA_SIZE); - if (r) { - DRM_ERROR("failed to allocate pts for static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - - r = amdgpu_vm_bo_map(adev, *bo_va, csa_addr, 0, AMDGPU_CSA_SIZE, - AMDGPU_PTE_READABLE | AMDGPU_PTE_WRITEABLE | - AMDGPU_PTE_EXECUTABLE); - - if (r) { - DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r); - amdgpu_vm_bo_rmv(adev, *bo_va); - ttm_eu_backoff_reservation(&ticket, &list); - return r; - } - - ttm_eu_backoff_reservation(&ticket, &list); - return 0; -} - void amdgpu_virt_init_setting(struct amdgpu_device *adev) { /* enable virtual display */ @@ -167,9 +70,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_read; - if (in_interrupt()) - might_sleep(); - + might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); @@ -215,9 +116,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v) if (r < 1 && (adev->in_gpu_reset || in_interrupt())) goto failed_kiq_write; - if (in_interrupt()) - might_sleep(); - + might_sleep(); while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); @@ -233,6 +132,46 @@ failed_kiq_write: pr_err("failed to write reg:%x\n", reg); } +void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t reg1, + uint32_t ref, uint32_t mask) +{ + struct amdgpu_kiq *kiq = &adev->gfx.kiq; + struct amdgpu_ring *ring = &kiq->ring; + signed long r, cnt = 0; + unsigned long flags; + uint32_t seq; + + spin_lock_irqsave(&kiq->ring_lock, flags); + amdgpu_ring_alloc(ring, 32); + amdgpu_ring_emit_reg_write_reg_wait(ring, reg0, reg1, + ref, mask); + amdgpu_fence_emit_polling(ring, &seq); + amdgpu_ring_commit(ring); + spin_unlock_irqrestore(&kiq->ring_lock, flags); + + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + + /* don't wait anymore for IRQ context */ + if (r < 1 && in_interrupt()) + goto failed_kiq; + + might_sleep(); + while (r < 1 && cnt++ < MAX_KIQ_REG_TRY) { + + msleep(MAX_KIQ_REG_BAILOUT_INTERVAL); + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT); + } + + if (cnt > MAX_KIQ_REG_TRY) + goto failed_kiq; + + return; + +failed_kiq: + pr_err("failed to write reg %x wait reg %x\n", reg0, reg1); +} + /** * amdgpu_virt_request_full_gpu() - request full gpu access * @amdgpu: amdgpu device. @@ -395,7 +334,7 @@ void amdgpu_virt_init_data_exchange(struct amdgpu_device *adev) if (adev->fw_vram_usage.va != NULL) { adev->virt.fw_reserve.p_pf2vf = - (struct amdgim_pf2vf_info_header *)( + (struct amd_sriov_msg_pf2vf_info_header *)( adev->fw_vram_usage.va + AMDGIM_DATAEXCHANGE_OFFSET); AMDGPU_FW_VRAM_PF2VF_READ(adev, header.size, &pf2vf_size); AMDGPU_FW_VRAM_PF2VF_READ(adev, checksum, &checksum); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h index 880ac113a3a9..722deefc0a7e 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.h @@ -63,8 +63,8 @@ struct amdgpu_virt_ops { * Firmware Reserve Frame buffer */ struct amdgpu_virt_fw_reserve { - struct amdgim_pf2vf_info_header *p_pf2vf; - struct amdgim_vf2pf_info_header *p_vf2pf; + struct amd_sriov_msg_pf2vf_info_header *p_pf2vf; + struct amd_sriov_msg_vf2pf_info_header *p_vf2pf; unsigned int checksum_key; }; /* @@ -85,15 +85,17 @@ enum AMDGIM_FEATURE_FLAG { AMDGIM_FEATURE_GIM_FLR_VRAMLOST = 0x4, }; -struct amdgim_pf2vf_info_header { +struct amd_sriov_msg_pf2vf_info_header { /* the total structure size in byte. */ uint32_t size; /* version of this structure, written by the GIM */ uint32_t version; + /* reserved */ + uint32_t reserved[2]; } __aligned(4); struct amdgim_pf2vf_info_v1 { /* header contains size and version */ - struct amdgim_pf2vf_info_header header; + struct amd_sriov_msg_pf2vf_info_header header; /* max_width * max_height */ unsigned int uvd_enc_max_pixels_count; /* 16x16 pixels/sec, codec independent */ @@ -112,7 +114,7 @@ struct amdgim_pf2vf_info_v1 { struct amdgim_pf2vf_info_v2 { /* header contains size and version */ - struct amdgim_pf2vf_info_header header; + struct amd_sriov_msg_pf2vf_info_header header; /* use private key from mailbox 2 to create chueksum */ uint32_t checksum; /* The features flags of the GIM driver supports. */ @@ -137,20 +139,22 @@ struct amdgim_pf2vf_info_v2 { uint64_t vcefw_kboffset; /* VCE FW size in KB */ uint32_t vcefw_ksize; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amdgim_pf2vf_info_header)/sizeof(uint32_t)), 3)]; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 0, 0, (9 + sizeof(struct amd_sriov_msg_pf2vf_info_header)/sizeof(uint32_t)), 3)]; } __aligned(4); -struct amdgim_vf2pf_info_header { +struct amd_sriov_msg_vf2pf_info_header { /* the total structure size in byte. */ uint32_t size; /*version of this structure, written by the guest */ uint32_t version; + /* reserved */ + uint32_t reserved[2]; } __aligned(4); struct amdgim_vf2pf_info_v1 { /* header contains size and version */ - struct amdgim_vf2pf_info_header header; + struct amd_sriov_msg_vf2pf_info_header header; /* driver version */ char driver_version[64]; /* driver certification, 1=WHQL, 0=None */ @@ -180,7 +184,7 @@ struct amdgim_vf2pf_info_v1 { struct amdgim_vf2pf_info_v2 { /* header contains size and version */ - struct amdgim_vf2pf_info_header header; + struct amd_sriov_msg_vf2pf_info_header header; uint32_t checksum; /* driver version */ uint8_t driver_version[64]; @@ -206,7 +210,7 @@ struct amdgim_vf2pf_info_v2 { uint32_t uvd_enc_usage; /* guest uvd engine usage percentage. 0xffff means N/A. */ uint32_t uvd_enc_health; - uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amdgim_vf2pf_info_header)/sizeof(uint32_t)), 0)]; + uint32_t reserved[AMDGIM_GET_STRUCTURE_RESERVED_SIZE(256, 64, 0, (12 + sizeof(struct amd_sriov_msg_vf2pf_info_header)/sizeof(uint32_t)), 0)]; } __aligned(4); #define AMDGPU_FW_VRAM_VF2PF_VER 2 @@ -238,7 +242,6 @@ typedef struct amdgim_vf2pf_info_v2 amdgim_vf2pf_info ; struct amdgpu_virt { uint32_t caps; struct amdgpu_bo *csa_obj; - uint64_t csa_vmid0_addr; bool chained_ib_support; uint32_t reg_val_offs; struct amdgpu_irq_src ack_irq; @@ -251,8 +254,6 @@ struct amdgpu_virt { uint32_t gim_feature; }; -#define AMDGPU_CSA_SIZE (8 * 1024) - #define amdgpu_sriov_enabled(adev) \ ((adev)->virt.caps & AMDGPU_SRIOV_CAPS_ENABLE_IOV) @@ -277,17 +278,13 @@ static inline bool is_virtual_machine(void) #endif } -struct amdgpu_vm; - -uint64_t amdgpu_csa_vaddr(struct amdgpu_device *adev); bool amdgpu_virt_mmio_blocked(struct amdgpu_device *adev); -int amdgpu_allocate_static_csa(struct amdgpu_device *adev); -int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm, - struct amdgpu_bo_va **bo_va); -void amdgpu_free_static_csa(struct amdgpu_device *adev); void amdgpu_virt_init_setting(struct amdgpu_device *adev); uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg); void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v); +void amdgpu_virt_kiq_reg_write_reg_wait(struct amdgpu_device *adev, + uint32_t reg0, uint32_t rreg1, + uint32_t ref, uint32_t mask); int amdgpu_virt_request_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_release_full_gpu(struct amdgpu_device *adev, bool init); int amdgpu_virt_reset_gpu(struct amdgpu_device *adev); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index b17771dd5ce7..e73d152659a2 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -134,48 +134,6 @@ struct amdgpu_prt_cb { }; /** - * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm - * - * @base: base structure for tracking BO usage in a VM - * @vm: vm to which bo is to be added - * @bo: amdgpu buffer object - * - * Initialize a bo_va_base structure and add it to the appropriate lists - * - */ -static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, - struct amdgpu_vm *vm, - struct amdgpu_bo *bo) -{ - base->vm = vm; - base->bo = bo; - INIT_LIST_HEAD(&base->bo_list); - INIT_LIST_HEAD(&base->vm_status); - - if (!bo) - return; - list_add_tail(&base->bo_list, &bo->va); - - if (bo->tbo.type == ttm_bo_type_kernel) - list_move(&base->vm_status, &vm->relocated); - - if (bo->tbo.resv != vm->root.base.bo->tbo.resv) - return; - - if (bo->preferred_domains & - amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) - return; - - /* - * we checked all the prerequisites, but it looks like this per vm bo - * is currently evicted. add the bo to the evicted list to make sure it - * is validated on next vm use to avoid fault. - * */ - list_move_tail(&base->vm_status, &vm->evicted); - base->moved = true; -} - -/** * amdgpu_vm_level_shift - return the addr shift for each level * * @adev: amdgpu_device pointer @@ -223,7 +181,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) /* For the root directory */ - return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift; + return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift; else if (level != AMDGPU_VM_PTB) /* Everything in between */ return 512; @@ -233,6 +191,26 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev, } /** + * amdgpu_vm_entries_mask - the mask to get the entry number of a PD/PT + * + * @adev: amdgpu_device pointer + * @level: VMPT level + * + * Returns: + * The mask to extract the entry number of a PD/PT from an address. + */ +static uint32_t amdgpu_vm_entries_mask(struct amdgpu_device *adev, + unsigned int level) +{ + if (level <= adev->vm_manager.root_level) + return 0xffffffff; + else if (level != AMDGPU_VM_PTB) + return 0x1ff; + else + return AMDGPU_VM_PTE_COUNT(adev) - 1; +} + +/** * amdgpu_vm_bo_size - returns the size of the BOs in bytes * * @adev: amdgpu_device pointer @@ -247,6 +225,383 @@ static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level) } /** + * amdgpu_vm_bo_evicted - vm_bo is evicted + * + * @vm_bo: vm_bo which is evicted + * + * State for PDs/PTs and per VM BOs which are not at the location they should + * be. + */ +static void amdgpu_vm_bo_evicted(struct amdgpu_vm_bo_base *vm_bo) +{ + struct amdgpu_vm *vm = vm_bo->vm; + struct amdgpu_bo *bo = vm_bo->bo; + + vm_bo->moved = true; + if (bo->tbo.type == ttm_bo_type_kernel) + list_move(&vm_bo->vm_status, &vm->evicted); + else + list_move_tail(&vm_bo->vm_status, &vm->evicted); +} + +/** + * amdgpu_vm_bo_relocated - vm_bo is reloacted + * + * @vm_bo: vm_bo which is relocated + * + * State for PDs/PTs which needs to update their parent PD. + */ +static void amdgpu_vm_bo_relocated(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->relocated); +} + +/** + * amdgpu_vm_bo_moved - vm_bo is moved + * + * @vm_bo: vm_bo which is moved + * + * State for per VM BOs which are moved, but that change is not yet reflected + * in the page tables. + */ +static void amdgpu_vm_bo_moved(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->moved); +} + +/** + * amdgpu_vm_bo_idle - vm_bo is idle + * + * @vm_bo: vm_bo which is now idle + * + * State for PDs/PTs and per VM BOs which have gone through the state machine + * and are now idle. + */ +static void amdgpu_vm_bo_idle(struct amdgpu_vm_bo_base *vm_bo) +{ + list_move(&vm_bo->vm_status, &vm_bo->vm->idle); + vm_bo->moved = false; +} + +/** + * amdgpu_vm_bo_invalidated - vm_bo is invalidated + * + * @vm_bo: vm_bo which is now invalidated + * + * State for normal BOs which are invalidated and that change not yet reflected + * in the PTs. + */ +static void amdgpu_vm_bo_invalidated(struct amdgpu_vm_bo_base *vm_bo) +{ + spin_lock(&vm_bo->vm->invalidated_lock); + list_move(&vm_bo->vm_status, &vm_bo->vm->invalidated); + spin_unlock(&vm_bo->vm->invalidated_lock); +} + +/** + * amdgpu_vm_bo_done - vm_bo is done + * + * @vm_bo: vm_bo which is now done + * + * State for normal BOs which are invalidated and that change has been updated + * in the PTs. + */ +static void amdgpu_vm_bo_done(struct amdgpu_vm_bo_base *vm_bo) +{ + spin_lock(&vm_bo->vm->invalidated_lock); + list_del_init(&vm_bo->vm_status); + spin_unlock(&vm_bo->vm->invalidated_lock); +} + +/** + * amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm + * + * @base: base structure for tracking BO usage in a VM + * @vm: vm to which bo is to be added + * @bo: amdgpu buffer object + * + * Initialize a bo_va_base structure and add it to the appropriate lists + * + */ +static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base, + struct amdgpu_vm *vm, + struct amdgpu_bo *bo) +{ + base->vm = vm; + base->bo = bo; + base->next = NULL; + INIT_LIST_HEAD(&base->vm_status); + + if (!bo) + return; + base->next = bo->vm_bo; + bo->vm_bo = base; + + if (bo->tbo.resv != vm->root.base.bo->tbo.resv) + return; + + vm->bulk_moveable = false; + if (bo->tbo.type == ttm_bo_type_kernel) + amdgpu_vm_bo_relocated(base); + else + amdgpu_vm_bo_idle(base); + + if (bo->preferred_domains & + amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type)) + return; + + /* + * we checked all the prerequisites, but it looks like this per vm bo + * is currently evicted. add the bo to the evicted list to make sure it + * is validated on next vm use to avoid fault. + * */ + amdgpu_vm_bo_evicted(base); +} + +/** + * amdgpu_vm_pt_parent - get the parent page directory + * + * @pt: child page table + * + * Helper to get the parent entry for the child page table. NULL if we are at + * the root page directory. + */ +static struct amdgpu_vm_pt *amdgpu_vm_pt_parent(struct amdgpu_vm_pt *pt) +{ + struct amdgpu_bo *parent = pt->base.bo->parent; + + if (!parent) + return NULL; + + return container_of(parent->vm_bo, struct amdgpu_vm_pt, base); +} + +/** + * amdgpu_vm_pt_cursor - state for for_each_amdgpu_vm_pt + */ +struct amdgpu_vm_pt_cursor { + uint64_t pfn; + struct amdgpu_vm_pt *parent; + struct amdgpu_vm_pt *entry; + unsigned level; +}; + +/** + * amdgpu_vm_pt_start - start PD/PT walk + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start address of the walk + * @cursor: state to initialize + * + * Initialize a amdgpu_vm_pt_cursor to start a walk. + */ +static void amdgpu_vm_pt_start(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + cursor->pfn = start; + cursor->parent = NULL; + cursor->entry = &vm->root; + cursor->level = adev->vm_manager.root_level; +} + +/** + * amdgpu_vm_pt_descendant - go to child node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the child node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_descendant(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned mask, shift, idx; + + if (!cursor->entry->entries) + return false; + + BUG_ON(!cursor->entry->base.bo); + mask = amdgpu_vm_entries_mask(adev, cursor->level); + shift = amdgpu_vm_level_shift(adev, cursor->level); + + ++cursor->level; + idx = (cursor->pfn >> shift) & mask; + cursor->parent = cursor->entry; + cursor->entry = &cursor->entry->entries[idx]; + return true; +} + +/** + * amdgpu_vm_pt_sibling - go to sibling node + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk to the sibling node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_sibling(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + unsigned shift, num_entries; + + /* Root doesn't have a sibling */ + if (!cursor->parent) + return false; + + /* Go to our parents and see if we got a sibling */ + shift = amdgpu_vm_level_shift(adev, cursor->level - 1); + num_entries = amdgpu_vm_num_entries(adev, cursor->level - 1); + + if (cursor->entry == &cursor->parent->entries[num_entries - 1]) + return false; + + cursor->pfn += 1ULL << shift; + cursor->pfn &= ~((1ULL << shift) - 1); + ++cursor->entry; + return true; +} + +/** + * amdgpu_vm_pt_ancestor - go to parent node + * + * @cursor: current state + * + * Walk to the parent node of the current node. + * Returns: + * True if the walk was possible, false otherwise. + */ +static bool amdgpu_vm_pt_ancestor(struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->parent) + return false; + + --cursor->level; + cursor->entry = cursor->parent; + cursor->parent = amdgpu_vm_pt_parent(cursor->parent); + return true; +} + +/** + * amdgpu_vm_pt_next - get next PD/PT in hieratchy + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next node. + */ +static void amdgpu_vm_pt_next(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + /* First try a newborn child */ + if (amdgpu_vm_pt_descendant(adev, cursor)) + return; + + /* If that didn't worked try to find a sibling */ + while (!amdgpu_vm_pt_sibling(adev, cursor)) { + /* No sibling, go to our parents and grandparents */ + if (!amdgpu_vm_pt_ancestor(cursor)) { + cursor->pfn = ~0ll; + return; + } + } +} + +/** + * amdgpu_vm_pt_first_leaf - get first leaf PD/PT + * + * @adev: amdgpu_device pointer + * @vm: amdgpu_vm structure + * @start: start addr of the walk + * @cursor: state to initialize + * + * Start a walk and go directly to the leaf node. + */ +static void amdgpu_vm_pt_first_leaf(struct amdgpu_device *adev, + struct amdgpu_vm *vm, uint64_t start, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_start(adev, vm, start, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * amdgpu_vm_pt_next_leaf - get next leaf PD/PT + * + * @adev: amdgpu_device pointer + * @cursor: current state + * + * Walk the PD/PT tree to the next leaf node. + */ +static void amdgpu_vm_pt_next_leaf(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_next(adev, cursor); + if (cursor->pfn != ~0ll) + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * for_each_amdgpu_vm_pt_leaf - walk over all leaf PDs/PTs in the hierarchy + */ +#define for_each_amdgpu_vm_pt_leaf(adev, vm, start, end, cursor) \ + for (amdgpu_vm_pt_first_leaf((adev), (vm), (start), &(cursor)); \ + (cursor).pfn <= end; amdgpu_vm_pt_next_leaf((adev), &(cursor))) + +/** + * amdgpu_vm_pt_first_dfs - start a deep first search + * + * @adev: amdgpu_device structure + * @vm: amdgpu_vm structure + * @cursor: state to initialize + * + * Starts a deep first traversal of the PD/PT tree. + */ +static void amdgpu_vm_pt_first_dfs(struct amdgpu_device *adev, + struct amdgpu_vm *vm, + struct amdgpu_vm_pt_cursor *cursor) +{ + amdgpu_vm_pt_start(adev, vm, 0, cursor); + while (amdgpu_vm_pt_descendant(adev, cursor)); +} + +/** + * amdgpu_vm_pt_next_dfs - get the next node for a deep first search + * + * @adev: amdgpu_device structure + * @cursor: current state + * + * Move the cursor to the next node in a deep first search. + */ +static void amdgpu_vm_pt_next_dfs(struct amdgpu_device *adev, + struct amdgpu_vm_pt_cursor *cursor) +{ + if (!cursor->entry) + return; + + if (!cursor->parent) + cursor->entry = NULL; + else if (amdgpu_vm_pt_sibling(adev, cursor)) + while (amdgpu_vm_pt_descendant(adev, cursor)); + else + amdgpu_vm_pt_ancestor(cursor); +} + +/** + * for_each_amdgpu_vm_pt_dfs_safe - safe deep first search of all PDs/PTs + */ +#define for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) \ + for (amdgpu_vm_pt_first_dfs((adev), (vm), &(cursor)), \ + (entry) = (cursor).entry, amdgpu_vm_pt_next_dfs((adev), &(cursor));\ + (entry); (entry) = (cursor).entry, \ + amdgpu_vm_pt_next_dfs((adev), &(cursor))) + +/** * amdgpu_vm_get_pd_bo - add the VM PD to a validation list * * @vm: vm providing the BOs @@ -260,15 +615,56 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry) { - entry->robj = vm->root.base.bo; entry->priority = 0; - entry->tv.bo = &entry->robj->tbo; - entry->tv.shared = true; + entry->tv.bo = &vm->root.base.bo->tbo; + /* One for the VM updates, one for TTM and one for the CS job */ + entry->tv.num_shared = 3; entry->user_pages = NULL; list_add(&entry->tv.head, validated); } /** + * amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU + * + * @adev: amdgpu device pointer + * @vm: vm providing the BOs + * + * Move all BOs to the end of LRU and remember their positions to put them + * together. + */ +void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + struct ttm_bo_global *glob = adev->mman.bdev.glob; + struct amdgpu_vm_bo_base *bo_base; + + if (vm->bulk_moveable) { + spin_lock(&glob->lru_lock); + ttm_bo_bulk_move_lru_tail(&vm->lru_bulk_move); + spin_unlock(&glob->lru_lock); + return; + } + + memset(&vm->lru_bulk_move, 0, sizeof(vm->lru_bulk_move)); + + spin_lock(&glob->lru_lock); + list_for_each_entry(bo_base, &vm->idle, vm_status) { + struct amdgpu_bo *bo = bo_base->bo; + + if (!bo->parent) + continue; + + ttm_bo_move_to_lru_tail(&bo->tbo, &vm->lru_bulk_move); + if (bo->shadow) + ttm_bo_move_to_lru_tail(&bo->shadow->tbo, + &vm->lru_bulk_move); + } + spin_unlock(&glob->lru_lock); + + vm->bulk_moveable = true; +} + +/** * amdgpu_vm_validate_pt_bos - validate the page table BOs * * @adev: amdgpu device pointer @@ -285,47 +681,36 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm, int (*validate)(void *p, struct amdgpu_bo *bo), void *param) { - struct ttm_bo_global *glob = adev->mman.bdev.glob; struct amdgpu_vm_bo_base *bo_base, *tmp; int r = 0; + vm->bulk_moveable &= list_empty(&vm->evicted); + list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) { struct amdgpu_bo *bo = bo_base->bo; - if (bo->parent) { - r = validate(param, bo); - if (r) - break; - - spin_lock(&glob->lru_lock); - ttm_bo_move_to_lru_tail(&bo->tbo); - if (bo->shadow) - ttm_bo_move_to_lru_tail(&bo->shadow->tbo); - spin_unlock(&glob->lru_lock); - } + r = validate(param, bo); + if (r) + break; if (bo->tbo.type != ttm_bo_type_kernel) { - spin_lock(&vm->moved_lock); - list_move(&bo_base->vm_status, &vm->moved); - spin_unlock(&vm->moved_lock); + amdgpu_vm_bo_moved(bo_base); } else { - list_move(&bo_base->vm_status, &vm->relocated); + if (vm->use_cpu_for_update) + r = amdgpu_bo_kmap(bo, NULL); + else + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (r) + break; + if (bo->shadow) { + r = amdgpu_ttm_alloc_gart(&bo->shadow->tbo); + if (r) + break; + } + amdgpu_vm_bo_relocated(bo_base); } } - spin_lock(&glob->lru_lock); - list_for_each_entry(bo_base, &vm->idle, vm_status) { - struct amdgpu_bo *bo = bo_base->bo; - - if (!bo->parent) - continue; - - ttm_bo_move_to_lru_tail(&bo->tbo); - if (bo->shadow) - ttm_bo_move_to_lru_tail(&bo->shadow->tbo); - } - spin_unlock(&glob->lru_lock); - return r; } @@ -376,7 +761,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, if (level == adev->vm_manager.root_level) { ats_entries = amdgpu_vm_level_shift(adev, level); ats_entries += AMDGPU_GPU_PAGE_SHIFT; - ats_entries = AMDGPU_VA_HOLE_START >> ats_entries; + ats_entries = AMDGPU_GMC_HOLE_START >> ats_entries; ats_entries = min(ats_entries, entries); entries -= ats_entries; } else { @@ -389,14 +774,14 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev, ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); - r = reservation_object_reserve_shared(bo->tbo.resv); - if (r) - return r; - r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx); if (r) goto error; + r = amdgpu_ttm_alloc_gart(&bo->tbo); + if (r) + return r; + r = amdgpu_job_alloc_with_ib(adev, 64, &job); if (r) goto error; @@ -448,117 +833,33 @@ error: } /** - * amdgpu_vm_alloc_levels - allocate the PD/PT levels + * amdgpu_vm_bo_param - fill in parameters for PD/PT allocation * * @adev: amdgpu_device pointer - * @vm: requested vm - * @parent: parent PT - * @saddr: start of the address range - * @eaddr: end of the address range - * @level: VMPT level - * @ats: indicate ATS support from PTE - * - * Make sure the page directories and page tables are allocated - * - * Returns: - * 0 on success, errno otherwise. + * @vm: requesting vm + * @bp: resulting BO allocation parameters */ -static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - uint64_t saddr, uint64_t eaddr, - unsigned level, bool ats) +static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm, + int level, struct amdgpu_bo_param *bp) { - unsigned shift = amdgpu_vm_level_shift(adev, level); - unsigned pt_idx, from, to; - u64 flags; - int r; - - if (!parent->entries) { - unsigned num_entries = amdgpu_vm_num_entries(adev, level); - - parent->entries = kvmalloc_array(num_entries, - sizeof(struct amdgpu_vm_pt), - GFP_KERNEL | __GFP_ZERO); - if (!parent->entries) - return -ENOMEM; - memset(parent->entries, 0 , sizeof(struct amdgpu_vm_pt)); - } - - from = saddr >> shift; - to = eaddr >> shift; - if (from >= amdgpu_vm_num_entries(adev, level) || - to >= amdgpu_vm_num_entries(adev, level)) - return -EINVAL; - - ++level; - saddr = saddr & ((1 << shift) - 1); - eaddr = eaddr & ((1 << shift) - 1); - - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - if (vm->root.base.bo->shadow) - flags |= AMDGPU_GEM_CREATE_SHADOW; + memset(bp, 0, sizeof(*bp)); + + bp->size = amdgpu_vm_bo_size(adev, level); + bp->byte_align = AMDGPU_GPU_PAGE_SIZE; + bp->domain = AMDGPU_GEM_DOMAIN_VRAM; + if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 && + adev->flags & AMD_IS_APU) + bp->domain |= AMDGPU_GEM_DOMAIN_GTT; + bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain); + bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS | + AMDGPU_GEM_CREATE_CPU_GTT_USWC; if (vm->use_cpu_for_update) - flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else - flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS; - - /* walk over the address space and allocate the page tables */ - for (pt_idx = from; pt_idx <= to; ++pt_idx) { - struct reservation_object *resv = vm->root.base.bo->tbo.resv; - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - struct amdgpu_bo *pt; - - if (!entry->base.bo) { - struct amdgpu_bo_param bp; - - memset(&bp, 0, sizeof(bp)); - bp.size = amdgpu_vm_bo_size(adev, level); - bp.byte_align = AMDGPU_GPU_PAGE_SIZE; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = flags; - bp.type = ttm_bo_type_kernel; - bp.resv = resv; - r = amdgpu_bo_create(adev, &bp, &pt); - if (r) - return r; - - r = amdgpu_vm_clear_bo(adev, vm, pt, level, ats); - if (r) { - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); - return r; - } - - if (vm->use_cpu_for_update) { - r = amdgpu_bo_kmap(pt, NULL); - if (r) { - amdgpu_bo_unref(&pt->shadow); - amdgpu_bo_unref(&pt); - return r; - } - } - - /* Keep a reference to the root directory to avoid - * freeing them up in the wrong order. - */ - pt->parent = amdgpu_bo_ref(parent->base.bo); - - amdgpu_vm_bo_base_init(&entry->base, vm, pt); - } - - if (level < AMDGPU_VM_PTB) { - uint64_t sub_saddr = (pt_idx == from) ? saddr : 0; - uint64_t sub_eaddr = (pt_idx == to) ? eaddr : - ((1 << shift) - 1); - r = amdgpu_vm_alloc_levels(adev, vm, entry, sub_saddr, - sub_eaddr, level, ats); - if (r) - return r; - } - } - - return 0; + bp->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; + else if (!vm->root.base.bo || vm->root.base.bo->shadow) + bp->flags |= AMDGPU_GEM_CREATE_SHADOW; + bp->type = ttm_bo_type_kernel; + if (vm->root.base.bo) + bp->resv = vm->root.base.bo->tbo.resv; } /** @@ -569,7 +870,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev, * @saddr: Start address which needs to be allocated * @size: Size from start address we need. * - * Make sure the page tables are allocated. + * Make sure the page directories and page tables are allocated * * Returns: * 0 on success, errno otherwise. @@ -578,8 +879,11 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, struct amdgpu_vm *vm, uint64_t saddr, uint64_t size) { - uint64_t eaddr; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_bo *pt; bool ats = false; + uint64_t eaddr; + int r; /* validate the parameters */ if (saddr & AMDGPU_GPU_PAGE_MASK || size & AMDGPU_GPU_PAGE_MASK) @@ -588,7 +892,7 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, eaddr = saddr + size - 1; if (vm->pte_support_ats) - ats = saddr < AMDGPU_VA_HOLE_START; + ats = saddr < AMDGPU_GMC_HOLE_START; saddr /= AMDGPU_GPU_PAGE_SIZE; eaddr /= AMDGPU_GPU_PAGE_SIZE; @@ -599,8 +903,84 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev, return -EINVAL; } - return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, - adev->vm_manager.root_level, ats); + for_each_amdgpu_vm_pt_leaf(adev, vm, saddr, eaddr, cursor) { + struct amdgpu_vm_pt *entry = cursor.entry; + struct amdgpu_bo_param bp; + + if (cursor.level < AMDGPU_VM_PTB) { + unsigned num_entries; + + num_entries = amdgpu_vm_num_entries(adev, cursor.level); + entry->entries = kvmalloc_array(num_entries, + sizeof(*entry->entries), + GFP_KERNEL | + __GFP_ZERO); + if (!entry->entries) + return -ENOMEM; + } + + + if (entry->base.bo) + continue; + + amdgpu_vm_bo_param(adev, vm, cursor.level, &bp); + + r = amdgpu_bo_create(adev, &bp, &pt); + if (r) + return r; + + r = amdgpu_vm_clear_bo(adev, vm, pt, cursor.level, ats); + if (r) + goto error_free_pt; + + if (vm->use_cpu_for_update) { + r = amdgpu_bo_kmap(pt, NULL); + if (r) + goto error_free_pt; + } + + /* Keep a reference to the root directory to avoid + * freeing them up in the wrong order. + */ + pt->parent = amdgpu_bo_ref(cursor.parent->base.bo); + + amdgpu_vm_bo_base_init(&entry->base, vm, pt); + } + + return 0; + +error_free_pt: + amdgpu_bo_unref(&pt->shadow); + amdgpu_bo_unref(&pt); + return r; +} + +/** + * amdgpu_vm_free_pts - free PD/PT levels + * + * @adev: amdgpu device structure + * @vm: amdgpu vm structure + * + * Free the page directory or page table level and all sub levels. + */ +static void amdgpu_vm_free_pts(struct amdgpu_device *adev, + struct amdgpu_vm *vm) +{ + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry; + + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) { + + if (entry->base.bo) { + entry->base.bo->vm_bo = NULL; + list_del(&entry->base.vm_status); + amdgpu_bo_unref(&entry->base.bo->shadow); + amdgpu_bo_unref(&entry->base.bo); + } + kvfree(entry->entries); + } + + BUG_ON(vm->root.base.bo); } /** @@ -714,7 +1094,8 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ } gds_switch_needed &= !!ring->funcs->emit_gds_switch; - vm_flush_needed &= !!ring->funcs->emit_vm_flush; + vm_flush_needed &= !!ring->funcs->emit_vm_flush && + job->vm_pd_addr != AMDGPU_BO_INVALID_OFFSET; pasid_mapping_needed &= adev->gmc.gmc_funcs->emit_pasid_mapping && ring->funcs->emit_wreg; @@ -799,12 +1180,13 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_ struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm, struct amdgpu_bo *bo) { - struct amdgpu_bo_va *bo_va; + struct amdgpu_vm_bo_base *base; - list_for_each_entry(bo_va, &bo->va, base.bo_list) { - if (bo_va->base.vm == vm) { - return bo_va; - } + for (base = bo->vm_bo; base; base = base->next) { + if (base->vm != vm) + continue; + + return container_of(base, struct amdgpu_bo_va, base); } return NULL; } @@ -957,6 +1339,22 @@ static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm, return r; } +/** + * amdgpu_vm_update_func - helper to call update function + * + * Calls the update function for both the given BO as well as its shadow. + */ +static void amdgpu_vm_update_func(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) +{ + if (bo->shadow) + params->func(params, bo->shadow, pe, addr, count, incr, flags); + params->func(params, bo, pe, addr, count, incr, flags); +} + /* * amdgpu_vm_update_pde - update a single level in the hierarchy * @@ -984,47 +1382,28 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params, pbo = pbo->parent; level += params->adev->vm_manager.root_level; - pt = amdgpu_bo_gpu_offset(entry->base.bo); - flags = AMDGPU_PTE_VALID; - amdgpu_gmc_get_vm_pde(params->adev, level, &pt, &flags); + amdgpu_gmc_get_pde_for_bo(entry->base.bo, level, &pt, &flags); pde = (entry - parent->entries) * 8; - if (bo->shadow) - params->func(params, bo->shadow, pde, pt, 1, 0, flags); - params->func(params, bo, pde, pt, 1, 0, flags); + amdgpu_vm_update_func(params, bo, pde, pt, 1, 0, flags); } /* - * amdgpu_vm_invalidate_level - mark all PD levels as invalid + * amdgpu_vm_invalidate_pds - mark all PDs as invalid * * @adev: amdgpu_device pointer * @vm: related vm - * @parent: parent PD - * @level: VMPT level * * Mark all PD level as invalid after an error. */ -static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev, - struct amdgpu_vm *vm, - struct amdgpu_vm_pt *parent, - unsigned level) +static void amdgpu_vm_invalidate_pds(struct amdgpu_device *adev, + struct amdgpu_vm *vm) { - unsigned pt_idx, num_entries; - - /* - * Recurse into the subdirectories. This recursion is harmless because - * we only have a maximum of 5 layers. - */ - num_entries = amdgpu_vm_num_entries(adev, level); - for (pt_idx = 0; pt_idx < num_entries; ++pt_idx) { - struct amdgpu_vm_pt *entry = &parent->entries[pt_idx]; - - if (!entry->base.bo) - continue; + struct amdgpu_vm_pt_cursor cursor; + struct amdgpu_vm_pt *entry; - if (!entry->base.moved) - list_move(&entry->base.vm_status, &vm->relocated); - amdgpu_vm_invalidate_level(adev, vm, entry, level + 1); - } + for_each_amdgpu_vm_pt_dfs_safe(adev, vm, cursor, entry) + if (entry->base.bo && !entry->base.moved) + amdgpu_vm_bo_relocated(&entry->base); } /* @@ -1054,14 +1433,6 @@ restart: params.adev = adev; if (vm->use_cpu_for_update) { - struct amdgpu_vm_bo_base *bo_base; - - list_for_each_entry(bo_base, &vm->relocated, vm_status) { - r = amdgpu_bo_kmap(bo_base->bo, NULL); - if (unlikely(r)) - return r; - } - r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM); if (unlikely(r)) return r; @@ -1078,25 +1449,16 @@ restart: } while (!list_empty(&vm->relocated)) { - struct amdgpu_vm_bo_base *bo_base, *parent; struct amdgpu_vm_pt *pt, *entry; - struct amdgpu_bo *bo; - bo_base = list_first_entry(&vm->relocated, - struct amdgpu_vm_bo_base, - vm_status); - bo_base->moved = false; - list_del_init(&bo_base->vm_status); + entry = list_first_entry(&vm->relocated, struct amdgpu_vm_pt, + base.vm_status); + amdgpu_vm_bo_idle(&entry->base); - bo = bo_base->bo->parent; - if (!bo) + pt = amdgpu_vm_pt_parent(entry); + if (!pt) continue; - parent = list_first_entry(&bo->va, struct amdgpu_vm_bo_base, - bo_list); - pt = container_of(parent, struct amdgpu_vm_pt, base); - entry = container_of(bo_base, struct amdgpu_vm_pt, base); - amdgpu_vm_update_pde(¶ms, vm, pt, entry); if (!vm->use_cpu_for_update && @@ -1138,85 +1500,90 @@ restart: return 0; error: - amdgpu_vm_invalidate_level(adev, vm, &vm->root, - adev->vm_manager.root_level); + amdgpu_vm_invalidate_pds(adev, vm); amdgpu_job_free(job); return r; } /** - * amdgpu_vm_find_entry - find the entry for an address + * amdgpu_vm_update_huge - figure out parameters for PTE updates * - * @p: see amdgpu_pte_update_params definition - * @addr: virtual address in question - * @entry: resulting entry or NULL - * @parent: parent entry - * - * Find the vm_pt entry and it's parent for the given address. + * Make sure to set the right flags for the PTEs at the desired level. */ -void amdgpu_vm_get_entry(struct amdgpu_pte_update_params *p, uint64_t addr, - struct amdgpu_vm_pt **entry, - struct amdgpu_vm_pt **parent) -{ - unsigned level = p->adev->vm_manager.root_level; - - *parent = NULL; - *entry = &p->vm->root; - while ((*entry)->entries) { - unsigned shift = amdgpu_vm_level_shift(p->adev, level++); +static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params, + struct amdgpu_bo *bo, unsigned level, + uint64_t pe, uint64_t addr, + unsigned count, uint32_t incr, + uint64_t flags) - *parent = *entry; - *entry = &(*entry)->entries[addr >> shift]; - addr &= (1ULL << shift) - 1; +{ + if (level != AMDGPU_VM_PTB) { + flags |= AMDGPU_PDE_PTE; + amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags); } - if (level != AMDGPU_VM_PTB) - *entry = NULL; + amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags); } /** - * amdgpu_vm_handle_huge_pages - handle updating the PD with huge pages + * amdgpu_vm_fragment - get fragment for PTEs * - * @p: see amdgpu_pte_update_params definition - * @entry: vm_pt entry to check - * @parent: parent entry - * @nptes: number of PTEs updated with this operation - * @dst: destination address where the PTEs should point to - * @flags: access flags fro the PTEs + * @params: see amdgpu_pte_update_params definition + * @start: first PTE to handle + * @end: last PTE to handle + * @flags: hw mapping flags + * @frag: resulting fragment size + * @frag_end: end of this fragment * - * Check if we can update the PD with a huge page. + * Returns the first possible fragment for the start and end address. */ -static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, - struct amdgpu_vm_pt *entry, - struct amdgpu_vm_pt *parent, - unsigned nptes, uint64_t dst, - uint64_t flags) +static void amdgpu_vm_fragment(struct amdgpu_pte_update_params *params, + uint64_t start, uint64_t end, uint64_t flags, + unsigned int *frag, uint64_t *frag_end) { - uint64_t pde; + /** + * The MC L1 TLB supports variable sized pages, based on a fragment + * field in the PTE. When this field is set to a non-zero value, page + * granularity is increased from 4KB to (1 << (12 + frag)). The PTE + * flags are considered valid for all PTEs within the fragment range + * and corresponding mappings are assumed to be physically contiguous. + * + * The L1 TLB can store a single PTE for the whole fragment, + * significantly increasing the space available for translation + * caching. This leads to large improvements in throughput when the + * TLB is under pressure. + * + * The L2 TLB distributes small and large fragments into two + * asymmetric partitions. The large fragment cache is significantly + * larger. Thus, we try to use large fragments wherever possible. + * Userspace can support this by aligning virtual base address and + * allocation size to the fragment size. + * + * Starting with Vega10 the fragment size only controls the L1. The L2 + * is now directly feed with small/huge/giant pages from the walker. + */ + unsigned max_frag; - /* In the case of a mixed PT the PDE must point to it*/ - if (p->adev->asic_type >= CHIP_VEGA10 && !p->src && - nptes == AMDGPU_VM_PTE_COUNT(p->adev)) { - /* Set the huge page flag to stop scanning at this PDE */ - flags |= AMDGPU_PDE_PTE; - } + if (params->adev->asic_type < CHIP_VEGA10) + max_frag = params->adev->vm_manager.fragment_size; + else + max_frag = 31; - if (!(flags & AMDGPU_PDE_PTE)) { - if (entry->huge) { - /* Add the entry to the relocated list to update it. */ - entry->huge = false; - list_move(&entry->base.vm_status, &p->vm->relocated); - } + /* system pages are non continuously */ + if (params->src) { + *frag = 0; + *frag_end = end; return; } - entry->huge = true; - amdgpu_gmc_get_vm_pde(p->adev, AMDGPU_VM_PDB0, &dst, &flags); - - pde = (entry - parent->entries) * 8; - if (parent->base.bo->shadow) - p->func(p, parent->base.bo->shadow, pde, dst, 1, 0, flags); - p->func(p, parent->base.bo, pde, dst, 1, 0, flags); + /* This intentionally wraps around if no bit is set */ + *frag = min((unsigned)ffs(start) - 1, (unsigned)fls64(end - start) - 1); + if (*frag >= max_frag) { + *frag = max_frag; + *frag_end = end & ~((1ULL << max_frag) - 1); + } else { + *frag_end = start + (1 << *frag); + } } /** @@ -1234,112 +1601,109 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p, * 0 for success, -EINVAL for failure. */ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) + uint64_t start, uint64_t end, + uint64_t dst, uint64_t flags) { struct amdgpu_device *adev = params->adev; - const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1; + struct amdgpu_vm_pt_cursor cursor; + uint64_t frag_start = start, frag_end; + unsigned int frag; - uint64_t addr, pe_start; - struct amdgpu_bo *pt; - unsigned nptes; + /* figure out the initial fragment */ + amdgpu_vm_fragment(params, frag_start, end, flags, &frag, &frag_end); - /* walk over the address space and update the page tables */ - for (addr = start; addr < end; addr += nptes, - dst += nptes * AMDGPU_GPU_PAGE_SIZE) { - struct amdgpu_vm_pt *entry, *parent; + /* walk over the address space and update the PTs */ + amdgpu_vm_pt_start(adev, params->vm, start, &cursor); + while (cursor.pfn < end) { + struct amdgpu_bo *pt = cursor.entry->base.bo; + unsigned shift, parent_shift, mask; + uint64_t incr, entry_end, pe_start; - amdgpu_vm_get_entry(params, addr, &entry, &parent); - if (!entry) + if (!pt) return -ENOENT; - if ((addr & ~mask) == (end & ~mask)) - nptes = end - addr; - else - nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask); - - amdgpu_vm_handle_huge_pages(params, entry, parent, - nptes, dst, flags); - /* We don't need to update PTEs for huge pages */ - if (entry->huge) + /* The root level can't be a huge page */ + if (cursor.level == adev->vm_manager.root_level) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; continue; + } - pt = entry->base.bo; - pe_start = (addr & mask) * 8; - if (pt->shadow) - params->func(params, pt->shadow, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - params->func(params, pt, pe_start, dst, nptes, - AMDGPU_GPU_PAGE_SIZE, flags); - } - - return 0; -} - -/* - * amdgpu_vm_frag_ptes - add fragment information to PTEs - * - * @params: see amdgpu_pte_update_params definition - * @vm: requested vm - * @start: first PTE to handle - * @end: last PTE to handle - * @dst: addr those PTEs should point to - * @flags: hw mapping flags - * - * Returns: - * 0 for success, -EINVAL for failure. - */ -static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params, - uint64_t start, uint64_t end, - uint64_t dst, uint64_t flags) -{ - /** - * The MC L1 TLB supports variable sized pages, based on a fragment - * field in the PTE. When this field is set to a non-zero value, page - * granularity is increased from 4KB to (1 << (12 + frag)). The PTE - * flags are considered valid for all PTEs within the fragment range - * and corresponding mappings are assumed to be physically contiguous. - * - * The L1 TLB can store a single PTE for the whole fragment, - * significantly increasing the space available for translation - * caching. This leads to large improvements in throughput when the - * TLB is under pressure. - * - * The L2 TLB distributes small and large fragments into two - * asymmetric partitions. The large fragment cache is significantly - * larger. Thus, we try to use large fragments wherever possible. - * Userspace can support this by aligning virtual base address and - * allocation size to the fragment size. - */ - unsigned max_frag = params->adev->vm_manager.fragment_size; - int r; + /* If it isn't already handled it can't be a huge page */ + if (cursor.entry->huge) { + /* Add the entry to the relocated list to update it. */ + cursor.entry->huge = false; + amdgpu_vm_bo_relocated(&cursor.entry->base); + } - /* system pages are non continuously */ - if (params->src || !(flags & AMDGPU_PTE_VALID)) - return amdgpu_vm_update_ptes(params, start, end, dst, flags); - - while (start != end) { - uint64_t frag_flags, frag_end; - unsigned frag; - - /* This intentionally wraps around if no bit is set */ - frag = min((unsigned)ffs(start) - 1, - (unsigned)fls64(end - start) - 1); - if (frag >= max_frag) { - frag_flags = AMDGPU_PTE_FRAG(max_frag); - frag_end = end & ~((1ULL << max_frag) - 1); - } else { - frag_flags = AMDGPU_PTE_FRAG(frag); - frag_end = start + (1 << frag); + shift = amdgpu_vm_level_shift(adev, cursor.level); + parent_shift = amdgpu_vm_level_shift(adev, cursor.level - 1); + if (adev->asic_type < CHIP_VEGA10) { + /* No huge page support before GMC v9 */ + if (cursor.level != AMDGPU_VM_PTB) { + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } + } else if (frag < shift) { + /* We can't use this level when the fragment size is + * smaller than the address shift. Go to the next + * child entry and try again. + */ + if (!amdgpu_vm_pt_descendant(adev, &cursor)) + return -ENOENT; + continue; + } else if (frag >= parent_shift && + cursor.level - 1 != adev->vm_manager.root_level) { + /* If the fragment size is even larger than the parent + * shift we should go up one level and check it again + * unless one level up is the root level. + */ + if (!amdgpu_vm_pt_ancestor(&cursor)) + return -ENOENT; + continue; } - r = amdgpu_vm_update_ptes(params, start, frag_end, dst, - flags | frag_flags); - if (r) - return r; + /* Looks good so far, calculate parameters for the update */ + incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift; + mask = amdgpu_vm_entries_mask(adev, cursor.level); + pe_start = ((cursor.pfn >> shift) & mask) * 8; + entry_end = (uint64_t)(mask + 1) << shift; + entry_end += cursor.pfn & ~(entry_end - 1); + entry_end = min(entry_end, end); + + do { + uint64_t upd_end = min(entry_end, frag_end); + unsigned nptes = (upd_end - frag_start) >> shift; + + amdgpu_vm_update_huge(params, pt, cursor.level, + pe_start, dst, nptes, incr, + flags | AMDGPU_PTE_FRAG(frag)); + + pe_start += nptes * 8; + dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift; + + frag_start = upd_end; + if (frag_start >= frag_end) { + /* figure out the next fragment */ + amdgpu_vm_fragment(params, frag_start, end, + flags, &frag, &frag_end); + if (frag < shift) + break; + } + } while (frag_start < entry_end); + + if (amdgpu_vm_pt_descendant(adev, &cursor)) { + /* Mark all child entries as huge */ + while (cursor.pfn < frag_start) { + cursor.entry->huge = true; + amdgpu_vm_pt_next(adev, &cursor); + } - dst += (frag_end - start) * AMDGPU_GPU_PAGE_SIZE; - start = frag_end; + } else if (frag >= shift) { + /* or just move on to the next on the same level. */ + amdgpu_vm_pt_next(adev, &cursor); + } } return 0; @@ -1401,8 +1765,8 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, params.func = amdgpu_vm_cpu_set_ptes; params.pages_addr = pages_addr; - return amdgpu_vm_frag_ptes(¶ms, start, last + 1, - addr, flags); + return amdgpu_vm_update_ptes(¶ms, start, last + 1, + addr, flags); } ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched); @@ -1477,11 +1841,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev, if (r) goto error_free; - r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv); - if (r) - goto error_free; - - r = amdgpu_vm_frag_ptes(¶ms, start, last + 1, addr, flags); + r = amdgpu_vm_update_ptes(¶ms, start, last + 1, addr, flags); if (r) goto error_free; @@ -1696,10 +2056,6 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, amdgpu_asic_flush_hdp(adev, NULL); } - spin_lock(&vm->moved_lock); - list_del_init(&bo_va->base.vm_status); - spin_unlock(&vm->moved_lock); - /* If the BO is not in its preferred location add it back to * the evicted list so that it gets validated again on the * next command submission. @@ -1708,9 +2064,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, uint32_t mem_type = bo->tbo.mem.mem_type; if (!(bo->preferred_domains & amdgpu_mem_type_to_domain(mem_type))) - list_add_tail(&bo_va->base.vm_status, &vm->evicted); + amdgpu_vm_bo_evicted(&bo_va->base); else - list_add(&bo_va->base.vm_status, &vm->idle); + amdgpu_vm_bo_idle(&bo_va->base); + } else { + amdgpu_vm_bo_done(&bo_va->base); } list_splice_init(&bo_va->invalids, &bo_va->valids); @@ -1895,7 +2253,8 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev, struct amdgpu_bo_va_mapping, list); list_del(&mapping->list); - if (vm->pte_support_ats && mapping->start < AMDGPU_VA_HOLE_START) + if (vm->pte_support_ats && + mapping->start < AMDGPU_GMC_HOLE_START) init_pte_value = AMDGPU_PTE_DEFAULT_ATC; r = amdgpu_vm_bo_update_mapping(adev, NULL, NULL, vm, @@ -1936,40 +2295,40 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev, struct amdgpu_vm *vm) { struct amdgpu_bo_va *bo_va, *tmp; - struct list_head moved; + struct reservation_object *resv; bool clear; int r; - INIT_LIST_HEAD(&moved); - spin_lock(&vm->moved_lock); - list_splice_init(&vm->moved, &moved); - spin_unlock(&vm->moved_lock); + list_for_each_entry_safe(bo_va, tmp, &vm->moved, base.vm_status) { + /* Per VM BOs never need to bo cleared in the page tables */ + r = amdgpu_vm_bo_update(adev, bo_va, false); + if (r) + return r; + } - list_for_each_entry_safe(bo_va, tmp, &moved, base.vm_status) { - struct reservation_object *resv = bo_va->base.bo->tbo.resv; + spin_lock(&vm->invalidated_lock); + while (!list_empty(&vm->invalidated)) { + bo_va = list_first_entry(&vm->invalidated, struct amdgpu_bo_va, + base.vm_status); + resv = bo_va->base.bo->tbo.resv; + spin_unlock(&vm->invalidated_lock); - /* Per VM BOs never need to bo cleared in the page tables */ - if (resv == vm->root.base.bo->tbo.resv) - clear = false; /* Try to reserve the BO to avoid clearing its ptes */ - else if (!amdgpu_vm_debug && reservation_object_trylock(resv)) + if (!amdgpu_vm_debug && reservation_object_trylock(resv)) clear = false; /* Somebody else is using the BO right now */ else clear = true; r = amdgpu_vm_bo_update(adev, bo_va, clear); - if (r) { - spin_lock(&vm->moved_lock); - list_splice(&moved, &vm->moved); - spin_unlock(&vm->moved_lock); + if (r) return r; - } - if (!clear && resv != vm->root.base.bo->tbo.resv) + if (!clear) reservation_object_unlock(resv); - + spin_lock(&vm->invalidated_lock); } + spin_unlock(&vm->invalidated_lock); return 0; } @@ -2034,9 +2393,7 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev, if (bo && bo->tbo.resv == vm->root.base.bo->tbo.resv && !bo_va->base.moved) { - spin_lock(&vm->moved_lock); list_move(&bo_va->base.vm_status, &vm->moved); - spin_unlock(&vm->moved_lock); } trace_amdgpu_vm_bo_map(bo_va, mapping); } @@ -2388,13 +2745,27 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va) { struct amdgpu_bo_va_mapping *mapping, *next; + struct amdgpu_bo *bo = bo_va->base.bo; struct amdgpu_vm *vm = bo_va->base.vm; + struct amdgpu_vm_bo_base **base; + + if (bo) { + if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + vm->bulk_moveable = false; + + for (base = &bo_va->base.bo->vm_bo; *base; + base = &(*base)->next) { + if (*base != &bo_va->base) + continue; - list_del(&bo_va->base.bo_list); + *base = bo_va->base.next; + break; + } + } - spin_lock(&vm->moved_lock); + spin_lock(&vm->invalidated_lock); list_del(&bo_va->base.vm_status); - spin_unlock(&vm->moved_lock); + spin_unlock(&vm->invalidated_lock); list_for_each_entry_safe(mapping, next, &bo_va->valids, list) { list_del(&mapping->list); @@ -2432,30 +2803,24 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev, if (bo->parent && bo->parent->shadow == bo) bo = bo->parent; - list_for_each_entry(bo_base, &bo->va, bo_list) { + for (bo_base = bo->vm_bo; bo_base; bo_base = bo_base->next) { struct amdgpu_vm *vm = bo_base->vm; - bool was_moved = bo_base->moved; - bo_base->moved = true; if (evicted && bo->tbo.resv == vm->root.base.bo->tbo.resv) { - if (bo->tbo.type == ttm_bo_type_kernel) - list_move(&bo_base->vm_status, &vm->evicted); - else - list_move_tail(&bo_base->vm_status, - &vm->evicted); + amdgpu_vm_bo_evicted(bo_base); continue; } - if (was_moved) + if (bo_base->moved) continue; + bo_base->moved = true; - if (bo->tbo.type == ttm_bo_type_kernel) { - list_move(&bo_base->vm_status, &vm->relocated); - } else { - spin_lock(&bo_base->vm->moved_lock); - list_move(&bo_base->vm_status, &vm->moved); - spin_unlock(&bo_base->vm->moved_lock); - } + if (bo->tbo.type == ttm_bo_type_kernel) + amdgpu_vm_bo_relocated(bo_base); + else if (bo->tbo.resv == vm->root.base.bo->tbo.resv) + amdgpu_vm_bo_moved(bo_base); + else + amdgpu_vm_bo_invalidated(bo_base); } } @@ -2574,6 +2939,22 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t min_vm_size, adev->vm_manager.fragment_size); } +static struct amdgpu_retryfault_hashtable *init_fault_hash(void) +{ + struct amdgpu_retryfault_hashtable *fault_hash; + + fault_hash = kmalloc(sizeof(*fault_hash), GFP_KERNEL); + if (!fault_hash) + return fault_hash; + + INIT_CHASH_TABLE(fault_hash->hash, + AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); + spin_lock_init(&fault_hash->lock); + fault_hash->count = 0; + + return fault_hash; +} + /** * amdgpu_vm_init - initialize a vm instance * @@ -2592,13 +2973,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, { struct amdgpu_bo_param bp; struct amdgpu_bo *root; - const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE, - AMDGPU_VM_PTE_COUNT(adev) * 8); - unsigned ring_instance; - struct amdgpu_ring *ring; - struct drm_sched_rq *rq; - unsigned long size; - uint64_t flags; int r, i; vm->va = RB_ROOT_CACHED; @@ -2606,18 +2980,15 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->reserved_vmid[i] = NULL; INIT_LIST_HEAD(&vm->evicted); INIT_LIST_HEAD(&vm->relocated); - spin_lock_init(&vm->moved_lock); INIT_LIST_HEAD(&vm->moved); INIT_LIST_HEAD(&vm->idle); + INIT_LIST_HEAD(&vm->invalidated); + spin_lock_init(&vm->invalidated_lock); INIT_LIST_HEAD(&vm->freed); /* create scheduler entity for page table updates */ - - ring_instance = atomic_inc_return(&adev->vm_manager.vm_pte_next_ring); - ring_instance %= adev->vm_manager.vm_pte_num_rings; - ring = adev->vm_manager.vm_pte_rings[ring_instance]; - rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL]; - r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL); + r = drm_sched_entity_init(&vm->entity, adev->vm_manager.vm_pte_rqs, + adev->vm_manager.vm_pte_num_rqs, NULL); if (r) return r; @@ -2639,20 +3010,9 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, "CPU update of VM recommended only for large BAR system\n"); vm->last_update = NULL; - flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS; - if (vm->use_cpu_for_update) - flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED; - else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE) - flags |= AMDGPU_GEM_CREATE_SHADOW; - - size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level); - memset(&bp, 0, sizeof(bp)); - bp.size = size; - bp.byte_align = align; - bp.domain = AMDGPU_GEM_DOMAIN_VRAM; - bp.flags = flags; - bp.type = ttm_bo_type_kernel; - bp.resv = NULL; + amdgpu_vm_bo_param(adev, vm, adev->vm_manager.root_level, &bp); + if (vm_context == AMDGPU_VM_CONTEXT_COMPUTE) + bp.flags &= ~AMDGPU_GEM_CREATE_SHADOW; r = amdgpu_bo_create(adev, &bp, &root); if (r) goto error_free_sched_entity; @@ -2661,6 +3021,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) goto error_free_root; + r = reservation_object_reserve_shared(root->tbo.resv, 1); + if (r) + goto error_unreserve; + r = amdgpu_vm_clear_bo(adev, vm, root, adev->vm_manager.root_level, vm->pte_support_ats); @@ -2683,8 +3047,13 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, vm->pasid = pasid; } + vm->fault_hash = init_fault_hash(); + if (!vm->fault_hash) { + r = -ENOMEM; + goto error_free_root; + } + INIT_KFIFO(vm->faults); - vm->fault_credit = 16; return 0; @@ -2722,7 +3091,7 @@ error_free_sched_entity: * Returns: * 0 for success, -errno for errors. */ -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid) { bool pte_support_ats = (adev->asic_type == CHIP_RAVEN); int r; @@ -2734,7 +3103,20 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Sanity checks */ if (!RB_EMPTY_ROOT(&vm->va.rb_root) || vm->root.entries) { r = -EINVAL; - goto error; + goto unreserve_bo; + } + + if (pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid + 1, + GFP_ATOMIC); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + + if (r == -ENOSPC) + goto unreserve_bo; + r = 0; } /* Check if PD needs to be reinitialized and do it before @@ -2745,7 +3127,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) adev->vm_manager.root_level, pte_support_ats); if (r) - goto error; + goto free_idr; } /* Update VM state */ @@ -2764,45 +3146,52 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + /* Free the original amdgpu allocated pasid + * Will be replaced with kfd allocated pasid + */ + amdgpu_pasid_free(vm->pasid); vm->pasid = 0; } /* Free the shadow bo for compute VM */ amdgpu_bo_unref(&vm->root.base.bo->shadow); -error: + if (pasid) + vm->pasid = pasid; + + goto unreserve_bo; + +free_idr: + if (pasid) { + unsigned long flags; + + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); + } +unreserve_bo: amdgpu_bo_unreserve(vm->root.base.bo); return r; } /** - * amdgpu_vm_free_levels - free PD/PT levels - * - * @adev: amdgpu device structure - * @parent: PD/PT starting level to free - * @level: level of parent structure + * amdgpu_vm_release_compute - release a compute vm + * @adev: amdgpu_device pointer + * @vm: a vm turned into compute vm by calling amdgpu_vm_make_compute * - * Free the page directory or page table level and all sub levels. + * This is a correspondant of amdgpu_vm_make_compute. It decouples compute + * pasid from vm. Compute should stop use of vm after this call. */ -static void amdgpu_vm_free_levels(struct amdgpu_device *adev, - struct amdgpu_vm_pt *parent, - unsigned level) +void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm) { - unsigned i, num_entries = amdgpu_vm_num_entries(adev, level); + if (vm->pasid) { + unsigned long flags; - if (parent->base.bo) { - list_del(&parent->base.bo_list); - list_del(&parent->base.vm_status); - amdgpu_bo_unref(&parent->base.bo->shadow); - amdgpu_bo_unref(&parent->base.bo); + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags); + idr_remove(&adev->vm_manager.pasid_idr, vm->pasid); + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } - - if (parent->entries) - for (i = 0; i < num_entries; i++) - amdgpu_vm_free_levels(adev, &parent->entries[i], - level + 1); - - kvfree(parent->entries); + vm->pasid = 0; } /** @@ -2826,7 +3215,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) /* Clear pending page faults from IH when the VM is destroyed */ while (kfifo_get(&vm->faults, &fault)) - amdgpu_ih_clear_fault(adev, fault); + amdgpu_vm_clear_fault(vm->fault_hash, fault); if (vm->pasid) { unsigned long flags; @@ -2836,6 +3225,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags); } + kfree(vm->fault_hash); + vm->fault_hash = NULL; + drm_sched_entity_destroy(&vm->entity); if (!RB_EMPTY_ROOT(&vm->va.rb_root)) { @@ -2843,8 +3235,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va.rb_root, rb) { + /* Don't remove the mapping here, we don't want to trigger a + * rebalance and the tree is about to be destroyed anyway. + */ list_del(&mapping->list); - amdgpu_vm_it_remove(mapping, &vm->va); kfree(mapping); } list_for_each_entry_safe(mapping, tmp, &vm->freed, list) { @@ -2862,8 +3256,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) if (r) { dev_err(adev->dev, "Leaking page tables because BO reservation failed\n"); } else { - amdgpu_vm_free_levels(adev, &vm->root, - adev->vm_manager.root_level); + amdgpu_vm_free_pts(adev, vm); amdgpu_bo_unreserve(root); } amdgpu_bo_unref(&root); @@ -2873,42 +3266,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } /** - * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID - * - * @adev: amdgpu_device pointer - * @pasid: PASID do identify the VM - * - * This function is expected to be called in interrupt context. - * - * Returns: - * True if there was fault credit, false otherwise - */ -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid) -{ - struct amdgpu_vm *vm; - - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (!vm) { - /* VM not found, can't track fault credit */ - spin_unlock(&adev->vm_manager.pasid_lock); - return true; - } - - /* No lock needed. only accessed by IRQ handler */ - if (!vm->fault_credit) { - /* Too many faults in this VM */ - spin_unlock(&adev->vm_manager.pasid_lock); - return false; - } - - vm->fault_credit--; - spin_unlock(&adev->vm_manager.pasid_lock); - return true; -} - -/** * amdgpu_vm_manager_init - init the VM manager * * @adev: amdgpu_device pointer @@ -2926,7 +3283,6 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev) for (i = 0; i < AMDGPU_MAX_RINGS; ++i) adev->vm_manager.seqno[i] = 0; - atomic_set(&adev->vm_manager.vm_pte_next_ring, 0); spin_lock_init(&adev->vm_manager.prt_lock); atomic_set(&adev->vm_manager.num_prt_users, 0); @@ -3002,7 +3358,7 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp) /** * amdgpu_vm_get_task_info - Extracts task info for a PASID. * - * @dev: drm device pointer + * @adev: drm device pointer * @pasid: PASID identifier for VM * @task_info: task_info to fill. */ @@ -3037,3 +3393,78 @@ void amdgpu_vm_set_task_info(struct amdgpu_vm *vm) } } } + +/** + * amdgpu_vm_add_fault - Add a page fault record to fault hash table + * + * @fault_hash: fault hash table + * @key: 64-bit encoding of PASID and address + * + * This should be called when a retry page fault interrupt is + * received. If this is a new page fault, it will be added to a hash + * table. The return value indicates whether this is a new fault, or + * a fault that was already known and is already being handled. + * + * If there are too many pending page faults, this will fail. Retry + * interrupts should be ignored in this case until there is enough + * free space. + * + * Returns 0 if the fault was added, 1 if the fault was already known, + * -ENOSPC if there are too many pending faults. + */ +int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) +{ + unsigned long flags; + int r = -ENOSPC; + + if (WARN_ON_ONCE(!fault_hash)) + /* Should be allocated in amdgpu_vm_init + */ + return r; + + spin_lock_irqsave(&fault_hash->lock, flags); + + /* Only let the hash table fill up to 50% for best performance */ + if (fault_hash->count >= (1 << (AMDGPU_PAGEFAULT_HASH_BITS-1))) + goto unlock_out; + + r = chash_table_copy_in(&fault_hash->hash, key, NULL); + if (!r) + fault_hash->count++; + + /* chash_table_copy_in should never fail unless we're losing count */ + WARN_ON_ONCE(r < 0); + +unlock_out: + spin_unlock_irqrestore(&fault_hash->lock, flags); + return r; +} + +/** + * amdgpu_vm_clear_fault - Remove a page fault record + * + * @fault_hash: fault hash table + * @key: 64-bit encoding of PASID and address + * + * This should be called when a page fault has been handled. Any + * future interrupt with this key will be processed as a new + * page fault. + */ +void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key) +{ + unsigned long flags; + int r; + + if (!fault_hash) + return; + + spin_lock_irqsave(&fault_hash->lock, flags); + + r = chash_table_remove(&fault_hash->hash, key, NULL); + if (!WARN_ON_ONCE(r < 0)) { + fault_hash->count--; + WARN_ON_ONCE(fault_hash->count < 0); + } + + spin_unlock_irqrestore(&fault_hash->lock, flags); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h index 9fa9df0c5e7f..e8dcfd59fc93 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h @@ -29,6 +29,8 @@ #include <linux/rbtree.h> #include <drm/gpu_scheduler.h> #include <drm/drm_file.h> +#include <drm/ttm/ttm_bo_driver.h> +#include <linux/chash.h> #include "amdgpu_sync.h" #include "amdgpu_ring.h" @@ -48,9 +50,6 @@ struct amdgpu_bo_list_entry; /* number of entries in page table */ #define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size) -/* PTBs (Page Table Blocks) need to be aligned to 32K */ -#define AMDGPU_VM_PTB_ALIGN_SIZE 32768 - #define AMDGPU_PTE_VALID (1ULL << 0) #define AMDGPU_PTE_SYSTEM (1ULL << 1) #define AMDGPU_PTE_SNOOPED (1ULL << 2) @@ -103,19 +102,6 @@ struct amdgpu_bo_list_entry; /* hardcode that limit for now */ #define AMDGPU_VA_RESERVED_SIZE (1ULL << 20) -/* VA hole for 48bit addresses on Vega10 */ -#define AMDGPU_VA_HOLE_START 0x0000800000000000ULL -#define AMDGPU_VA_HOLE_END 0xffff800000000000ULL - -/* - * Hardware is programmed as if the hole doesn't exists with start and end - * address values. - * - * This mask is used to remove the upper 16bits of the VA and so come up with - * the linear addr value. - */ -#define AMDGPU_VA_HOLE_MASK 0x0000ffffffffffffULL - /* max vmids dedicated for process */ #define AMDGPU_VM_MAX_RESERVED_VMID 1 @@ -143,7 +129,7 @@ struct amdgpu_vm_bo_base { struct amdgpu_bo *bo; /* protected by bo being reserved */ - struct list_head bo_list; + struct amdgpu_vm_bo_base *next; /* protected by spinlock */ struct list_head vm_status; @@ -160,6 +146,27 @@ struct amdgpu_vm_pt { struct amdgpu_vm_pt *entries; }; +/* provided by hw blocks that can write ptes, e.g., sdma */ +struct amdgpu_vm_pte_funcs { + /* number of dw to reserve per operation */ + unsigned copy_pte_num_dw; + + /* copy pte entries from GART */ + void (*copy_pte)(struct amdgpu_ib *ib, + uint64_t pe, uint64_t src, + unsigned count); + + /* write pte one entry at a time with addr mapping */ + void (*write_pte)(struct amdgpu_ib *ib, uint64_t pe, + uint64_t value, unsigned count, + uint32_t incr); + /* for linear pte/pde updates without addr mapping */ + void (*set_pte_pde)(struct amdgpu_ib *ib, + uint64_t pe, + uint64_t addr, unsigned count, + uint32_t incr, uint64_t flags); +}; + #define AMDGPU_VM_FAULT(pasid, addr) (((u64)(pasid) << 48) | (addr)) #define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48) #define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL) @@ -172,6 +179,13 @@ struct amdgpu_task_info { pid_t tgid; }; +#define AMDGPU_PAGEFAULT_HASH_BITS 8 +struct amdgpu_retryfault_hashtable { + DECLARE_CHASH_TABLE(hash, AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); + spinlock_t lock; + int count; +}; + struct amdgpu_vm { /* tree of virtual addresses mapped */ struct rb_root_cached va; @@ -182,13 +196,16 @@ struct amdgpu_vm { /* PT BOs which relocated and their parent need an update */ struct list_head relocated; - /* BOs moved, but not yet updated in the PT */ + /* per VM BOs moved, but not yet updated in the PT */ struct list_head moved; - spinlock_t moved_lock; /* All BOs of this VM not currently in the state machine */ struct list_head idle; + /* regular invalidated BOs, but not yet updated in the PT */ + struct list_head invalidated; + spinlock_t invalidated_lock; + /* BO mappings freed, but not yet updated in the PT */ struct list_head freed; @@ -212,9 +229,6 @@ struct amdgpu_vm { /* Up to 128 pending retry page faults */ DECLARE_KFIFO(faults, u64, 128); - /* Limit non-retry fault storms */ - unsigned int fault_credit; - /* Points to the KFD process VM info */ struct amdkfd_process_info *process_info; @@ -226,6 +240,12 @@ struct amdgpu_vm { /* Some basic info about the task */ struct amdgpu_task_info task_info; + + /* Store positions of group of BOs */ + struct ttm_lru_bulk_move lru_bulk_move; + /* mark whether can do the bulk move */ + bool bulk_moveable; + struct amdgpu_retryfault_hashtable *fault_hash; }; struct amdgpu_vm_manager { @@ -244,10 +264,9 @@ struct amdgpu_vm_manager { /* vram base address for page table entry */ u64 vram_base_offset; /* vm pte handling */ - const struct amdgpu_vm_pte_funcs *vm_pte_funcs; - struct amdgpu_ring *vm_pte_rings[AMDGPU_MAX_RINGS]; - unsigned vm_pte_num_rings; - atomic_t vm_pte_next_ring; + const struct amdgpu_vm_pte_funcs *vm_pte_funcs; + struct drm_sched_rq *vm_pte_rqs[AMDGPU_MAX_RINGS]; + unsigned vm_pte_num_rqs; /* partial resident texture handling */ spinlock_t prt_lock; @@ -266,14 +285,17 @@ struct amdgpu_vm_manager { spinlock_t pasid_lock; }; +#define amdgpu_vm_copy_pte(adev, ib, pe, src, count) ((adev)->vm_manager.vm_pte_funcs->copy_pte((ib), (pe), (src), (count))) +#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr))) +#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags))) + void amdgpu_vm_manager_init(struct amdgpu_device *adev); void amdgpu_vm_manager_fini(struct amdgpu_device *adev); int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, int vm_context, unsigned int pasid); -int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); +int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid); +void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm); void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm); -bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev, - unsigned int pasid); void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm, struct list_head *validated, struct amdgpu_bo_list_entry *entry); @@ -330,8 +352,15 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring, void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev); void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid, - struct amdgpu_task_info *task_info); + struct amdgpu_task_info *task_info); void amdgpu_vm_set_task_info(struct amdgpu_vm *vm); +void amdgpu_vm_move_to_lru_tail(struct amdgpu_device *adev, + struct amdgpu_vm *vm); + +int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); + +void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key); + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c index 9cfa8a9ada92..3f9d5d00c9b3 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c @@ -125,6 +125,28 @@ u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo) } /** + * amdgpu_vram_mgr_virt_start - update virtual start address + * + * @mem: ttm_mem_reg to update + * @node: just allocated node + * + * Calculate a virtual BO start address to easily check if everything is CPU + * accessible. + */ +static void amdgpu_vram_mgr_virt_start(struct ttm_mem_reg *mem, + struct drm_mm_node *node) +{ + unsigned long start; + + start = node->start + node->size; + if (start > mem->num_pages) + start -= mem->num_pages; + else + start = 0; + mem->start = max(mem->start, start); +} + +/** * amdgpu_vram_mgr_new - allocate new ranges * * @man: TTM memory type manager @@ -176,10 +198,25 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, pages_left = mem->num_pages; spin_lock(&mgr->lock); - for (i = 0; i < num_nodes; ++i) { + for (i = 0; pages_left >= pages_per_node; ++i) { + unsigned long pages = rounddown_pow_of_two(pages_left); + + r = drm_mm_insert_node_in_range(mm, &nodes[i], pages, + pages_per_node, 0, + place->fpfn, lpfn, + mode); + if (unlikely(r)) + break; + + usage += nodes[i].size << PAGE_SHIFT; + vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); + amdgpu_vram_mgr_virt_start(mem, &nodes[i]); + pages_left -= pages; + } + + for (; pages_left; ++i) { unsigned long pages = min(pages_left, pages_per_node); uint32_t alignment = mem->page_alignment; - unsigned long start; if (pages == pages_per_node) alignment = pages_per_node; @@ -193,16 +230,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man, usage += nodes[i].size << PAGE_SHIFT; vis_usage += amdgpu_vram_mgr_vis_size(adev, &nodes[i]); - - /* Calculate a virtual BO start address to easily check if - * everything is CPU accessible. - */ - start = nodes[i].start + nodes[i].size; - if (start > mem->num_pages) - start -= mem->num_pages; - else - start = 0; - mem->start = max(mem->start, start); + amdgpu_vram_mgr_virt_start(mem, &nodes[i]); pages_left -= pages; } spin_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c new file mode 100644 index 000000000000..8a8bc60cb6b4 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c @@ -0,0 +1,168 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + * + */ +#include <linux/list.h> +#include "amdgpu.h" +#include "amdgpu_xgmi.h" + + +static DEFINE_MUTEX(xgmi_mutex); + +#define AMDGPU_MAX_XGMI_HIVE 8 +#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4 + +static struct amdgpu_hive_info xgmi_hives[AMDGPU_MAX_XGMI_HIVE]; +static unsigned hive_count = 0; + + +void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive) +{ + return &hive->device_list; +} + +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev) +{ + int i; + struct amdgpu_hive_info *tmp; + + if (!adev->gmc.xgmi.hive_id) + return NULL; + for (i = 0 ; i < hive_count; ++i) { + tmp = &xgmi_hives[i]; + if (tmp->hive_id == adev->gmc.xgmi.hive_id) + return tmp; + } + if (i >= AMDGPU_MAX_XGMI_HIVE) + return NULL; + + /* initialize new hive if not exist */ + tmp = &xgmi_hives[hive_count++]; + tmp->hive_id = adev->gmc.xgmi.hive_id; + INIT_LIST_HEAD(&tmp->device_list); + mutex_init(&tmp->hive_lock); + + return tmp; +} + +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev) +{ + int ret = -EINVAL; + + /* Each psp need to set the latest topology */ + ret = psp_xgmi_set_topology_info(&adev->psp, + hive->number_devices, + &hive->topology_info); + if (ret) + dev_err(adev->dev, + "XGMI: Set topology failure on device %llx, hive %llx, ret %d", + adev->gmc.xgmi.node_id, + adev->gmc.xgmi.hive_id, ret); + else + dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n", + adev->gmc.xgmi.physical_node_id, + adev->gmc.xgmi.hive_id); + + return ret; +} + +int amdgpu_xgmi_add_device(struct amdgpu_device *adev) +{ + struct psp_xgmi_topology_info *hive_topology; + struct amdgpu_hive_info *hive; + struct amdgpu_xgmi *entry; + struct amdgpu_device *tmp_adev = NULL; + + int count = 0, ret = -EINVAL; + + if (!adev->gmc.xgmi.supported) + return 0; + + ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get node id\n"); + return ret; + } + + ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id); + if (ret) { + dev_err(adev->dev, + "XGMI: Failed to get hive id\n"); + return ret; + } + + mutex_lock(&xgmi_mutex); + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + goto exit; + + hive_topology = &hive->topology_info; + + list_add_tail(&adev->gmc.xgmi.head, &hive->device_list); + list_for_each_entry(entry, &hive->device_list, head) + hive_topology->nodes[count++].node_id = entry->node_id; + hive->number_devices = count; + + /* Each psp need to get the latest topology */ + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = psp_xgmi_get_topology_info(&tmp_adev->psp, count, hive_topology); + if (ret) { + dev_err(tmp_adev->dev, + "XGMI: Get topology failure on device %llx, hive %llx, ret %d", + tmp_adev->gmc.xgmi.node_id, + tmp_adev->gmc.xgmi.hive_id, ret); + /* To do : continue with some node failed or disable the whole hive */ + break; + } + } + + list_for_each_entry(tmp_adev, &hive->device_list, gmc.xgmi.head) { + ret = amdgpu_xgmi_update_topology(hive, tmp_adev); + if (ret) + break; + } + +exit: + mutex_unlock(&xgmi_mutex); + return ret; +} + +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev) +{ + struct amdgpu_hive_info *hive; + + if (!adev->gmc.xgmi.supported) + return; + + mutex_lock(&xgmi_mutex); + + hive = amdgpu_get_xgmi_hive(adev); + if (!hive) + goto exit; + + if (!(hive->number_devices--)) + mutex_destroy(&hive->hive_lock); + +exit: + mutex_unlock(&xgmi_mutex); +} diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h new file mode 100644 index 000000000000..6151eb9c8ad3 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h @@ -0,0 +1,40 @@ +/* + * Copyright 2016 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef __AMDGPU_XGMI_H__ +#define __AMDGPU_XGMI_H__ + +#include "amdgpu_psp.h" + +struct amdgpu_hive_info { + uint64_t hive_id; + struct list_head device_list; + struct psp_xgmi_topology_info topology_info; + int number_devices; + struct mutex hive_lock; +}; + +struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev); +int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev); +int amdgpu_xgmi_add_device(struct amdgpu_device *adev); +void amdgpu_xgmi_remove_device(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c index d702fb8e3427..60e2447e12c5 100644 --- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c +++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c @@ -28,6 +28,7 @@ #include <drm/amdgpu_drm.h> #include "amdgpu.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "atom.h" #include "atombios_encoders.h" #include "atombios_dp.h" diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c index d2469453dca2..86e14c754dd4 100644 --- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c @@ -743,19 +743,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable) if (pi->caps_sq_ramping || pi->caps_db_ramping || pi->caps_td_ramping || pi->caps_tcp_ramping) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { ret = ci_program_pt_config_registers(adev, didt_config_ci); if (ret) { - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return ret; } } ci_do_enable_didt(adev, enable); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; @@ -6277,12 +6277,12 @@ static int ci_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c index 78ab939ae5d8..71c50d8900e3 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.c +++ b/drivers/gpu/drm/amd/amdgpu/cik.c @@ -1755,6 +1755,7 @@ static const struct amdgpu_asic_funcs cik_asic_funcs = .flush_hdp = &cik_flush_hdp, .invalidate_hdp = &cik_invalidate_hdp, .need_full_reset = &cik_need_full_reset, + .init_doorbell_index = &legacy_doorbell_index_init, }; static int cik_common_early_init(void *handle) @@ -2002,6 +2003,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); if (amdgpu_dpm == -1) amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); else @@ -2014,8 +2017,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v8_2_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; @@ -2023,6 +2024,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); if (amdgpu_dpm == -1) amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); else @@ -2035,8 +2038,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v8_5_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block); - amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; @@ -2044,6 +2045,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -2053,8 +2056,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v8_1_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v7_1_ip_block); - amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); + amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; @@ -2063,6 +2065,8 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &cik_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_0_ip_block); amdgpu_device_ip_block_add(adev, &cik_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); + amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &kv_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -2072,8 +2076,6 @@ int cik_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v8_3_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block); - amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v4_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v2_0_ip_block); break; diff --git a/drivers/gpu/drm/amd/amdgpu/cik.h b/drivers/gpu/drm/amd/amdgpu/cik.h index e49c6f15a0a0..54c625a2e570 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik.h +++ b/drivers/gpu/drm/amd/amdgpu/cik.h @@ -30,4 +30,5 @@ void cik_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int cik_set_ip_blocks(struct amdgpu_device *adev); +void legacy_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c index 44d10c2172f6..8a8b4967a101 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c @@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev) * [127:96] - reserved */ -/** - * cik_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cik_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - /** * cik_ih_decode_iv - decode an interrupt vector * @@ -276,7 +248,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -318,7 +290,7 @@ static int cik_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -332,7 +304,7 @@ static int cik_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -461,15 +433,13 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = { static const struct amdgpu_ih_funcs cik_ih_funcs = { .get_wptr = cik_ih_get_wptr, - .prescreen_iv = cik_ih_prescreen_iv, .decode_iv = cik_ih_decode_iv, .set_rptr = cik_ih_set_rptr }; static void cik_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &cik_ih_funcs; + adev->irq.ih_funcs = &cik_ih_funcs; } const struct amdgpu_ip_block_version cik_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index d0fa2aac2388..45795191de1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ @@ -316,8 +318,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -494,18 +496,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } cik_sdma_enable(adev, true); for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -618,21 +618,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); @@ -647,15 +643,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -678,20 +670,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); @@ -706,21 +694,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -822,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -970,19 +953,19 @@ static int cik_sdma_sw_init(void *handle) } /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 247, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 247, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1214,8 +1197,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } @@ -1370,10 +1356,8 @@ static const struct amdgpu_buffer_funcs cik_sdma_buffer_funcs = { static void cik_sdma_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &cik_sdma_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &cik_sdma_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { @@ -1386,16 +1370,16 @@ static const struct amdgpu_vm_pte_funcs cik_sdma_vm_pte_funcs = { static void cik_sdma_set_vm_pte_funcs(struct amdgpu_device *adev) { + struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) - adev->vm_manager.vm_pte_rings[i] = - &adev->sdma.instance[i].ring; - - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &cik_sdma_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version cik_sdma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c index 960c29e17da6..9d3ea298e116 100644 --- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c @@ -208,34 +208,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev) } /** - * cz_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool cz_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * cz_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -255,7 +227,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -297,7 +269,7 @@ static int cz_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -311,7 +283,7 @@ static int cz_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -442,15 +414,13 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = { static const struct amdgpu_ih_funcs cz_ih_funcs = { .get_wptr = cz_ih_get_wptr, - .prescreen_iv = cz_ih_prescreen_iv, .decode_iv = cz_ih_decode_iv, .set_rptr = cz_ih_set_rptr }; static void cz_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &cz_ih_funcs; + adev->irq.ih_funcs = &cz_ih_funcs; } const struct amdgpu_ip_block_version cz_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c index 308f9f238bc1..4cfecdce29a3 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c @@ -31,6 +31,7 @@ #include "atombios_encoders.h" #include "amdgpu_pll.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "dce_v10_0.h" #include "dce/dce_10_0_d.h" @@ -1942,6 +1943,17 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc, /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); + fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2); + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2); +#ifdef __BIG_ENDIAN + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, + ENDIAN_8IN32); +#endif + break; default: DRM_ERROR("Unsupported screen format %s\n", drm_get_format_name(target_fb->format->format, &format_name)); @@ -2734,19 +2746,19 @@ static int dce_v10_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; @@ -3558,8 +3570,7 @@ static const struct amdgpu_display_funcs dce_v10_0_display_funcs = { static void dce_v10_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v10_0_display_funcs; + adev->mode_info.funcs = &dce_v10_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v10_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c index 76dfb76f7900..7c868916d90f 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c @@ -31,6 +31,7 @@ #include "atombios_encoders.h" #include "amdgpu_pll.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "dce_v11_0.h" #include "dce/dce_11_0_d.h" @@ -1984,6 +1985,17 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc, /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + fb_format = REG_SET_FIELD(0, GRPH_CONTROL, GRPH_DEPTH, 2); + fb_format = REG_SET_FIELD(fb_format, GRPH_CONTROL, GRPH_FORMAT, 0); + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_RED_CROSSBAR, 2); + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_BLUE_CROSSBAR, 2); +#ifdef __BIG_ENDIAN + fb_swap = REG_SET_FIELD(fb_swap, GRPH_SWAP_CNTL, GRPH_ENDIAN_SWAP, + ENDIAN_8IN32); +#endif + break; default: DRM_ERROR("Unsupported screen format %s\n", drm_get_format_name(target_fb->format->format, &format_name)); @@ -2855,19 +2867,19 @@ static int dce_v11_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = VISLANDS30_IV_SRCID_D1_GRPH_PFLIP; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_HOTPLUG_DETECT_A, &adev->hpd_irq); if (r) return r; @@ -3690,8 +3702,7 @@ static const struct amdgpu_display_funcs dce_v11_0_display_funcs = { static void dce_v11_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v11_0_display_funcs; + adev->mode_info.funcs = &dce_v11_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v11_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c index c9adc627305d..17eaaba36017 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c @@ -30,6 +30,7 @@ #include "atombios_encoders.h" #include "amdgpu_pll.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" @@ -1887,6 +1888,16 @@ static int dce_v6_0_crtc_do_set_base(struct drm_crtc *crtc, /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + fb_format = (GRPH_DEPTH(GRPH_DEPTH_32BPP) | + GRPH_FORMAT(GRPH_FORMAT_ARGB8888)); + fb_swap = (GRPH_RED_CROSSBAR(GRPH_RED_SEL_B) | + GRPH_BLUE_CROSSBAR(GRPH_BLUE_SEL_R)); +#ifdef __BIG_ENDIAN + fb_swap |= GRPH_ENDIAN_SWAP(GRPH_ENDIAN_8IN32); +#endif + break; default: DRM_ERROR("Unsupported screen format %s\n", drm_get_format_name(target_fb->format->format, &format_name)); @@ -2605,19 +2616,19 @@ static int dce_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3365,8 +3376,7 @@ static const struct amdgpu_display_funcs dce_v6_0_display_funcs = { static void dce_v6_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v6_0_display_funcs; + adev->mode_info.funcs = &dce_v6_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v6_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c index 50cd03beac7d..8c0576978d36 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c @@ -31,6 +31,7 @@ #include "atombios_encoders.h" #include "amdgpu_pll.h" #include "amdgpu_connectors.h" +#include "amdgpu_display.h" #include "dce_v8_0.h" #include "dce/dce_8_0_d.h" @@ -1864,6 +1865,16 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc, /* Greater 8 bpc fb needs to bypass hw-lut to retain precision */ bypass_lut = true; break; + case DRM_FORMAT_XBGR8888: + case DRM_FORMAT_ABGR8888: + fb_format = ((GRPH_DEPTH_32BPP << GRPH_CONTROL__GRPH_DEPTH__SHIFT) | + (GRPH_FORMAT_ARGB8888 << GRPH_CONTROL__GRPH_FORMAT__SHIFT)); + fb_swap = ((GRPH_RED_SEL_B << GRPH_SWAP_CNTL__GRPH_RED_CROSSBAR__SHIFT) | + (GRPH_BLUE_SEL_R << GRPH_SWAP_CNTL__GRPH_BLUE_CROSSBAR__SHIFT)); +#ifdef __BIG_ENDIAN + fb_swap |= (GRPH_ENDIAN_8IN32 << GRPH_SWAP_CNTL__GRPH_ENDIAN_SWAP__SHIFT); +#endif + break; default: DRM_ERROR("Unsupported screen format %s\n", drm_get_format_name(target_fb->format->format, &format_name)); @@ -2632,19 +2643,19 @@ static int dce_v8_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->mode_info.num_crtc; i++) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + 1, &adev->crtc_irq); if (r) return r; } for (i = 8; i < 20; i += 2) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i, &adev->pageflip_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i, &adev->pageflip_irq); if (r) return r; } /* HPD hotplug */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 42, &adev->hpd_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 42, &adev->hpd_irq); if (r) return r; @@ -3447,8 +3458,7 @@ static const struct amdgpu_display_funcs dce_v8_0_display_funcs = { static void dce_v8_0_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_v8_0_display_funcs; + adev->mode_info.funcs = &dce_v8_0_display_funcs; } static const struct amdgpu_irq_src_funcs dce_v8_0_crtc_irq_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c index 15257634a53a..fdace004544d 100644 --- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c +++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c @@ -372,7 +372,7 @@ static int dce_virtual_sw_init(void *handle) int r, i; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SMU_DISP_TIMER2_TRIGGER, &adev->crtc_irq); if (r) return r; @@ -649,8 +649,7 @@ static const struct amdgpu_display_funcs dce_virtual_display_funcs = { static void dce_virtual_set_display_funcs(struct amdgpu_device *adev) { - if (adev->mode_info.funcs == NULL) - adev->mode_info.funcs = &dce_virtual_display_funcs; + adev->mode_info.funcs = &dce_virtual_display_funcs; } static int dce_virtual_pageflip(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c index de184a886057..1dc3013ea1d5 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c @@ -1552,7 +1552,7 @@ static void gfx_v6_0_config_init(struct amdgpu_device *adev) adev->gfx.config.double_offchip_lds_buf = 0; } -static void gfx_v6_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v6_0_constants_init(struct amdgpu_device *adev) { u32 gb_addr_config = 0; u32 mc_shared_chmap, mc_arb_ramcfg; @@ -1775,18 +1775,15 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_CONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -1798,13 +1795,11 @@ static int gfx_v6_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -1845,9 +1840,11 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, } static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ @@ -1892,17 +1889,15 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_SET_CONFIG_REG, 1); ib.ptr[1] = ((scratch - PACKET3_SET_CONFIG_REG_START)); ib.ptr[2] = 0xDEADBEEF; @@ -1914,22 +1909,16 @@ static int gfx_v6_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -1950,9 +1939,9 @@ static void gfx_v6_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) CP_ME_CNTL__CE_HALT_MASK)); WREG32(mmSCRATCH_UMSK, 0); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; + adev->gfx.compute_ring[i].sched.ready = false; } udelay(50); } @@ -2124,12 +2113,9 @@ static int gfx_v6_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the rings */ gfx_v6_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } return 0; } @@ -2227,14 +2213,11 @@ static int gfx_v6_0_cp_compute_resume(struct amdgpu_device *adev) WREG32(mmCP_RB2_CNTL, tmp); WREG32(mmCP_RB2_BASE, ring->gpu_addr >> 8); - adev->gfx.compute_ring[0].ready = false; - adev->gfx.compute_ring[1].ready = false; for (i = 0; i < 2; i++) { - r = amdgpu_ring_test_ring(&adev->gfx.compute_ring[i]); + r = amdgpu_ring_test_helper(&adev->gfx.compute_ring[i]); if (r) return r; - adev->gfx.compute_ring[i].ready = true; } return 0; @@ -2368,18 +2351,11 @@ static void gfx_v6_0_ring_emit_wreg(struct amdgpu_ring *ring, amdgpu_ring_write(ring, val); } -static void gfx_v6_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); -} - static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; volatile u32 *dst_ptr; - u32 dws, i; + u32 dws; u64 reg_list_mc_addr; const struct cs_section_def *cs_data; int r; @@ -2394,26 +2370,10 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (src_ptr) { - /* save restore block */ - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_obj, - &adev->gfx.rlc.save_restore_gpu_addr, - (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC sr bo failed\n", - r); - gfx_v6_0_rlc_fini(adev); + /* init save restore block */ + r = amdgpu_gfx_rlc_init_sr(adev, dws); + if (r) return r; - } - - /* write the sr buffer */ - dst_ptr = adev->gfx.rlc.sr_ptr; - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) - dst_ptr[i] = cpu_to_le32(src_ptr[i]); - - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } if (cs_data) { @@ -2428,7 +2388,7 @@ static int gfx_v6_0_rlc_init(struct amdgpu_device *adev) (void **)&adev->gfx.rlc.cs_ptr); if (r) { dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v6_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); return r; } @@ -2549,8 +2509,8 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) if (!adev->gfx.rlc_fw) return -EINVAL; - gfx_v6_0_rlc_stop(adev); - gfx_v6_0_rlc_reset(adev); + adev->gfx.rlc.funcs->stop(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v6_0_init_pg(adev); gfx_v6_0_init_cg(adev); @@ -2578,7 +2538,7 @@ static int gfx_v6_0_rlc_resume(struct amdgpu_device *adev) WREG32(mmRLC_UCODE_ADDR, 0); gfx_v6_0_enable_lbpw(adev, gfx_v6_0_lbpw_supported(adev)); - gfx_v6_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -3075,6 +3035,14 @@ static const struct amdgpu_gfx_funcs gfx_v6_0_gfx_funcs = { .select_me_pipe_q = &gfx_v6_0_select_me_pipe_q }; +static const struct amdgpu_rlc_funcs gfx_v6_0_rlc_funcs = { + .init = gfx_v6_0_rlc_init, + .resume = gfx_v6_0_rlc_resume, + .stop = gfx_v6_0_rlc_stop, + .reset = gfx_v6_0_rlc_reset, + .start = gfx_v6_0_rlc_start +}; + static int gfx_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -3082,6 +3050,7 @@ static int gfx_v6_0_early_init(void *handle) adev->gfx.num_gfx_rings = GFX6_NUM_GFX_RINGS; adev->gfx.num_compute_rings = GFX6_NUM_COMPUTE_RINGS; adev->gfx.funcs = &gfx_v6_0_gfx_funcs; + adev->gfx.rlc.funcs = &gfx_v6_0_rlc_funcs; gfx_v6_0_set_ring_funcs(adev); gfx_v6_0_set_irq_funcs(adev); @@ -3094,15 +3063,15 @@ static int gfx_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i, r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -3114,7 +3083,7 @@ static int gfx_v6_0_sw_init(void *handle) return r; } - r = gfx_v6_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -3165,7 +3134,7 @@ static int gfx_v6_0_sw_fini(void *handle) for (i = 0; i < adev->gfx.num_compute_rings; i++) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); - gfx_v6_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); return 0; } @@ -3175,9 +3144,9 @@ static int gfx_v6_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v6_0_gpu_init(adev); + gfx_v6_0_constants_init(adev); - r = gfx_v6_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3195,7 +3164,7 @@ static int gfx_v6_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfx_v6_0_cp_enable(adev, false); - gfx_v6_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v6_0_fini_pg(adev); return 0; @@ -3393,12 +3362,31 @@ static int gfx_v6_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v6_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_ring *ring; + + switch (entry->ring_id) { + case 0: + ring = &adev->gfx.gfx_ring[0]; + break; + case 1: + case 2: + ring = &adev->gfx.compute_ring[entry->ring_id - 1]; + break; + default: + return; + } + drm_sched_fault(&ring->sched); +} + static int gfx_v6_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v6_0_fault(adev, entry); return 0; } @@ -3407,7 +3395,7 @@ static int gfx_v6_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v6_0_fault(adev, entry); return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c index 95452c5a9df6..3a9fb6018c16 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c @@ -882,7 +882,6 @@ static const u32 kalindi_rlc_save_restore_register_list[] = static u32 gfx_v7_0_get_csb_size(struct amdgpu_device *adev); static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev, volatile u32 *buffer); -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev); static void gfx_v7_0_init_pg(struct amdgpu_device *adev); static void gfx_v7_0_get_cu_info(struct amdgpu_device *adev); @@ -1886,14 +1885,14 @@ static void gfx_v7_0_config_init(struct amdgpu_device *adev) } /** - * gfx_v7_0_gpu_init - setup the 3D engine + * gfx_v7_0_constants_init - setup the 3D engine * * @adev: amdgpu_device pointer * - * Configures the 3D engine and tiling configuration - * registers so that the 3D engine is usable. + * init the gfx constants such as the 3D engine, tiling configuration + * registers, maximum number of quad pipes, render backends... */ -static void gfx_v7_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v7_0_constants_init(struct amdgpu_device *adev) { u32 sh_mem_cfg, sh_static_mem_cfg, sh_mem_base; u32 tmp; @@ -2064,17 +2063,14 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -2086,13 +2082,10 @@ static int gfx_v7_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -2233,9 +2226,11 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring, * on the gfx ring for execution by the GPU. */ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; /* insert SWITCH_BUFFER packet before first IB in the ring frame */ @@ -2262,9 +2257,11 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); @@ -2316,17 +2313,15 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: failed to get scratch reg (%ld).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_SET_UCONFIG_REG, 1); ib.ptr[1] = ((scratch - PACKET3_SET_UCONFIG_REG_START)); ib.ptr[2] = 0xDEADBEEF; @@ -2338,22 +2333,16 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -2403,7 +2392,7 @@ static void gfx_v7_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_ME_CNTL, (CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK)); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } udelay(50); } @@ -2613,12 +2602,9 @@ static int gfx_v7_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v7_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } return 0; } @@ -2675,7 +2661,7 @@ static void gfx_v7_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; + adev->gfx.compute_ring[i].sched.ready = false; } udelay(50); } @@ -2781,7 +2767,7 @@ static int gfx_v7_0_mec_init(struct amdgpu_device *adev) * GFX7_MEC_HPD_SIZE * 2; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -3106,10 +3092,7 @@ static int gfx_v7_0_cp_compute_resume(struct amdgpu_device *adev) for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + amdgpu_ring_test_helper(ring); } return 0; @@ -3268,18 +3251,10 @@ static void gfx_v7_0_ring_emit_wreg(struct amdgpu_ring *ring, * The RLC is a multi-purpose microengine that handles a * variety of functions. */ -static void gfx_v7_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.save_restore_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); -} - static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) { const u32 *src_ptr; - volatile u32 *dst_ptr; - u32 dws, i; + u32 dws; const struct cs_section_def *cs_data; int r; @@ -3306,66 +3281,23 @@ static int gfx_v7_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (src_ptr) { - /* save restore block */ - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.save_restore_obj, - &adev->gfx.rlc.save_restore_gpu_addr, - (void **)&adev->gfx.rlc.sr_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create, pin or map of RLC sr bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init save restore block */ + r = amdgpu_gfx_rlc_init_sr(adev, dws); + if (r) return r; - } - - /* write the sr buffer */ - dst_ptr = adev->gfx.rlc.sr_ptr; - for (i = 0; i < adev->gfx.rlc.reg_list_size; i++) - dst_ptr[i] = cpu_to_le32(src_ptr[i]); - amdgpu_bo_kunmap(adev->gfx.rlc.save_restore_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.save_restore_obj); } if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v7_0_get_csb_size(adev); - - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v7_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if (adev->gfx.rlc.cp_table_size) { - - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); - gfx_v7_0_rlc_fini(adev); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - gfx_v7_0_init_cp_pg_table(adev); - - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); - } return 0; @@ -3446,7 +3378,12 @@ static u32 gfx_v7_0_halt_rlc(struct amdgpu_device *adev) return orig; } -static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v7_0_is_rlc_enabled(struct amdgpu_device *adev) +{ + return true; +} + +static void gfx_v7_0_set_safe_mode(struct amdgpu_device *adev) { u32 tmp, i, mask; @@ -3468,7 +3405,7 @@ static void gfx_v7_0_enter_rlc_safe_mode(struct amdgpu_device *adev) } } -static void gfx_v7_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v7_0_unset_safe_mode(struct amdgpu_device *adev) { u32 tmp; @@ -3545,13 +3482,13 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) adev->gfx.rlc_feature_version = le32_to_cpu( hdr->ucode_feature_version); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ tmp = RREG32(mmRLC_CGCG_CGLS_CTRL) & 0xfffffffc; WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); - gfx_v7_0_rlc_reset(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v7_0_init_pg(adev); @@ -3582,7 +3519,7 @@ static int gfx_v7_0_rlc_resume(struct amdgpu_device *adev) if (adev->asic_type == CHIP_BONAIRE) WREG32(mmRLC_DRIVER_CPDMA_STATUS, 0); - gfx_v7_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -3784,72 +3721,12 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable) WREG32(mmRLC_PG_CNTL, data); } -static void gfx_v7_0_init_cp_pg_table(struct amdgpu_device *adev) +static int gfx_v7_0_cp_pg_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 4; - u32 bo_offset = 0; - u32 table_offset, table_size; - if (adev->asic_type == CHIP_KAVERI) - max_me = 5; - - if (adev->gfx.rlc.cp_table_ptr == NULL) - return; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } + return 5; + else + return 4; } static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev, @@ -4170,15 +4047,6 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - /* GDS Base */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | @@ -4212,6 +4080,18 @@ static void gfx_v7_0_ring_emit_gds_switch(struct amdgpu_ring *ring, amdgpu_ring_write(ring, (1 << (oa_size + oa_base)) - (1 << oa_base)); } +static void gfx_v7_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32(mmSQ_CMD, value); +} + static uint32_t wave_read_ind(struct amdgpu_device *adev, uint32_t simd, uint32_t wave, uint32_t address) { WREG32(mmSQ_IND_INDEX, @@ -4285,8 +4165,17 @@ static const struct amdgpu_gfx_funcs gfx_v7_0_gfx_funcs = { }; static const struct amdgpu_rlc_funcs gfx_v7_0_rlc_funcs = { - .enter_safe_mode = gfx_v7_0_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v7_0_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v7_0_is_rlc_enabled, + .set_safe_mode = gfx_v7_0_set_safe_mode, + .unset_safe_mode = gfx_v7_0_unset_safe_mode, + .init = gfx_v7_0_rlc_init, + .get_csb_size = gfx_v7_0_get_csb_size, + .get_csb_buffer = gfx_v7_0_get_csb_buffer, + .get_cp_table_num = gfx_v7_0_cp_pg_table_num, + .resume = gfx_v7_0_rlc_resume, + .stop = gfx_v7_0_rlc_stop, + .reset = gfx_v7_0_rlc_reset, + .start = gfx_v7_0_rlc_start }; static int gfx_v7_0_early_init(void *handle) @@ -4474,7 +4363,7 @@ static int gfx_v7_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); irq_type = AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP @@ -4513,18 +4402,18 @@ static int gfx_v7_0_sw_init(void *handle) adev->gfx.mec.num_queue_per_pipe = 8; /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 181, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 184, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 184, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 185, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 185, &adev->gfx.priv_inst_irq); if (r) return r; @@ -4537,7 +4426,7 @@ static int gfx_v7_0_sw_init(void *handle) return r; } - r = gfx_v7_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -4579,25 +4468,6 @@ static int gfx_v7_0_sw_init(void *handle) } } - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; gfx_v7_0_gpu_early_init(adev); @@ -4620,7 +4490,7 @@ static int gfx_v7_0_sw_fini(void *handle) amdgpu_ring_fini(&adev->gfx.compute_ring[i]); gfx_v7_0_cp_compute_fini(adev); - gfx_v7_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); gfx_v7_0_mec_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, @@ -4640,10 +4510,10 @@ static int gfx_v7_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - gfx_v7_0_gpu_init(adev); + gfx_v7_0_constants_init(adev); /* init rlc */ - r = gfx_v7_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -4661,7 +4531,7 @@ static int gfx_v7_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); gfx_v7_0_cp_enable(adev, false); - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v7_0_fini_pg(adev); return 0; @@ -4746,7 +4616,7 @@ static int gfx_v7_0_soft_reset(void *handle) gfx_v7_0_update_cg(adev, false); /* stop the rlc */ - gfx_v7_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* Disable GFX parsing/prefetching */ WREG32(mmCP_ME_CNTL, CP_ME_CNTL__ME_HALT_MASK | CP_ME_CNTL__PFP_HALT_MASK | CP_ME_CNTL__CE_HALT_MASK); @@ -4975,12 +4845,36 @@ static int gfx_v7_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v7_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + struct amdgpu_ring *ring; + u8 me_id, pipe_id; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if ((ring->me == me_id) && (ring->pipe == pipe_id)) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v7_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } @@ -4990,7 +4884,7 @@ static int gfx_v7_0_priv_inst_irq(struct amdgpu_device *adev, { DRM_ERROR("Illegal instruction in command stream\n"); // XXX soft reset the gfx block only - schedule_work(&adev->reset_work); + gfx_v7_0_fault(adev, entry); return 0; } @@ -5088,6 +4982,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = { .pad_ib = amdgpu_ring_generic_pad_ib, .emit_cntxcntl = gfx_v7_ring_emit_cntxcntl, .emit_wreg = gfx_v7_0_ring_emit_wreg, + .soft_recovery = gfx_v7_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = { diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c index 5a9534a82d40..381f593b0cda 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c @@ -44,7 +44,6 @@ #include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_enum.h" #include "gca/gfx_8_0_sh_mask.h" -#include "gca/gfx_8_0_enum.h" #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" @@ -54,7 +53,7 @@ #include "ivsrcid/ivsrcid_vislands30.h" #define GFX8_NUM_GFX_RINGS 1 -#define GFX8_MEC_HPD_SIZE 2048 +#define GFX8_MEC_HPD_SIZE 4096 #define TOPAZ_GB_ADDR_CONFIG_GOLDEN 0x22010001 #define CARRIZO_GB_ADDR_CONFIG_GOLDEN 0x22010001 @@ -839,18 +838,14 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -862,14 +857,11 @@ static int gfx_v8_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -886,19 +878,16 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 16, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; ib.ptr[2] = lower_32_bits(gpu_addr); @@ -912,22 +901,17 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); r = -ETIMEDOUT; goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err2; } tmp = adev->wb.wb[index]; - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("ib test on ring %d failed\n", ring->idx); + else r = -EINVAL; - } err2: amdgpu_ib_free(adev, &ib, NULL); @@ -1114,14 +1098,14 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); if (adev->asic_type >= CHIP_POLARIS10 && adev->asic_type <= CHIP_POLARIS12) { @@ -1173,64 +1157,61 @@ static int gfx_v8_0_init_microcode(struct amdgpu_device *adev) } } - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; - info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; - info->fw = adev->gfx.pfp_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; - info->ucode_id = AMDGPU_UCODE_ID_CP_ME; - info->fw = adev->gfx.me_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; - info->ucode_id = AMDGPU_UCODE_ID_CP_CE; - info->fw = adev->gfx.ce_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_PFP]; + info->ucode_id = AMDGPU_UCODE_ID_CP_PFP; + info->fw = adev->gfx.pfp_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_ME]; + info->ucode_id = AMDGPU_UCODE_ID_CP_ME; + info->fw = adev->gfx.me_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_CE]; + info->ucode_id = AMDGPU_UCODE_ID_CP_CE; + info->fw = adev->gfx.ce_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; + info->ucode_id = AMDGPU_UCODE_ID_RLC_G; + info->fw = adev->gfx.rlc_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; + info->fw = adev->gfx.mec_fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + + /* we need account JT in */ + cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_RLC_G]; - info->ucode_id = AMDGPU_UCODE_ID_RLC_G; - info->fw = adev->gfx.rlc_fw; - header = (const struct common_firmware_header *)info->fw->data; + if (amdgpu_sriov_vf(adev)) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; + info->ucode_id = AMDGPU_UCODE_ID_STORAGE; + info->fw = adev->gfx.mec_fw; adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); + } - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC1]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC1; - info->fw = adev->gfx.mec_fw; + if (adev->gfx.mec2_fw) { + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; + info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; + info->fw = adev->gfx.mec2_fw; header = (const struct common_firmware_header *)info->fw->data; adev->firmware.fw_size += ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - - /* we need account JT in */ - cp_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(cp_hdr->jt_size) << 2, PAGE_SIZE); - - if (amdgpu_sriov_vf(adev)) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_STORAGE]; - info->ucode_id = AMDGPU_UCODE_ID_STORAGE; - info->fw = adev->gfx.mec_fw; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(64 * PAGE_SIZE), PAGE_SIZE); - } - - if (adev->gfx.mec2_fw) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_CP_MEC2]; - info->ucode_id = AMDGPU_UCODE_ID_CP_MEC2; - info->fw = adev->gfx.mec2_fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } - } out: @@ -1301,81 +1282,16 @@ static void gfx_v8_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(0); } -static void cz_init_cp_jump_table(struct amdgpu_device *adev) +static int gfx_v8_0_cp_jump_table_num(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 4; - u32 bo_offset = 0; - u32 table_offset, table_size; - if (adev->asic_type == CHIP_CARRIZO) - max_me = 5; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 4) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } - - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } - - bo_offset += table_size; - } -} - -static void gfx_v8_0_rlc_fini(struct amdgpu_device *adev) -{ - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, NULL, NULL); - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, NULL, NULL); + return 5; + else + return 4; } static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; - u32 dws; const struct cs_section_def *cs_data; int r; @@ -1384,44 +1300,18 @@ static int gfx_v8_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v8_0_get_csb_size(adev); - - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC c bo failed\n", r); - gfx_v8_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v8_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if ((adev->asic_type == CHIP_CARRIZO) || (adev->asic_type == CHIP_STONEY)) { adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_warn(adev->dev, "(%d) create RLC cp table bo failed\n", r); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - cz_init_cp_jump_table(adev); - - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); } return 0; @@ -1446,7 +1336,7 @@ static int gfx_v8_0_mec_init(struct amdgpu_device *adev) mec_hpd_size = adev->gfx.num_compute_rings * GFX8_MEC_HPD_SIZE; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -1632,7 +1522,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev) return 0; /* bail if the compute ring is not ready */ - if (!ring->ready) + if (!ring->sched.ready) return 0; tmp = RREG32(mmGB_EDC_MODE); @@ -2000,7 +1890,7 @@ static int gfx_v8_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_MEC_RING0 + ring_id; + ring->doorbell_index = adev->doorbell_index.mec_ring0 + ring_id; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX8_MEC_HPD_SIZE); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); @@ -2048,36 +1938,31 @@ static int gfx_v8_0_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; - /* KIQ event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_INT_IB2, &adev->gfx.kiq.irq); - if (r) - return r; - /* EOP Event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_END_OF_PIPE, &adev->gfx.eop_irq); if (r) return r; /* Privileged reg */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_REG_FAULT, &adev->gfx.priv_reg_irq); if (r) return r; /* Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_PRIV_INSTR_FAULT, &adev->gfx.priv_inst_irq); if (r) return r; /* Add CP EDC/ECC irq */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_CP_ECC_ERROR, &adev->gfx.cp_ecc_error_irq); if (r) return r; /* SQ interrupts. */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SQ_INTERRUPT_MSG, &adev->gfx.sq_irq); if (r) { DRM_ERROR("amdgpu_irq_add() for SQ failed: %d\n", r); @@ -2096,7 +1981,7 @@ static int gfx_v8_0_sw_init(void *handle) return r; } - r = gfx_v8_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -2116,7 +2001,7 @@ static int gfx_v8_0_sw_init(void *handle) /* no gfx doorbells on iceland */ if (adev->asic_type != CHIP_TOPAZ) { ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL_GFX_RING0; + ring->doorbell_index = adev->doorbell_index.gfx_ring0; } r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, @@ -2161,25 +2046,6 @@ static int gfx_v8_0_sw_init(void *handle) if (r) return r; - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; r = gfx_v8_0_gpu_early_init(adev); @@ -2208,7 +2074,7 @@ static int gfx_v8_0_sw_fini(void *handle) amdgpu_gfx_kiq_fini(adev); gfx_v8_0_mec_fini(adev); - gfx_v8_0_rlc_fini(adev); + amdgpu_gfx_rlc_fini(adev); amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, &adev->gfx.rlc.clear_state_gpu_addr, (void **)&adev->gfx.rlc.cs_ptr); @@ -3854,7 +3720,7 @@ static void gfx_v8_0_config_init(struct amdgpu_device *adev) } } -static void gfx_v8_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v8_0_constants_init(struct amdgpu_device *adev) { u32 tmp, sh_static_mem_cfg; int i; @@ -4200,66 +4066,17 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev) udelay(50); } -static int gfx_v8_0_rlc_load_microcode(struct amdgpu_device *adev) -{ - const struct rlc_firmware_header_v2_0 *hdr; - const __le32 *fw_data; - unsigned i, fw_size; - - if (!adev->gfx.rlc_fw) - return -EINVAL; - - hdr = (const struct rlc_firmware_header_v2_0 *)adev->gfx.rlc_fw->data; - amdgpu_ucode_print_rlc_hdr(&hdr->header); - - fw_data = (const __le32 *)(adev->gfx.rlc_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - - WREG32(mmRLC_GPM_UCODE_ADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmRLC_GPM_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(mmRLC_GPM_UCODE_ADDR, adev->gfx.rlc_fw_version); - - return 0; -} - static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev) { - int r; - u32 tmp; - - gfx_v8_0_rlc_stop(adev); - - /* disable CG */ - tmp = RREG32(mmRLC_CGCG_CGLS_CTRL); - tmp &= ~(RLC_CGCG_CGLS_CTRL__CGCG_EN_MASK | - RLC_CGCG_CGLS_CTRL__CGLS_EN_MASK); - WREG32(mmRLC_CGCG_CGLS_CTRL, tmp); - if (adev->asic_type == CHIP_POLARIS11 || - adev->asic_type == CHIP_POLARIS10 || - adev->asic_type == CHIP_POLARIS12 || - adev->asic_type == CHIP_VEGAM) { - tmp = RREG32(mmRLC_CGCG_CGLS_CTRL_3D); - tmp &= ~0x3; - WREG32(mmRLC_CGCG_CGLS_CTRL_3D, tmp); + if (amdgpu_sriov_vf(adev)) { + gfx_v8_0_init_csb(adev); + return 0; } - /* disable PG */ - WREG32(mmRLC_PG_CNTL, 0); - - gfx_v8_0_rlc_reset(adev); + adev->gfx.rlc.funcs->stop(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v8_0_init_pg(adev); - - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - /* legacy rlc firmware loading */ - r = gfx_v8_0_rlc_load_microcode(adev); - if (r) - return r; - } - - gfx_v8_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -4278,69 +4095,12 @@ static void gfx_v8_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1); tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1); for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } WREG32(mmCP_ME_CNTL, tmp); udelay(50); } -static int gfx_v8_0_cp_gfx_load_microcode(struct amdgpu_device *adev) -{ - const struct gfx_firmware_header_v1_0 *pfp_hdr; - const struct gfx_firmware_header_v1_0 *ce_hdr; - const struct gfx_firmware_header_v1_0 *me_hdr; - const __le32 *fw_data; - unsigned i, fw_size; - - if (!adev->gfx.me_fw || !adev->gfx.pfp_fw || !adev->gfx.ce_fw) - return -EINVAL; - - pfp_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.pfp_fw->data; - ce_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.ce_fw->data; - me_hdr = (const struct gfx_firmware_header_v1_0 *) - adev->gfx.me_fw->data; - - amdgpu_ucode_print_gfx_hdr(&pfp_hdr->header); - amdgpu_ucode_print_gfx_hdr(&ce_hdr->header); - amdgpu_ucode_print_gfx_hdr(&me_hdr->header); - - gfx_v8_0_cp_gfx_enable(adev, false); - - /* PFP */ - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(pfp_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(pfp_hdr->header.ucode_size_bytes) / 4; - WREG32(mmCP_PFP_UCODE_ADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmCP_PFP_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(mmCP_PFP_UCODE_ADDR, adev->gfx.pfp_fw_version); - - /* CE */ - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(ce_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(ce_hdr->header.ucode_size_bytes) / 4; - WREG32(mmCP_CE_UCODE_ADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmCP_CE_UCODE_DATA, le32_to_cpup(fw_data++)); - WREG32(mmCP_CE_UCODE_ADDR, adev->gfx.ce_fw_version); - - /* ME */ - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(me_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(me_hdr->header.ucode_size_bytes) / 4; - WREG32(mmCP_ME_RAM_WADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmCP_ME_RAM_DATA, le32_to_cpup(fw_data++)); - WREG32(mmCP_ME_RAM_WADDR, adev->gfx.me_fw_version); - - return 0; -} - static u32 gfx_v8_0_get_csb_size(struct amdgpu_device *adev) { u32 count = 0; @@ -4460,7 +4220,7 @@ static void gfx_v8_0_set_cpg_door_bell(struct amdgpu_device *adev, struct amdgpu tmp = REG_SET_FIELD(0, CP_RB_DOORBELL_RANGE_LOWER, DOORBELL_RANGE_LOWER, - AMDGPU_DOORBELL_GFX_RING0); + adev->doorbell_index.gfx_ring0); WREG32(mmCP_RB_DOORBELL_RANGE_LOWER, tmp); WREG32(mmCP_RB_DOORBELL_RANGE_UPPER, @@ -4517,10 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ amdgpu_ring_clear_ring(ring); gfx_v8_0_cp_gfx_start(adev); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + ring->sched.ready = true; + r = amdgpu_ring_test_helper(ring); return r; } @@ -4534,58 +4292,12 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) } else { WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; - adev->gfx.kiq.ring.ready = false; + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; } udelay(50); } -static int gfx_v8_0_cp_compute_load_microcode(struct amdgpu_device *adev) -{ - const struct gfx_firmware_header_v1_0 *mec_hdr; - const __le32 *fw_data; - unsigned i, fw_size; - - if (!adev->gfx.mec_fw) - return -EINVAL; - - gfx_v8_0_cp_compute_enable(adev, false); - - mec_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - amdgpu_ucode_print_gfx_hdr(&mec_hdr->header); - - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(mec_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(mec_hdr->header.ucode_size_bytes) / 4; - - /* MEC1 */ - WREG32(mmCP_MEC_ME1_UCODE_ADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmCP_MEC_ME1_UCODE_DATA, le32_to_cpup(fw_data+i)); - WREG32(mmCP_MEC_ME1_UCODE_ADDR, adev->gfx.mec_fw_version); - - /* Loading MEC2 firmware is only necessary if MEC2 should run different microcode than MEC1. */ - if (adev->gfx.mec2_fw) { - const struct gfx_firmware_header_v1_0 *mec2_hdr; - - mec2_hdr = (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - amdgpu_ucode_print_gfx_hdr(&mec2_hdr->header); - - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(mec2_hdr->header.ucode_array_offset_bytes)); - fw_size = le32_to_cpu(mec2_hdr->header.ucode_size_bytes) / 4; - - WREG32(mmCP_MEC_ME2_UCODE_ADDR, 0); - for (i = 0; i < fw_size; i++) - WREG32(mmCP_MEC_ME2_UCODE_DATA, le32_to_cpup(fw_data+i)); - WREG32(mmCP_MEC_ME2_UCODE_ADDR, adev->gfx.mec2_fw_version); - } - - return 0; -} - /* KIQ functions */ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) { @@ -4604,7 +4316,6 @@ static void gfx_v8_0_kiq_setting(struct amdgpu_ring *ring) static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) { struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint32_t scratch, tmp = 0; uint64_t queue_mask = 0; int r, i; @@ -4623,17 +4334,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) queue_mask |= (1ull << i); } - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 11); + r = amdgpu_ring_alloc(kiq_ring, (8 * adev->gfx.num_compute_rings) + 8); if (r) { DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); return r; } /* set resources */ @@ -4665,25 +4368,10 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - amdgpu_gfx_scratch_free(adev, scratch); + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ enable failed\n"); return r; } @@ -4933,7 +4621,7 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) struct vi_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct vi_mqd_allocation)); ((struct vi_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct vi_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -4961,8 +4649,8 @@ static int gfx_v8_0_kcq_init_queue(struct amdgpu_ring *ring) static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) { if (adev->asic_type > CHIP_TONGA) { - WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, AMDGPU_DOORBELL_KIQ << 2); - WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, AMDGPU_DOORBELL_MEC_RING7 << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_LOWER, adev->doorbell_index.kiq << 2); + WREG32(mmCP_MEC_DOORBELL_RANGE_UPPER, adev->doorbell_index.mec_ring7 << 2); } /* enable doorbells */ WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1); @@ -4970,26 +4658,33 @@ static void gfx_v8_0_set_mec_doorbell_range(struct amdgpu_device *adev) static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; - - gfx_v8_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; + int r; ring = &adev->gfx.kiq.ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, &ring->mqd_ptr); - if (!r) { - r = gfx_v8_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } + if (unlikely(r != 0)) + return r; + + gfx_v8_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - goto done; + ring->sched.ready = true; + return 0; +} + +static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v8_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -5014,22 +4709,12 @@ static int gfx_v8_0_kiq_resume(struct amdgpu_device *adev) if (r) goto done; - /* Test KIQ */ - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - goto done; - } - - /* Test KCQs */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) { + /* Test KCQs - reversing the order of rings seems to fix ring test failure + * after GPU reset + */ + for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) { ring = &adev->gfx.compute_ring[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + r = amdgpu_ring_test_helper(ring); } done: @@ -5043,25 +4728,17 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev) if (!(adev->flags & AMD_IS_APU)) gfx_v8_0_enable_gui_idle_interrupt(adev, false); - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - /* legacy firmware loading */ - r = gfx_v8_0_cp_gfx_load_microcode(adev); - if (r) - return r; - - r = gfx_v8_0_cp_compute_load_microcode(adev); - if (r) - return r; - } + r = gfx_v8_0_kiq_resume(adev); + if (r) + return r; r = gfx_v8_0_cp_gfx_resume(adev); if (r) return r; - r = gfx_v8_0_kiq_resume(adev); + r = gfx_v8_0_kcq_resume(adev); if (r) return r; - gfx_v8_0_enable_gui_idle_interrupt(adev, true); return 0; @@ -5079,9 +4756,9 @@ static int gfx_v8_0_hw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; gfx_v8_0_init_golden_registers(adev); - gfx_v8_0_gpu_init(adev); + gfx_v8_0_constants_init(adev); - r = gfx_v8_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -5090,61 +4767,88 @@ static int gfx_v8_0_hw_init(void *handle) return r; } -static int gfx_v8_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) +static int gfx_v8_0_kcq_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = kiq_ring->adev; - uint32_t scratch, tmp = 0; int r, i; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, 10); - if (r) { + r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); + if (r) DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } - /* unmap queues */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); + amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + } + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ disable failed\n"); + + return r; +} + +static bool gfx_v8_0_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE) + || RREG32(mmGRBM_STATUS2) != 0x8) + return false; + else + return true; +} + +static bool gfx_v8_0_rlc_is_idle(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + if (RREG32(mmGRBM_STATUS2) != 0x8) + return false; + else + return true; +} + +static int gfx_v8_0_wait_for_rlc_idle(void *handle) +{ + unsigned int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); + if (gfx_v8_0_rlc_is_idle(handle)) + return 0; + + udelay(1); } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); - r = -EINVAL; + return -ETIMEDOUT; +} + +static int gfx_v8_0_wait_for_idle(void *handle) +{ + unsigned int i; + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + + for (i = 0; i < adev->usec_timeout; i++) { + if (gfx_v8_0_is_idle(handle)) + return 0; + + udelay(1); } - amdgpu_gfx_scratch_free(adev, scratch); - return r; + return -ETIMEDOUT; } static int gfx_v8_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); @@ -5154,62 +4858,33 @@ static int gfx_v8_0_hw_fini(void *handle) amdgpu_irq_put(adev, &adev->gfx.sq_irq, 0); /* disable KCQ to avoid CPC touch memory not valid anymore */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) - gfx_v8_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); + gfx_v8_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { pr_debug("For SRIOV client, shouldn't do anything.\n"); return 0; } - gfx_v8_0_cp_enable(adev, false); - gfx_v8_0_rlc_stop(adev); - - amdgpu_device_ip_set_powergating_state(adev, - AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_UNGATE); - + amdgpu_gfx_rlc_enter_safe_mode(adev); + if (!gfx_v8_0_wait_for_idle(adev)) + gfx_v8_0_cp_enable(adev, false); + else + pr_err("cp is busy, skip halt cp\n"); + if (!gfx_v8_0_wait_for_rlc_idle(adev)) + adev->gfx.rlc.funcs->stop(adev); + else + pr_err("rlc is busy, skip halt rlc\n"); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } static int gfx_v8_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - adev->gfx.in_suspend = true; - return gfx_v8_0_hw_fini(adev); + return gfx_v8_0_hw_fini(handle); } static int gfx_v8_0_resume(void *handle) { - int r; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - r = gfx_v8_0_hw_init(adev); - adev->gfx.in_suspend = false; - return r; -} - -static bool gfx_v8_0_is_idle(void *handle) -{ - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - if (REG_GET_FIELD(RREG32(mmGRBM_STATUS), GRBM_STATUS, GUI_ACTIVE)) - return false; - else - return true; -} - -static int gfx_v8_0_wait_for_idle(void *handle) -{ - unsigned i; - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - for (i = 0; i < adev->usec_timeout; i++) { - if (gfx_v8_0_is_idle(handle)) - return 0; - - udelay(1); - } - return -ETIMEDOUT; + return gfx_v8_0_hw_init(handle); } static bool gfx_v8_0_check_soft_reset(void *handle) @@ -5277,17 +4952,16 @@ static bool gfx_v8_0_check_soft_reset(void *handle) static int gfx_v8_0_pre_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; /* stop the rlc */ - gfx_v8_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) @@ -5381,18 +5055,13 @@ static int gfx_v8_0_soft_reset(void *handle) static int gfx_v8_0_post_soft_reset(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - u32 grbm_soft_reset = 0, srbm_soft_reset = 0; + u32 grbm_soft_reset = 0; if ((!adev->gfx.grbm_soft_reset) && (!adev->gfx.srbm_soft_reset)) return 0; grbm_soft_reset = adev->gfx.grbm_soft_reset; - srbm_soft_reset = adev->gfx.srbm_soft_reset; - - if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || - REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) - gfx_v8_0_cp_gfx_resume(adev); if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) || @@ -5410,8 +5079,14 @@ static int gfx_v8_0_post_soft_reset(void *handle) mutex_unlock(&adev->srbm_mutex); } gfx_v8_0_kiq_resume(adev); + gfx_v8_0_kcq_resume(adev); } - gfx_v8_0_rlc_start(adev); + + if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) || + REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX)) + gfx_v8_0_cp_gfx_resume(adev); + + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -5442,15 +5117,6 @@ static void gfx_v8_0_ring_emit_gds_switch(struct amdgpu_ring *ring, uint32_t gws_base, uint32_t gws_size, uint32_t oa_base, uint32_t oa_size) { - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - /* GDS Base */ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3)); amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) | @@ -5668,7 +5334,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); switch (adev->asic_type) { case CHIP_CARRIZO: case CHIP_STONEY: @@ -5722,7 +5388,7 @@ static int gfx_v8_0_set_powergating_state(void *handle, AMD_PG_SUPPORT_RLC_SMU_HS | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_GFX_DMG)) - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return 0; } @@ -5816,57 +5482,53 @@ static void gfx_v8_0_send_serdes_cmd(struct amdgpu_device *adev, #define RLC_GPR_REG2__MESSAGE__SHIFT 0x00000001 #define RLC_GPR_REG2__MESSAGE_MASK 0x0000001e -static void iceland_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v8_0_is_rlc_enabled(struct amdgpu_device *adev) { - u32 data; - unsigned i; + uint32_t rlc_setting; - data = RREG32(mmRLC_CNTL); - if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; + rlc_setting = RREG32(mmRLC_CNTL); + if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) + return false; - if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - data |= RLC_SAFE_MODE__CMD_MASK; - data &= ~RLC_SAFE_MODE__MESSAGE_MASK; - data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32(mmRLC_SAFE_MODE, data); + return true; +} - for (i = 0; i < adev->usec_timeout; i++) { - if ((RREG32(mmRLC_GPM_STAT) & - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == - (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | - RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) - break; - udelay(1); - } +static void gfx_v8_0_set_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + unsigned i; + data = RREG32(mmRLC_CNTL); + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32(mmRLC_SAFE_MODE, data); - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) - break; - udelay(1); - } - adev->gfx.rlc.in_safe_mode = true; + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if ((RREG32(mmRLC_GPM_STAT) & + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) == + (RLC_GPM_STAT__GFX_CLOCK_STATUS_MASK | + RLC_GPM_STAT__GFX_POWER_STATUS_MASK)) + break; + udelay(1); + } + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); } } -static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v8_0_unset_safe_mode(struct amdgpu_device *adev) { - u32 data = 0; + uint32_t data; unsigned i; data = RREG32(mmRLC_CNTL); - if (!(data & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; - - if (adev->cg_flags & (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - if (adev->gfx.rlc.in_safe_mode) { - data |= RLC_SAFE_MODE__CMD_MASK; - data &= ~RLC_SAFE_MODE__MESSAGE_MASK; - WREG32(mmRLC_SAFE_MODE, data); - adev->gfx.rlc.in_safe_mode = false; - } - } + data |= RLC_SAFE_MODE__CMD_MASK; + data &= ~RLC_SAFE_MODE__MESSAGE_MASK; + WREG32(mmRLC_SAFE_MODE, data); for (i = 0; i < adev->usec_timeout; i++) { if (!REG_GET_FIELD(RREG32(mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) @@ -5876,8 +5538,17 @@ static void iceland_exit_rlc_safe_mode(struct amdgpu_device *adev) } static const struct amdgpu_rlc_funcs iceland_rlc_funcs = { - .enter_safe_mode = iceland_enter_rlc_safe_mode, - .exit_safe_mode = iceland_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v8_0_is_rlc_enabled, + .set_safe_mode = gfx_v8_0_set_safe_mode, + .unset_safe_mode = gfx_v8_0_unset_safe_mode, + .init = gfx_v8_0_rlc_init, + .get_csb_size = gfx_v8_0_get_csb_size, + .get_csb_buffer = gfx_v8_0_get_csb_buffer, + .get_cp_table_num = gfx_v8_0_cp_jump_table_num, + .resume = gfx_v8_0_rlc_resume, + .stop = gfx_v8_0_rlc_stop, + .reset = gfx_v8_0_rlc_reset, + .start = gfx_v8_0_rlc_start }; static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev, @@ -5885,7 +5556,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev { uint32_t temp, data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); /* It is disabled by HW by default */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) { @@ -5981,7 +5652,7 @@ static void gfx_v8_0_update_medium_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -5991,7 +5662,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev temp = data = RREG32(mmRLC_CGCG_CGLS_CTRL); - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { temp1 = data1 = RREG32(mmRLC_CGTT_MGCG_OVERRIDE); @@ -6074,7 +5745,7 @@ static void gfx_v8_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev gfx_v8_0_wait_for_rlc_serdes(adev); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static int gfx_v8_0_update_gfx_clock_gating(struct amdgpu_device *adev, bool enable) @@ -6354,9 +6025,11 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring) } static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; if (ib->flags & AMDGPU_IB_FLAG_CE) @@ -6384,9 +6057,11 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); @@ -6727,6 +6402,18 @@ static void gfx_v8_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg, amdgpu_ring_write(ring, val); } +static void gfx_v8_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32(mmSQ_CMD, value); +} + static void gfx_v8_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -6949,12 +6636,39 @@ static int gfx_v8_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v8_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v8_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v8_0_fault(adev, entry); return 0; } @@ -6963,7 +6677,7 @@ static int gfx_v8_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v8_0_fault(adev, entry); return 0; } @@ -7075,52 +6789,6 @@ static int gfx_v8_0_sq_irq(struct amdgpu_device *adev, return 0; } -static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned int type, - enum amdgpu_interrupt_state state) -{ - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - switch (type) { - case AMDGPU_CP_KIQ_IRQ_DRIVER0: - WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); - if (ring->me == 1) - WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL, - ring->pipe, - GENERIC2_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); - else - WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL, - ring->pipe, - GENERIC2_INT_ENABLE, - state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1); - break; - default: - BUG(); /* kiq only support GENERIC2_INT now */ - break; - } - return 0; -} - -static int gfx_v8_0_kiq_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; - DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", - me_id, pipe_id, queue_id); - - amdgpu_fence_process(ring); - return 0; -} - static const struct amd_ip_funcs gfx_v8_0_ip_funcs = { .name = "gfx_v8_0", .early_init = gfx_v8_0_early_init, @@ -7184,6 +6852,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = { .init_cond_exec = gfx_v8_0_ring_emit_init_cond_exec, .patch_cond_exec = gfx_v8_0_ring_emit_patch_cond_exec, .emit_wreg = gfx_v8_0_ring_emit_wreg, + .soft_recovery = gfx_v8_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = { @@ -7232,10 +6901,8 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = { 17 + /* gfx_v8_0_ring_emit_vm_flush */ 7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */ - .emit_ib = gfx_v8_0_ring_emit_ib_compute, .emit_fence = gfx_v8_0_ring_emit_fence_kiq, .test_ring = gfx_v8_0_ring_test_ring, - .test_ib = gfx_v8_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v8_0_ring_emit_rreg, @@ -7270,11 +6937,6 @@ static const struct amdgpu_irq_src_funcs gfx_v8_0_priv_inst_irq_funcs = { .process = gfx_v8_0_priv_inst_irq, }; -static const struct amdgpu_irq_src_funcs gfx_v8_0_kiq_irq_funcs = { - .set = gfx_v8_0_kiq_set_interrupt_state, - .process = gfx_v8_0_kiq_irq, -}; - static const struct amdgpu_irq_src_funcs gfx_v8_0_cp_ecc_error_irq_funcs = { .set = gfx_v8_0_set_cp_ecc_int_state, .process = gfx_v8_0_cp_ecc_error_irq, @@ -7296,9 +6958,6 @@ static void gfx_v8_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v8_0_priv_inst_irq_funcs; - adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; - adev->gfx.kiq.irq.funcs = &gfx_v8_0_kiq_irq_funcs; - adev->gfx.cp_ecc_error_irq.num_types = 1; adev->gfx.cp_ecc_error_irq.funcs = &gfx_v8_0_cp_ecc_error_irq_funcs; diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c index ef00d14f8645..7556716038d3 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c @@ -41,7 +41,7 @@ #include "ivsrcid/gfx/irqsrcs_gfx_9_0.h" #define GFX9_NUM_GFX_RINGS 1 -#define GFX9_MEC_HPD_SIZE 2048 +#define GFX9_MEC_HPD_SIZE 4096 #define RLCG_UCODE_LOADING_START_ADDRESS 0x00002000L #define RLC_SAVE_RESTORE_ADDR_STARTING_OFFSET 0x00000000L @@ -80,9 +80,25 @@ MODULE_FIRMWARE("amdgpu/raven_mec.bin"); MODULE_FIRMWARE("amdgpu/raven_mec2.bin"); MODULE_FIRMWARE("amdgpu/raven_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_ce.bin"); +MODULE_FIRMWARE("amdgpu/picasso_pfp.bin"); +MODULE_FIRMWARE("amdgpu/picasso_me.bin"); +MODULE_FIRMWARE("amdgpu/picasso_mec.bin"); +MODULE_FIRMWARE("amdgpu/picasso_mec2.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc.bin"); +MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin"); + +MODULE_FIRMWARE("amdgpu/raven2_ce.bin"); +MODULE_FIRMWARE("amdgpu/raven2_pfp.bin"); +MODULE_FIRMWARE("amdgpu/raven2_me.bin"); +MODULE_FIRMWARE("amdgpu/raven2_mec.bin"); +MODULE_FIRMWARE("amdgpu/raven2_mec2.bin"); +MODULE_FIRMWARE("amdgpu/raven2_rlc.bin"); + static const struct soc15_reg_golden golden_settings_gc_9_0[] = { - SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000420), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG2, 0xf00fffff, 0x00000400), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmDB_DEBUG3, 0x80000000, 0x80000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_GPU_ID, 0x0000000f, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_BINNER_EVENT_CNTL_3, 0x00000003, 0x82400024), SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE, 0x3fffffff, 0x00000001), @@ -119,7 +135,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] = @@ -159,7 +178,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00003120), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000000ff), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = @@ -173,6 +195,29 @@ static const struct soc15_reg_golden golden_settings_gc_9_1_rv1[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x00000800) }; +static const struct soc15_reg_golden golden_settings_gc_9_1_rv2[] = +{ + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_DCC_CONFIG, 0xff7fffff, 0x04000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL, 0xfffdf3cf, 0x00014104), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCB_HW_CONTROL_2, 0xff7fffff, 0x0a000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPC_UTCL1_CNTL, 0x7f0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPF_UTCL1_CNTL, 0xff8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCPG_UTCL1_CNTL, 0x7f8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG, 0xffff77ff, 0x26013041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmGB_ADDR_CONFIG_READ, 0xffff77ff, 0x26013041), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmIA_UTCL1_CNTL, 0x3f8fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmPA_SC_ENHANCE_1, 0xffffffff, 0x04040000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_0, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_1, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_GPM_UTCL1_CNTL_2, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_PREWALKER_UTCL1_CNTL, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmRLC_SPM_UTCL1_CNTL, 0xff0fffff, 0x08000080), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x00000010), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x3f8fffff, 0x08000080), +}; + static const struct soc15_reg_golden golden_settings_gc_9_x_common[] = { SOC15_REG_GOLDEN_VALUE(GC, 0, mmGRBM_CAM_INDEX, 0xffffffff, 0x00000000), @@ -210,7 +255,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_2_1_vg12[] = SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0xffff03ff, 0x01000107), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x00000000), SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0x76325410), - SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000) + SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x01bd9f33, 0x01000000), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800), + SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000) }; static const u32 GFX_RLC_SRM_INDEX_CNTL_ADDR_OFFSETS[] = @@ -240,6 +288,7 @@ static const u32 GFX_RLC_SRM_INDEX_CNTL_DATA_OFFSETS[] = #define VEGA10_GB_ADDR_CONFIG_GOLDEN 0x2a114042 #define VEGA12_GB_ADDR_CONFIG_GOLDEN 0x24104041 #define RAVEN_GB_ADDR_CONFIG_GOLDEN 0x24000042 +#define RAVEN2_GB_ADDR_CONFIG_GOLDEN 0x26013041 static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev); static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev); @@ -279,12 +328,16 @@ static void gfx_v9_0_init_golden_registers(struct amdgpu_device *adev) ARRAY_SIZE(golden_settings_gc_9_0_vg20)); break; case CHIP_RAVEN: - soc15_program_register_sequence(adev, - golden_settings_gc_9_1, - ARRAY_SIZE(golden_settings_gc_9_1)); - soc15_program_register_sequence(adev, - golden_settings_gc_9_1_rv1, - ARRAY_SIZE(golden_settings_gc_9_1_rv1)); + soc15_program_register_sequence(adev, golden_settings_gc_9_1, + ARRAY_SIZE(golden_settings_gc_9_1)); + if (adev->rev_id >= 8) + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rv2, + ARRAY_SIZE(golden_settings_gc_9_1_rv2)); + else + soc15_program_register_sequence(adev, + golden_settings_gc_9_1_rv1, + ARRAY_SIZE(golden_settings_gc_9_1_rv1)); break; default: break; @@ -344,18 +397,14 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("amdgpu: cp failed to get scratch reg (%d).\n", r); + if (r) return r; - } + WREG32(scratch, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } + if (r) + goto error_free_scratch; + amdgpu_ring_write(ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); amdgpu_ring_write(ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -367,14 +416,11 @@ static int gfx_v9_0_ring_test_ring(struct amdgpu_ring *ring) break; DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (scratch(0x%04X)=0x%08X)\n", - ring->idx, scratch, tmp); - r = -EINVAL; - } + + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + +error_free_scratch: amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -391,19 +437,16 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); adev->wb.wb[index] = cpu_to_le32(0xCAFEDEAD); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 16, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err1; - } + ib.ptr[0] = PACKET3(PACKET3_WRITE_DATA, 3); ib.ptr[1] = WRITE_DATA_DST_SEL(5) | WR_CONFIRM; ib.ptr[2] = lower_32_bits(gpu_addr); @@ -417,22 +460,17 @@ static int gfx_v9_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); - r = -ETIMEDOUT; - goto err2; + r = -ETIMEDOUT; + goto err2; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - goto err2; + goto err2; } tmp = adev->wb.wb[index]; - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); - r = 0; - } else { - DRM_ERROR("ib test on ring %d failed\n", ring->idx); - r = -EINVAL; - } + if (tmp == 0xDEADBEEF) + r = 0; + else + r = -EINVAL; err2: amdgpu_ib_free(adev, &ib, NULL); @@ -482,6 +520,61 @@ static void gfx_v9_0_init_rlc_ext_microcode(struct amdgpu_device *adev) le32_to_cpu(rlc_hdr->reg_list_format_direct_reg_list_length); } +static void gfx_v9_0_check_fw_write_wait(struct amdgpu_device *adev) +{ + adev->gfx.me_fw_write_wait = false; + adev->gfx.mec_fw_write_wait = false; + + switch (adev->asic_type) { + case CHIP_VEGA10: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x000000b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000193) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA12: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x000000b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000196) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_VEGA20: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 44) && + (adev->gfx.pfp_fw_version >= 0x000000b2) && + (adev->gfx.pfp_feature_version >= 44)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000197) && + (adev->gfx.mec_feature_version >= 44)) + adev->gfx.mec_fw_write_wait = true; + break; + case CHIP_RAVEN: + if ((adev->gfx.me_fw_version >= 0x0000009c) && + (adev->gfx.me_feature_version >= 42) && + (adev->gfx.pfp_fw_version >= 0x000000b1) && + (adev->gfx.pfp_feature_version >= 42)) + adev->gfx.me_fw_write_wait = true; + + if ((adev->gfx.mec_fw_version >= 0x00000192) && + (adev->gfx.mec_feature_version >= 42)) + adev->gfx.mec_fw_write_wait = true; + break; + default: + break; + } +} + static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) { const char *chip_name; @@ -509,7 +602,12 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) chip_name = "vega20"; break; case CHIP_RAVEN: - chip_name = "raven"; + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; break; default: BUG(); @@ -548,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version); adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version); - snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); + /* + * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin + * instead of picasso_rlc.bin. + * Judgment method: + * PCO AM4: revision >= 0xC8 && revision <= 0xCF + * or revision >= 0xD8 && revision <= 0xDF + * otherwise is PCO FP5 + */ + if (!strcmp(chip_name, "picasso") && + (((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) || + ((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF)))) + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name); + else + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name); err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev); if (err) goto out; @@ -590,14 +701,14 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_format_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_format_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_format_size_bytes >> 2); i++) adev->gfx.rlc.register_list_format[i] = le32_to_cpu(tmp[i]); adev->gfx.rlc.register_restore = adev->gfx.rlc.register_list_format + i; tmp = (unsigned int *)((uintptr_t)rlc_hdr + le32_to_cpu(rlc_hdr->reg_list_array_offset_bytes)); - for (i = 0 ; i < (rlc_hdr->reg_list_size_bytes >> 2); i++) + for (i = 0 ; i < (adev->gfx.rlc.reg_list_size_bytes >> 2); i++) adev->gfx.rlc.register_restore[i] = le32_to_cpu(tmp[i]); if (adev->gfx.rlc.is_rlc_v2_1) @@ -716,6 +827,7 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev) } out: + gfx_v9_0_check_fw_write_wait(adev); if (err) { dev_err(adev->dev, "gfx9: Failed to load firmware \"%s\"\n", @@ -805,6 +917,50 @@ static void gfx_v9_0_get_csb_buffer(struct amdgpu_device *adev, buffer[count++] = cpu_to_le32(0); } +static void gfx_v9_0_init_always_on_cu_mask(struct amdgpu_device *adev) +{ + struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info; + uint32_t pg_always_on_cu_num = 2; + uint32_t always_on_cu_num; + uint32_t i, j, k; + uint32_t mask, cu_bitmap, counter; + + if (adev->flags & AMD_IS_APU) + always_on_cu_num = 4; + else if (adev->asic_type == CHIP_VEGA12) + always_on_cu_num = 8; + else + always_on_cu_num = 12; + + mutex_lock(&adev->grbm_idx_mutex); + for (i = 0; i < adev->gfx.config.max_shader_engines; i++) { + for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) { + mask = 1; + cu_bitmap = 0; + counter = 0; + gfx_v9_0_select_se_sh(adev, i, j, 0xffffffff); + + for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) { + if (cu_info->bitmap[i][j] & mask) { + if (counter == pg_always_on_cu_num) + WREG32_SOC15(GC, 0, mmRLC_PG_ALWAYS_ON_CU_MASK, cu_bitmap); + if (counter < always_on_cu_num) + cu_bitmap |= mask; + else + break; + counter++; + } + mask <<= 1; + } + + WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, cu_bitmap); + cu_info->ao_cu_bitmap[i][j] = cu_bitmap; + } + } + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + mutex_unlock(&adev->grbm_idx_mutex); +} + static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) { uint32_t data; @@ -838,8 +994,10 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) data |= 0x00C00000; WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); - /* set RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF */ - WREG32_SOC15(GC, 0, mmRLC_LB_ALWAYS_ACTIVE_CU_MASK, 0xFFF); + /* + * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xF (4 CUs AON for Raven), + * programmed in gfx_v9_0_init_always_on_cu_mask() + */ /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, * but used for RLC_LB_CNTL configuration */ @@ -848,92 +1006,71 @@ static void gfx_v9_0_init_lbpw(struct amdgpu_device *adev) data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); mutex_unlock(&adev->grbm_idx_mutex); -} -static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) -{ - WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); + gfx_v9_0_init_always_on_cu_mask(adev); } -static void rv_init_cp_jump_table(struct amdgpu_device *adev) +static void gfx_v9_4_init_lbpw(struct amdgpu_device *adev) { - const __le32 *fw_data; - volatile u32 *dst_ptr; - int me, i, max_me = 5; - u32 bo_offset = 0; - u32 table_offset, table_size; - - /* write the cp table buffer */ - dst_ptr = adev->gfx.rlc.cp_table_ptr; - for (me = 0; me < max_me; me++) { - if (me == 0) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.ce_fw->data; - fw_data = (const __le32 *) - (adev->gfx.ce_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 1) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.pfp_fw->data; - fw_data = (const __le32 *) - (adev->gfx.pfp_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 2) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.me_fw->data; - fw_data = (const __le32 *) - (adev->gfx.me_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 3) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } else if (me == 4) { - const struct gfx_firmware_header_v1_0 *hdr = - (const struct gfx_firmware_header_v1_0 *)adev->gfx.mec2_fw->data; - fw_data = (const __le32 *) - (adev->gfx.mec2_fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - table_offset = le32_to_cpu(hdr->jt_offset); - table_size = le32_to_cpu(hdr->jt_size); - } + uint32_t data; - for (i = 0; i < table_size; i ++) { - dst_ptr[bo_offset + i] = - cpu_to_le32(le32_to_cpu(fw_data[table_offset + i])); - } + /* set mmRLC_LB_THR_CONFIG_1/2/3/4 */ + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_1, 0x0000007F); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_2, 0x033388F8); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_3, 0x00000077); + WREG32_SOC15(GC, 0, mmRLC_LB_THR_CONFIG_4, (0x10 | 0x27 << 8 | 0x02FA << 16)); - bo_offset += table_size; - } + /* set mmRLC_LB_CNTR_INIT = 0x0000_0000 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_INIT, 0x00000000); + + /* set mmRLC_LB_CNTR_MAX = 0x0000_0500 */ + WREG32_SOC15(GC, 0, mmRLC_LB_CNTR_MAX, 0x00000800); + + mutex_lock(&adev->grbm_idx_mutex); + /* set mmRLC_LB_INIT_CU_MASK thru broadcast mode to enable all SE/SH*/ + gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff); + WREG32_SOC15(GC, 0, mmRLC_LB_INIT_CU_MASK, 0xffffffff); + + /* set mmRLC_LB_PARAMS = 0x003F_1006 */ + data = REG_SET_FIELD(0, RLC_LB_PARAMS, FIFO_SAMPLES, 0x0003); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLES, 0x0010); + data |= REG_SET_FIELD(data, RLC_LB_PARAMS, PG_IDLE_SAMPLE_INTERVAL, 0x033F); + WREG32_SOC15(GC, 0, mmRLC_LB_PARAMS, data); + + /* set mmRLC_GPM_GENERAL_7[31-16] = 0x00C0 */ + data = RREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7); + data &= 0x0000FFFF; + data |= 0x00C00000; + WREG32_SOC15(GC, 0, mmRLC_GPM_GENERAL_7, data); + + /* + * RLC_LB_ALWAYS_ACTIVE_CU_MASK = 0xFFF (12 CUs AON), + * programmed in gfx_v9_0_init_always_on_cu_mask() + */ + + /* set RLC_LB_CNTL = 0x8000_0095, 31 bit is reserved, + * but used for RLC_LB_CNTL configuration */ + data = RLC_LB_CNTL__LB_CNT_SPIM_ACTIVE_MASK; + data |= REG_SET_FIELD(data, RLC_LB_CNTL, CU_MASK_USED_OFF_HYST, 0x09); + data |= REG_SET_FIELD(data, RLC_LB_CNTL, RESERVED, 0x80000); + WREG32_SOC15(GC, 0, mmRLC_LB_CNTL, data); + mutex_unlock(&adev->grbm_idx_mutex); + + gfx_v9_0_init_always_on_cu_mask(adev); } -static void gfx_v9_0_rlc_fini(struct amdgpu_device *adev) +static void gfx_v9_0_enable_lbpw(struct amdgpu_device *adev, bool enable) { - /* clear state block */ - amdgpu_bo_free_kernel(&adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); + WREG32_FIELD15(GC, 0, RLC_LB_CNTL, LOAD_BALANCE_ENABLE, enable ? 1 : 0); +} - /* jump table block */ - amdgpu_bo_free_kernel(&adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); +static int gfx_v9_0_cp_jump_table_num(struct amdgpu_device *adev) +{ + return 5; } static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) { - volatile u32 *dst_ptr; - u32 dws; const struct cs_section_def *cs_data; int r; @@ -942,47 +1079,29 @@ static int gfx_v9_0_rlc_init(struct amdgpu_device *adev) cs_data = adev->gfx.rlc.cs_data; if (cs_data) { - /* clear state block */ - adev->gfx.rlc.clear_state_size = dws = gfx_v9_0_get_csb_size(adev); - r = amdgpu_bo_create_reserved(adev, dws * 4, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.clear_state_obj, - &adev->gfx.rlc.clear_state_gpu_addr, - (void **)&adev->gfx.rlc.cs_ptr); - if (r) { - dev_err(adev->dev, "(%d) failed to create rlc csb bo\n", - r); - gfx_v9_0_rlc_fini(adev); + /* init clear state block */ + r = amdgpu_gfx_rlc_init_csb(adev); + if (r) return r; - } - /* set up the cs buffer */ - dst_ptr = adev->gfx.rlc.cs_ptr; - gfx_v9_0_get_csb_buffer(adev, dst_ptr); - amdgpu_bo_kunmap(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unpin(adev->gfx.rlc.clear_state_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.clear_state_obj); } if (adev->asic_type == CHIP_RAVEN) { /* TODO: double check the cp_table_size for RV */ adev->gfx.rlc.cp_table_size = ALIGN(96 * 5 * 4, 2048) + (64 * 1024); /* JT + GDS */ - r = amdgpu_bo_create_reserved(adev, adev->gfx.rlc.cp_table_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_VRAM, - &adev->gfx.rlc.cp_table_obj, - &adev->gfx.rlc.cp_table_gpu_addr, - (void **)&adev->gfx.rlc.cp_table_ptr); - if (r) { - dev_err(adev->dev, - "(%d) failed to create cp table bo\n", r); - gfx_v9_0_rlc_fini(adev); + r = amdgpu_gfx_rlc_init_cpt(adev); + if (r) return r; - } - - rv_init_cp_jump_table(adev); - amdgpu_bo_kunmap(adev->gfx.rlc.cp_table_obj); - amdgpu_bo_unreserve(adev->gfx.rlc.cp_table_obj); + } + switch (adev->asic_type) { + case CHIP_RAVEN: gfx_v9_0_init_lbpw(adev); + break; + case CHIP_VEGA20: + gfx_v9_4_init_lbpw(adev); + break; + default: + break; } return 0; @@ -1045,7 +1164,7 @@ static int gfx_v9_0_mec_init(struct amdgpu_device *adev) mec_hpd_size = adev->gfx.num_compute_rings * GFX9_MEC_HPD_SIZE; r = amdgpu_bo_create_reserved(adev, mec_hpd_size, PAGE_SIZE, - AMDGPU_GEM_DOMAIN_GTT, + AMDGPU_GEM_DOMAIN_VRAM, &adev->gfx.mec.hpd_eop_obj, &adev->gfx.mec.hpd_eop_gpu_addr, (void **)&hpd); @@ -1210,7 +1329,10 @@ static int gfx_v9_0_gpu_early_init(struct amdgpu_device *adev) adev->gfx.config.sc_prim_fifo_size_backend = 0x100; adev->gfx.config.sc_hiz_tile_fifo_size = 0x30; adev->gfx.config.sc_earlyz_tile_fifo_size = 0x4C0; - gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; + if (adev->rev_id >= 8) + gb_addr_config = RAVEN2_GB_ADDR_CONFIG_GOLDEN; + else + gb_addr_config = RAVEN_GB_ADDR_CONFIG_GOLDEN; break; default: BUG(); @@ -1413,16 +1535,15 @@ static int gfx_v9_0_ngg_en(struct amdgpu_device *adev) /* Clear GDS reserved memory */ r = amdgpu_ring_alloc(ring, 17); if (r) { - DRM_ERROR("amdgpu: NGG failed to lock ring %d (%d).\n", - ring->idx, r); + DRM_ERROR("amdgpu: NGG failed to lock ring %s (%d).\n", + ring->name, r); return r; } gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_SIZE), (adev->gds.mem.total_size + - adev->gfx.ngg.gds_reserve_size) >> - AMDGPU_GDS_SHIFT); + adev->gfx.ngg.gds_reserve_size)); amdgpu_ring_write(ring, PACKET3(PACKET3_DMA_DATA, 5)); amdgpu_ring_write(ring, (PACKET3_DMA_DATA_CP_SYNC | @@ -1459,7 +1580,7 @@ static int gfx_v9_0_compute_ring_init(struct amdgpu_device *adev, int ring_id, ring->ring_obj = NULL; ring->use_doorbell = true; - ring->doorbell_index = (AMDGPU_DOORBELL_MEC_RING0 + ring_id) << 1; + ring->doorbell_index = (adev->doorbell_index.mec_ring0 + ring_id) << 1; ring->eop_gpu_addr = adev->gfx.mec.hpd_eop_gpu_addr + (ring_id * GFX9_MEC_HPD_SIZE); sprintf(ring->name, "comp_%d.%d.%d", ring->me, ring->pipe, ring->queue); @@ -1500,11 +1621,6 @@ static int gfx_v9_0_sw_init(void *handle) adev->gfx.mec.num_pipe_per_mec = 4; adev->gfx.mec.num_queue_per_pipe = 8; - /* KIQ event */ - r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_IB2_INTERRUPT_PKT, &adev->gfx.kiq.irq); - if (r) - return r; - /* EOP Event */ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_GRBM_CP, GFX_9_0__SRCID__CP_EOP_INTERRUPT, &adev->gfx.eop_irq); if (r) @@ -1532,7 +1648,7 @@ static int gfx_v9_0_sw_init(void *handle) return r; } - r = gfx_v9_0_rlc_init(adev); + r = adev->gfx.rlc.funcs->init(adev); if (r) { DRM_ERROR("Failed to init rlc BOs!\n"); return r; @@ -1553,7 +1669,7 @@ static int gfx_v9_0_sw_init(void *handle) else sprintf(ring->name, "gfx_%d", i); ring->use_doorbell = true; - ring->doorbell_index = AMDGPU_DOORBELL64_GFX_RING0 << 1; + ring->doorbell_index = adev->doorbell_index.gfx_ring0 << 1; r = amdgpu_ring_init(adev, ring, 1024, &adev->gfx.eop_irq, AMDGPU_CP_IRQ_GFX_EOP); if (r) @@ -1595,25 +1711,6 @@ static int gfx_v9_0_sw_init(void *handle) if (r) return r; - /* reserve GDS, GWS and OA resource for gfx */ - r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS, - &adev->gds.gds_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS, - &adev->gds.gws_gfx_bo, NULL, NULL); - if (r) - return r; - - r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size, - PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA, - &adev->gds.oa_gfx_bo, NULL, NULL); - if (r) - return r; - adev->gfx.ce_ram_size = 0x8000; r = gfx_v9_0_gpu_early_init(adev); @@ -1761,7 +1858,7 @@ static void gfx_v9_0_init_compute_vmid(struct amdgpu_device *adev) mutex_unlock(&adev->srbm_mutex); } -static void gfx_v9_0_gpu_init(struct amdgpu_device *adev) +static void gfx_v9_0_constants_init(struct amdgpu_device *adev) { u32 tmp; int i; @@ -2243,12 +2340,13 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev) #endif WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1); + udelay(50); /* carrizo do enable cp interrupt after cp inited */ - if (!(adev->flags & AMD_IS_APU)) + if (!(adev->flags & AMD_IS_APU)) { gfx_v9_0_enable_gui_idle_interrupt(adev, true); - - udelay(50); + udelay(50); + } #ifdef AMDGPU_RLC_DEBUG_RETRY /* RLC_GPM_GENERAL_6 : RLC Ucode version */ @@ -2301,12 +2399,12 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return 0; } - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* disable CG */ WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, 0); - gfx_v9_0_rlc_reset(adev); + adev->gfx.rlc.funcs->reset(adev); gfx_v9_0_init_pg(adev); @@ -2317,14 +2415,24 @@ static int gfx_v9_0_rlc_resume(struct amdgpu_device *adev) return r; } - if (adev->asic_type == CHIP_RAVEN) { - if (amdgpu_lbpw != 0) + switch (adev->asic_type) { + case CHIP_RAVEN: + if (amdgpu_lbpw == 0) + gfx_v9_0_enable_lbpw(adev, false); + else + gfx_v9_0_enable_lbpw(adev, true); + break; + case CHIP_VEGA20: + if (amdgpu_lbpw > 0) gfx_v9_0_enable_lbpw(adev, true); else gfx_v9_0_enable_lbpw(adev, false); + break; + default: + break; } - gfx_v9_0_rlc_start(adev); + adev->gfx.rlc.funcs->start(adev); return 0; } @@ -2339,7 +2447,7 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable) tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1); if (!enable) { for (i = 0; i < adev->gfx.num_gfx_rings; i++) - adev->gfx.gfx_ring[i].ready = false; + adev->gfx.gfx_ring[i].sched.ready = false; } WREG32_SOC15(GC, 0, mmCP_ME_CNTL, tmp); udelay(50); @@ -2529,7 +2637,7 @@ static int gfx_v9_0_cp_gfx_resume(struct amdgpu_device *adev) /* start the ring */ gfx_v9_0_cp_gfx_start(adev); - ring->ready = true; + ring->sched.ready = true; return 0; } @@ -2544,8 +2652,8 @@ static void gfx_v9_0_cp_compute_enable(struct amdgpu_device *adev, bool enable) WREG32_SOC15(GC, 0, mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK)); for (i = 0; i < adev->gfx.num_compute_rings; i++) - adev->gfx.compute_ring[i].ready = false; - adev->gfx.kiq.ring.ready = false; + adev->gfx.compute_ring[i].sched.ready = false; + adev->gfx.kiq.ring.sched.ready = false; } udelay(50); } @@ -2610,7 +2718,6 @@ static void gfx_v9_0_kiq_setting(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) { struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - uint32_t scratch, tmp = 0; uint64_t queue_mask = 0; int r, i; @@ -2629,17 +2736,9 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) queue_mask |= (1ull << i); } - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 11); + r = amdgpu_ring_alloc(kiq_ring, (7 * adev->gfx.num_compute_rings) + 8); if (r) { DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); return r; } @@ -2676,24 +2775,10 @@ static int gfx_v9_0_kiq_kcq_enable(struct amdgpu_device *adev) amdgpu_ring_write(kiq_ring, lower_32_bits(wptr_addr)); amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr)); } - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ enable failed (scratch(0x%04X)=0x%08X)\n", - scratch, tmp); - r = -EINVAL; - } - amdgpu_gfx_scratch_free(adev, scratch); + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ enable failed\n"); return r; } @@ -2911,9 +2996,9 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring) /* enable the doorbell if requested */ if (ring->use_doorbell) { WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_LOWER, - (AMDGPU_DOORBELL64_KIQ *2) << 2); + (adev->doorbell_index.kiq * 2) << 2); WREG32_SOC15(GC, 0, mmCP_MEC_DOORBELL_RANGE_UPPER, - (AMDGPU_DOORBELL64_USERQUEUE_END * 2) << 2); + (adev->doorbell_index.userqueue_end * 2) << 2); } WREG32_SOC15(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL, @@ -3026,7 +3111,7 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) struct v9_mqd *mqd = ring->mqd_ptr; int mqd_idx = ring - &adev->gfx.compute_ring[0]; - if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { memset((void *)mqd, 0, sizeof(struct v9_mqd_allocation)); ((struct v9_mqd_allocation *)mqd)->dynamic_cu_mask = 0xFFFFFFFF; ((struct v9_mqd_allocation *)mqd)->dynamic_rb_mask = 0xFFFFFFFF; @@ -3055,26 +3140,33 @@ static int gfx_v9_0_kcq_init_queue(struct amdgpu_ring *ring) static int gfx_v9_0_kiq_resume(struct amdgpu_device *adev) { - struct amdgpu_ring *ring = NULL; - int r = 0, i; - - gfx_v9_0_cp_compute_enable(adev, true); + struct amdgpu_ring *ring; + int r; ring = &adev->gfx.kiq.ring; r = amdgpu_bo_reserve(ring->mqd_obj, false); if (unlikely(r != 0)) - goto done; + return r; r = amdgpu_bo_kmap(ring->mqd_obj, (void **)&ring->mqd_ptr); - if (!r) { - r = gfx_v9_0_kiq_init_queue(ring); - amdgpu_bo_kunmap(ring->mqd_obj); - ring->mqd_ptr = NULL; - } + if (unlikely(r != 0)) + return r; + + gfx_v9_0_kiq_init_queue(ring); + amdgpu_bo_kunmap(ring->mqd_obj); + ring->mqd_ptr = NULL; amdgpu_bo_unreserve(ring->mqd_obj); - if (r) - goto done; + ring->sched.ready = true; + return 0; +} + +static int gfx_v9_0_kcq_resume(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = NULL; + int r = 0, i; + + gfx_v9_0_cp_compute_enable(adev, true); for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; @@ -3117,34 +3209,26 @@ static int gfx_v9_0_cp_resume(struct amdgpu_device *adev) return r; } - r = gfx_v9_0_cp_gfx_resume(adev); + r = gfx_v9_0_kiq_resume(adev); if (r) return r; - r = gfx_v9_0_kiq_resume(adev); + r = gfx_v9_0_cp_gfx_resume(adev); if (r) return r; - ring = &adev->gfx.gfx_ring[0]; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = gfx_v9_0_kcq_resume(adev); + if (r) return r; - } - ring = &adev->gfx.kiq.ring; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); + ring = &adev->gfx.gfx_ring[0]; + r = amdgpu_ring_test_helper(ring); if (r) - ring->ready = false; + return r; for (i = 0; i < adev->gfx.num_compute_rings; i++) { ring = &adev->gfx.compute_ring[i]; - - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) - ring->ready = false; + amdgpu_ring_test_helper(ring); } gfx_v9_0_enable_gui_idle_interrupt(adev, true); @@ -3165,13 +3249,13 @@ static int gfx_v9_0_hw_init(void *handle) gfx_v9_0_init_golden_registers(adev); - gfx_v9_0_gpu_init(adev); + gfx_v9_0_constants_init(adev); r = gfx_v9_0_csb_vram_pin(adev); if (r) return r; - r = gfx_v9_0_rlc_resume(adev); + r = adev->gfx.rlc.funcs->resume(adev); if (r) return r; @@ -3186,71 +3270,45 @@ static int gfx_v9_0_hw_init(void *handle) return r; } -static int gfx_v9_0_kcq_disable(struct amdgpu_ring *kiq_ring,struct amdgpu_ring *ring) +static int gfx_v9_0_kcq_disable(struct amdgpu_device *adev) { - struct amdgpu_device *adev = kiq_ring->adev; - uint32_t scratch, tmp = 0; int r, i; + struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring; - r = amdgpu_gfx_scratch_get(adev, &scratch); - if (r) { - DRM_ERROR("Failed to get scratch reg (%d).\n", r); - return r; - } - WREG32(scratch, 0xCAFEDEAD); - - r = amdgpu_ring_alloc(kiq_ring, 10); - if (r) { + r = amdgpu_ring_alloc(kiq_ring, 6 * adev->gfx.num_compute_rings); + if (r) DRM_ERROR("Failed to lock KIQ (%d).\n", r); - amdgpu_gfx_scratch_free(adev, scratch); - return r; - } - /* unmap queues */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); - amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + struct amdgpu_ring *ring = &adev->gfx.compute_ring[i]; + + amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_UNMAP_QUEUES, 4)); + amdgpu_ring_write(kiq_ring, /* Q_sel: 0, vmid: 0, engine: 0, num_Q: 1 */ PACKET3_UNMAP_QUEUES_ACTION(1) | /* RESET_QUEUES */ PACKET3_UNMAP_QUEUES_QUEUE_SEL(0) | PACKET3_UNMAP_QUEUES_ENGINE_SEL(0) | PACKET3_UNMAP_QUEUES_NUM_QUEUES(1)); - amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - amdgpu_ring_write(kiq_ring, 0); - /* write to scratch for completion */ - amdgpu_ring_write(kiq_ring, PACKET3(PACKET3_SET_UCONFIG_REG, 1)); - amdgpu_ring_write(kiq_ring, (scratch - PACKET3_SET_UCONFIG_REG_START)); - amdgpu_ring_write(kiq_ring, 0xDEADBEEF); - amdgpu_ring_commit(kiq_ring); - - for (i = 0; i < adev->usec_timeout; i++) { - tmp = RREG32(scratch); - if (tmp == 0xDEADBEEF) - break; - DRM_UDELAY(1); - } - if (i >= adev->usec_timeout) { - DRM_ERROR("KCQ disabled failed (scratch(0x%04X)=0x%08X)\n", scratch, tmp); - r = -EINVAL; + amdgpu_ring_write(kiq_ring, PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(ring->doorbell_index)); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); + amdgpu_ring_write(kiq_ring, 0); } - amdgpu_gfx_scratch_free(adev, scratch); + r = amdgpu_ring_test_helper(kiq_ring); + if (r) + DRM_ERROR("KCQ disable failed\n"); + return r; } static int gfx_v9_0_hw_fini(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int i; - - amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_GFX, - AMD_PG_STATE_UNGATE); amdgpu_irq_put(adev, &adev->gfx.priv_reg_irq, 0); amdgpu_irq_put(adev, &adev->gfx.priv_inst_irq, 0); /* disable KCQ to avoid CPC touch memory not valid anymore */ - for (i = 0; i < adev->gfx.num_compute_rings; i++) - gfx_v9_0_kcq_disable(&adev->gfx.kiq.ring, &adev->gfx.compute_ring[i]); + gfx_v9_0_kcq_disable(adev); if (amdgpu_sriov_vf(adev)) { gfx_v9_0_cp_gfx_enable(adev, false); @@ -3266,7 +3324,7 @@ static int gfx_v9_0_hw_fini(void *handle) /* Use deinitialize sequence from CAIL when unbinding device from driver, * otherwise KIQ is hanging when binding back */ - if (!adev->in_gpu_reset && !adev->gfx.in_suspend) { + if (!adev->in_gpu_reset && !adev->in_suspend) { mutex_lock(&adev->srbm_mutex); soc15_grbm_select(adev, adev->gfx.kiq.ring.me, adev->gfx.kiq.ring.pipe, @@ -3277,7 +3335,7 @@ static int gfx_v9_0_hw_fini(void *handle) } gfx_v9_0_cp_enable(adev, false); - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); gfx_v9_0_csb_vram_unpin(adev); @@ -3286,20 +3344,12 @@ static int gfx_v9_0_hw_fini(void *handle) static int gfx_v9_0_suspend(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - - adev->gfx.in_suspend = true; - return gfx_v9_0_hw_fini(adev); + return gfx_v9_0_hw_fini(handle); } static int gfx_v9_0_resume(void *handle) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; - int r; - - r = gfx_v9_0_hw_init(adev); - adev->gfx.in_suspend = false; - return r; + return gfx_v9_0_hw_init(handle); } static bool gfx_v9_0_is_idle(void *handle) @@ -3360,7 +3410,7 @@ static int gfx_v9_0_soft_reset(void *handle) if (grbm_soft_reset) { /* stop the rlc */ - gfx_v9_0_rlc_stop(adev); + adev->gfx.rlc.funcs->stop(adev); /* Disable GFX parsing/prefetching */ gfx_v9_0_cp_gfx_enable(adev, false); @@ -3408,15 +3458,6 @@ static void gfx_v9_0_ring_emit_gds_switch(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; - gds_base = gds_base >> AMDGPU_GDS_SHIFT; - gds_size = gds_size >> AMDGPU_GDS_SHIFT; - - gws_base = gws_base >> AMDGPU_GWS_SHIFT; - gws_size = gws_size >> AMDGPU_GWS_SHIFT; - - oa_base = oa_base >> AMDGPU_OA_SHIFT; - oa_size = oa_size >> AMDGPU_OA_SHIFT; - /* GDS Base */ gfx_v9_0_write_data_to_reg(ring, 0, false, SOC15_REG_OFFSET(GC, 0, mmGDS_VMID0_BASE) + 2 * vmid, @@ -3468,64 +3509,47 @@ static int gfx_v9_0_late_init(void *handle) return 0; } -static void gfx_v9_0_enter_rlc_safe_mode(struct amdgpu_device *adev) +static bool gfx_v9_0_is_rlc_enabled(struct amdgpu_device *adev) { - uint32_t rlc_setting, data; - unsigned i; - - if (adev->gfx.rlc.in_safe_mode) - return; + uint32_t rlc_setting; /* if RLC is not enabled, do nothing */ rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; - - if (adev->cg_flags & - (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG | - AMD_CG_SUPPORT_GFX_3D_CGCG)) { - data = RLC_SAFE_MODE__CMD_MASK; - data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); + return false; - /* wait for RLC_SAFE_MODE */ - for (i = 0; i < adev->usec_timeout; i++) { - if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) - break; - udelay(1); - } - adev->gfx.rlc.in_safe_mode = true; - } + return true; } -static void gfx_v9_0_exit_rlc_safe_mode(struct amdgpu_device *adev) +static void gfx_v9_0_set_safe_mode(struct amdgpu_device *adev) { - uint32_t rlc_setting, data; - - if (!adev->gfx.rlc.in_safe_mode) - return; + uint32_t data; + unsigned i; - /* if RLC is not enabled, do nothing */ - rlc_setting = RREG32_SOC15(GC, 0, mmRLC_CNTL); - if (!(rlc_setting & RLC_CNTL__RLC_ENABLE_F32_MASK)) - return; + data = RLC_SAFE_MODE__CMD_MASK; + data |= (1 << RLC_SAFE_MODE__MESSAGE__SHIFT); + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); - if (adev->cg_flags & - (AMD_CG_SUPPORT_GFX_CGCG | AMD_CG_SUPPORT_GFX_MGCG)) { - /* - * Try to exit safe mode only if it is already in safe - * mode. - */ - data = RLC_SAFE_MODE__CMD_MASK; - WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); - adev->gfx.rlc.in_safe_mode = false; + /* wait for RLC_SAFE_MODE */ + for (i = 0; i < adev->usec_timeout; i++) { + if (!REG_GET_FIELD(RREG32_SOC15(GC, 0, mmRLC_SAFE_MODE), RLC_SAFE_MODE, CMD)) + break; + udelay(1); } } +static void gfx_v9_0_unset_safe_mode(struct amdgpu_device *adev) +{ + uint32_t data; + + data = RLC_SAFE_MODE__CMD_MASK; + WREG32_SOC15(GC, 0, mmRLC_SAFE_MODE, data); +} + static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, bool enable) { - gfx_v9_0_enter_rlc_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if ((adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) && enable) { gfx_v9_0_enable_gfx_cg_power_gating(adev, true); @@ -3536,7 +3560,7 @@ static void gfx_v9_0_update_gfx_cg_power_gating(struct amdgpu_device *adev, gfx_v9_0_enable_gfx_pipeline_powergating(adev, false); } - gfx_v9_0_exit_rlc_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v9_0_update_gfx_mg_power_gating(struct amdgpu_device *adev, @@ -3634,7 +3658,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, { uint32_t data, def; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); /* Enable 3D CGCG/CGLS */ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_3D_CGCG)) { @@ -3674,7 +3698,7 @@ static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev, WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL_3D, data); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev, @@ -3682,7 +3706,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev { uint32_t def, data; - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) { def = data = RREG32_SOC15(GC, 0, mmRLC_CGTT_MGCG_OVERRIDE); @@ -3722,7 +3746,7 @@ static void gfx_v9_0_update_coarse_grain_clock_gating(struct amdgpu_device *adev WREG32_SOC15(GC, 0, mmRLC_CGCG_CGLS_CTRL, data); } - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, @@ -3751,8 +3775,17 @@ static int gfx_v9_0_update_gfx_clock_gating(struct amdgpu_device *adev, } static const struct amdgpu_rlc_funcs gfx_v9_0_rlc_funcs = { - .enter_safe_mode = gfx_v9_0_enter_rlc_safe_mode, - .exit_safe_mode = gfx_v9_0_exit_rlc_safe_mode + .is_rlc_enabled = gfx_v9_0_is_rlc_enabled, + .set_safe_mode = gfx_v9_0_set_safe_mode, + .unset_safe_mode = gfx_v9_0_unset_safe_mode, + .init = gfx_v9_0_rlc_init, + .get_csb_size = gfx_v9_0_get_csb_size, + .get_csb_buffer = gfx_v9_0_get_csb_buffer, + .get_cp_table_num = gfx_v9_0_cp_jump_table_num, + .resume = gfx_v9_0_rlc_resume, + .stop = gfx_v9_0_rlc_stop, + .reset = gfx_v9_0_rlc_reset, + .start = gfx_v9_0_rlc_start }; static int gfx_v9_0_set_powergating_state(void *handle, @@ -3763,6 +3796,10 @@ static int gfx_v9_0_set_powergating_state(void *handle, switch (adev->asic_type) { case CHIP_RAVEN: + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } if (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS) { gfx_v9_0_enable_sck_slow_down_on_power_up(adev, true); gfx_v9_0_enable_sck_slow_down_on_power_down(adev, true); @@ -3782,14 +3819,16 @@ static int gfx_v9_0_set_powergating_state(void *handle, /* update mgcg state */ gfx_v9_0_update_gfx_mg_power_gating(adev, enable); - /* set gfx off through smu */ - if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); + if (enable) + amdgpu_gfx_off_ctrl(adev, true); break; case CHIP_VEGA12: - /* set gfx off through smu */ - if (enable && adev->powerplay.pp_funcs->set_powergating_by_smu) - amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, true); + if (!enable) { + amdgpu_gfx_off_ctrl(adev, false); + cancel_delayed_work_sync(&adev->gfx.gfx_off_delay_work); + } else { + amdgpu_gfx_off_ctrl(adev, true); + } break; default: break; @@ -3927,9 +3966,11 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring) } static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 header, control = 0; if (ib->flags & AMDGPU_IB_FLAG_CE) @@ -3958,20 +3999,22 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring, } static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { - u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24); - amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); + amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2)); BUG_ON(ib->gpu_addr & 0x3); /* Dword align */ - amdgpu_ring_write(ring, + amdgpu_ring_write(ring, #ifdef __BIG_ENDIAN - (2 << 0) | + (2 << 0) | #endif - lower_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); - amdgpu_ring_write(ring, control); + lower_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, upper_32_bits(ib->gpu_addr)); + amdgpu_ring_write(ring, control); } static void gfx_v9_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, @@ -4350,8 +4393,11 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, uint32_t ref, uint32_t mask) { int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX); + struct amdgpu_device *adev = ring->adev; + bool fw_version_ok = (ring->funcs->type == AMDGPU_RING_TYPE_GFX) ? + adev->gfx.me_fw_write_wait : adev->gfx.mec_fw_write_wait; - if (amdgpu_sriov_vf(ring->adev)) + if (fw_version_ok) gfx_v9_0_wait_reg_mem(ring, usepfp, 0, 1, reg0, reg1, ref, mask, 0x20); else @@ -4359,6 +4405,18 @@ static void gfx_v9_0_ring_emit_reg_write_reg_wait(struct amdgpu_ring *ring, ref, mask); } +static void gfx_v9_0_ring_soft_recovery(struct amdgpu_ring *ring, unsigned vmid) +{ + struct amdgpu_device *adev = ring->adev; + uint32_t value = 0; + + value = REG_SET_FIELD(value, SQ_CMD, CMD, 0x03); + value = REG_SET_FIELD(value, SQ_CMD, MODE, 0x01); + value = REG_SET_FIELD(value, SQ_CMD, CHECK_VMID, 1); + value = REG_SET_FIELD(value, SQ_CMD, VM_ID, vmid); + WREG32(mmSQ_CMD, value); +} + static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev, enum amdgpu_interrupt_state state) { @@ -4535,12 +4593,39 @@ static int gfx_v9_0_eop_irq(struct amdgpu_device *adev, return 0; } +static void gfx_v9_0_fault(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry) +{ + u8 me_id, pipe_id, queue_id; + struct amdgpu_ring *ring; + int i; + + me_id = (entry->ring_id & 0x0c) >> 2; + pipe_id = (entry->ring_id & 0x03) >> 0; + queue_id = (entry->ring_id & 0x70) >> 4; + + switch (me_id) { + case 0: + drm_sched_fault(&adev->gfx.gfx_ring[0].sched); + break; + case 1: + case 2: + for (i = 0; i < adev->gfx.num_compute_rings; i++) { + ring = &adev->gfx.compute_ring[i]; + if (ring->me == me_id && ring->pipe == pipe_id && + ring->queue == queue_id) + drm_sched_fault(&ring->sched); + } + break; + } +} + static int gfx_v9_0_priv_reg_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal register access in command stream\n"); - schedule_work(&adev->reset_work); + gfx_v9_0_fault(adev, entry); return 0; } @@ -4549,69 +4634,7 @@ static int gfx_v9_0_priv_inst_irq(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { DRM_ERROR("Illegal instruction in command stream\n"); - schedule_work(&adev->reset_work); - return 0; -} - -static int gfx_v9_0_kiq_set_interrupt_state(struct amdgpu_device *adev, - struct amdgpu_irq_src *src, - unsigned int type, - enum amdgpu_interrupt_state state) -{ - uint32_t tmp, target; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - if (ring->me == 1) - target = SOC15_REG_OFFSET(GC, 0, mmCP_ME1_PIPE0_INT_CNTL); - else - target = SOC15_REG_OFFSET(GC, 0, mmCP_ME2_PIPE0_INT_CNTL); - target += ring->pipe; - - switch (type) { - case AMDGPU_CP_KIQ_IRQ_DRIVER0: - if (state == AMDGPU_IRQ_STATE_DISABLE) { - tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 0); - WREG32(target, tmp); - } else { - tmp = RREG32_SOC15(GC, 0, mmCPC_INT_CNTL); - tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32_SOC15(GC, 0, mmCPC_INT_CNTL, tmp); - - tmp = RREG32(target); - tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL, - GENERIC2_INT_ENABLE, 1); - WREG32(target, tmp); - } - break; - default: - BUG(); /* kiq only support GENERIC2_INT now */ - break; - } - return 0; -} - -static int gfx_v9_0_kiq_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - u8 me_id, pipe_id, queue_id; - struct amdgpu_ring *ring = &(adev->gfx.kiq.ring); - - me_id = (entry->ring_id & 0x0c) >> 2; - pipe_id = (entry->ring_id & 0x03) >> 0; - queue_id = (entry->ring_id & 0x70) >> 4; - DRM_DEBUG("IH: CPC GENERIC2_INT, me:%d, pipe:%d, queue:%d\n", - me_id, pipe_id, queue_id); - - amdgpu_fence_process(ring); + gfx_v9_0_fault(adev, entry); return 0; } @@ -4681,6 +4704,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_gfx = { .emit_wreg = gfx_v9_0_ring_emit_wreg, .emit_reg_wait = gfx_v9_0_ring_emit_reg_wait, .emit_reg_write_reg_wait = gfx_v9_0_ring_emit_reg_write_reg_wait, + .soft_recovery = gfx_v9_0_ring_soft_recovery, }; static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = { @@ -4737,10 +4761,8 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = { 2 + /* gfx_v9_0_ring_emit_vm_flush */ 8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */ .emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */ - .emit_ib = gfx_v9_0_ring_emit_ib_compute, .emit_fence = gfx_v9_0_ring_emit_fence_kiq, .test_ring = gfx_v9_0_ring_test_ring, - .test_ib = gfx_v9_0_ring_test_ib, .insert_nop = amdgpu_ring_insert_nop, .pad_ib = amdgpu_ring_generic_pad_ib, .emit_rreg = gfx_v9_0_ring_emit_rreg, @@ -4762,11 +4784,6 @@ static void gfx_v9_0_set_ring_funcs(struct amdgpu_device *adev) adev->gfx.compute_ring[i].funcs = &gfx_v9_0_ring_funcs_compute; } -static const struct amdgpu_irq_src_funcs gfx_v9_0_kiq_irq_funcs = { - .set = gfx_v9_0_kiq_set_interrupt_state, - .process = gfx_v9_0_kiq_irq, -}; - static const struct amdgpu_irq_src_funcs gfx_v9_0_eop_irq_funcs = { .set = gfx_v9_0_set_eop_interrupt_state, .process = gfx_v9_0_eop_irq, @@ -4792,9 +4809,6 @@ static void gfx_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gfx.priv_inst_irq.num_types = 1; adev->gfx.priv_inst_irq.funcs = &gfx_v9_0_priv_inst_irq_funcs; - - adev->gfx.kiq.irq.num_types = AMDGPU_CP_KIQ_IRQ_LAST; - adev->gfx.kiq.irq.funcs = &gfx_v9_0_kiq_irq_funcs; } static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) @@ -4814,7 +4828,20 @@ static void gfx_v9_0_set_rlc_funcs(struct amdgpu_device *adev) static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev) { /* init asci gds info */ - adev->gds.mem.total_size = RREG32_SOC15(GC, 0, mmGDS_VMID0_SIZE); + switch (adev->asic_type) { + case CHIP_VEGA10: + case CHIP_VEGA12: + case CHIP_VEGA20: + adev->gds.mem.total_size = 0x10000; + break; + case CHIP_RAVEN: + adev->gds.mem.total_size = 0x1000; + break; + default: + adev->gds.mem.total_size = 0x10000; + break; + } + adev->gds.gws.total_size = 64; adev->gds.oa.total_size = 16; diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c index acfbd2d749cf..f5edddf3b29d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c @@ -35,26 +35,25 @@ u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev) return (u64)RREG32_SOC15(GC, 0, mmMC_VM_FB_OFFSET) << 24; } -static void gfxhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value; - - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->gmc.vram_start - + adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /*valid bit*/ + /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ + int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void gfxhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - gfxhub_v1_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + gfxhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(GC, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -71,16 +70,28 @@ static void gfxhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) { uint64_t value; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(GC, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, 0); - WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, 0xFFFFFFFF); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(GC, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); - WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + + if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max((adev->gmc.fb_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); + else + WREG32_SOC15(GC, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h index 206e29cad753..92d3a70cd9b1 100644 --- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.h @@ -30,5 +30,7 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value); void gfxhub_v1_0_init(struct amdgpu_device *adev); u64 gfxhub_v1_0_get_mc_fb_offset(struct amdgpu_device *adev); +void gfxhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c new file mode 100644 index 000000000000..5e9ab8eb214a --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.c @@ -0,0 +1,53 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "gfxhub_v1_1.h" + +#include "gc/gc_9_2_1_offset.h" +#include "gc/gc_9_2_1_sh_mask.h" + +#include "soc15_common.h" + +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev) +{ + u32 xgmi_lfb_cntl = RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_CNTL); + u32 max_region = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_MAX_REGION); + + /* PF_MAX_REGION=0 means xgmi is disabled */ + if (max_region) { + adev->gmc.xgmi.num_physical_nodes = max_region + 1; + if (adev->gmc.xgmi.num_physical_nodes > 4) + return -EINVAL; + + adev->gmc.xgmi.physical_node_id = + REG_GET_FIELD(xgmi_lfb_cntl, MC_VM_XGMI_LFB_CNTL, PF_LFB_REGION); + if (adev->gmc.xgmi.physical_node_id > 3) + return -EINVAL; + adev->gmc.xgmi.node_segment_size = REG_GET_FIELD( + RREG32_SOC15(GC, 0, mmMC_VM_XGMI_LFB_SIZE), + MC_VM_XGMI_LFB_SIZE, PF_LFB_SIZE) << 24; + } + + return 0; +} diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h new file mode 100644 index 000000000000..d753cf28a0a6 --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_1.h @@ -0,0 +1,29 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __GFXHUB_V1_1_H__ +#define __GFXHUB_V1_1_H__ + +int gfxhub_v1_1_get_xgmi_info(struct amdgpu_device *adev); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c index ad151fefa41f..9fc3296592fe 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c @@ -26,6 +26,7 @@ #include "amdgpu.h" #include "gmc_v6_0.h" #include "amdgpu_ucode.h" +#include "amdgpu_gem.h" #include "bif/bif_3_0_d.h" #include "bif/bif_3_0_sh_mask.h" @@ -45,6 +46,7 @@ MODULE_FIRMWARE("amdgpu/tahiti_mc.bin"); MODULE_FIRMWARE("amdgpu/pitcairn_mc.bin"); MODULE_FIRMWARE("amdgpu/verde_mc.bin"); MODULE_FIRMWARE("amdgpu/oland_mc.bin"); +MODULE_FIRMWARE("amdgpu/hainan_mc.bin"); MODULE_FIRMWARE("amdgpu/si58_mc.bin"); #define MC_SEQ_MISC0__MT__MASK 0xf0000000 @@ -223,8 +225,8 @@ static void gmc_v6_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->gmc, base); - amdgpu_device_gart_location(adev, mc); + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); } static void gmc_v6_0_mc_program(struct amdgpu_device *adev) @@ -357,7 +359,8 @@ static int gmc_v6_0_mc_init(struct amdgpu_device *adev) return 0; } -static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) +static void gmc_v6_0_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid, uint32_t flush_type) { WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); } @@ -493,16 +496,20 @@ static void gmc_v6_0_set_prt(struct amdgpu_device *adev, bool enable) static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) { + uint64_t table_addr; int r, i; u32 field; - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } r = amdgpu_gart_table_vram_pin(adev); if (r) return r; + + table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + /* Setup TLB control */ WREG32(mmMC_VM_MX_L1_TLB_CNTL, (0xA << 7) | @@ -531,7 +538,7 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); @@ -555,10 +562,10 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) for (i = 1; i < 16; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, - adev->gart.table_addr >> 12); + table_addr >> 12); else WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, - adev->gart.table_addr >> 12); + table_addr >> 12); } /* enable context1-15 */ @@ -575,10 +582,10 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev) else gmc_v6_0_set_fault_enable_default(adev, true); - gmc_v6_0_flush_gpu_tlb(adev, 0); + gmc_v6_0_flush_gpu_tlb(adev, 0, 0); dev_info(adev->dev, "PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)adev->gart.table_addr); + (unsigned long long)table_addr); adev->gart.ready = true; return 0; } @@ -587,7 +594,7 @@ static int gmc_v6_0_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.robj) { + if (adev->gart.bo) { dev_warn(adev->dev, "gmc_v6_0 PCIE GART already initialized\n"); return 0; } @@ -854,11 +861,11 @@ static int gmc_v6_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v6_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 146, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 147, &adev->gmc.vm_fault); if (r) return r; @@ -1175,8 +1182,7 @@ static const struct amdgpu_irq_src_funcs gmc_v6_0_irq_funcs = { static void gmc_v6_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v6_0_gmc_funcs; } static void gmc_v6_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c index f8d8a3a73e42..761dcfb2fec0 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c @@ -29,6 +29,7 @@ #include "gmc_v7_0.h" #include "amdgpu_ucode.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_gem.h" #include "bif/bif_4_1_d.h" #include "bif/bif_4_1_sh_mask.h" @@ -241,8 +242,8 @@ static void gmc_v7_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->gmc, base); - amdgpu_device_gart_location(adev, mc); + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); } /** @@ -429,7 +430,8 @@ static int gmc_v7_0_mc_init(struct amdgpu_device *adev) * * Flush the TLB for the requested page table (CIK). */ -static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid) +static void gmc_v7_0_flush_gpu_tlb(struct amdgpu_device *adev, + uint32_t vmid, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -601,16 +603,20 @@ static void gmc_v7_0_set_prt(struct amdgpu_device *adev, bool enable) */ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) { + uint64_t table_addr; int r, i; u32 tmp, field; - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } r = amdgpu_gart_table_vram_pin(adev); if (r) return r; + + table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + /* Setup TLB control */ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -642,7 +648,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); @@ -666,10 +672,10 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) for (i = 1; i < 16; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, - adev->gart.table_addr >> 12); + table_addr >> 12); else WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, - adev->gart.table_addr >> 12); + table_addr >> 12); } /* enable context1-15 */ @@ -693,10 +699,10 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev) WREG32(mmCHUB_CONTROL, tmp); } - gmc_v7_0_flush_gpu_tlb(adev, 0); + gmc_v7_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)adev->gart.table_addr); + (unsigned long long)table_addr); adev->gart.ready = true; return 0; } @@ -705,7 +711,7 @@ static int gmc_v7_0_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.robj) { + if (adev->gart.bo) { WARN(1, "R600 PCIE GART already initialized\n"); return 0; } @@ -986,11 +992,11 @@ static int gmc_v7_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v7_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -1383,8 +1389,7 @@ static const struct amdgpu_irq_src_funcs gmc_v7_0_irq_funcs = { static void gmc_v7_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v7_0_gmc_funcs; } static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c index 9333109b210d..1ad7e6b8ed1d 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c @@ -27,6 +27,7 @@ #include "gmc_v8_0.h" #include "amdgpu_ucode.h" #include "amdgpu_amdkfd.h" +#include "amdgpu_gem.h" #include "gmc/gmc_8_1_d.h" #include "gmc/gmc_8_1_sh_mask.h" @@ -55,6 +56,9 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris11_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris10_mc.bin"); MODULE_FIRMWARE("amdgpu/polaris12_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin"); +MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin"); static const u32 golden_settings_tonga_a11[] = { @@ -223,13 +227,39 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev) chip_name = "tonga"; break; case CHIP_POLARIS11: - chip_name = "polaris11"; + if (((adev->pdev->device == 0x67ef) && + ((adev->pdev->revision == 0xe0) || + (adev->pdev->revision == 0xe5))) || + ((adev->pdev->device == 0x67ff) && + ((adev->pdev->revision == 0xcf) || + (adev->pdev->revision == 0xef) || + (adev->pdev->revision == 0xff)))) + chip_name = "polaris11_k"; + else if ((adev->pdev->device == 0x67ef) && + (adev->pdev->revision == 0xe2)) + chip_name = "polaris11_k"; + else + chip_name = "polaris11"; break; case CHIP_POLARIS10: - chip_name = "polaris10"; + if ((adev->pdev->device == 0x67df) && + ((adev->pdev->revision == 0xe1) || + (adev->pdev->revision == 0xf7))) + chip_name = "polaris10_k"; + else + chip_name = "polaris10"; break; case CHIP_POLARIS12: - chip_name = "polaris12"; + if (((adev->pdev->device == 0x6987) && + ((adev->pdev->revision == 0xc0) || + (adev->pdev->revision == 0xc3))) || + ((adev->pdev->device == 0x6981) && + ((adev->pdev->revision == 0x00) || + (adev->pdev->revision == 0x01) || + (adev->pdev->revision == 0x10)))) + chip_name = "polaris12_k"; + else + chip_name = "polaris12"; break; case CHIP_FIJI: case CHIP_CARRIZO: @@ -336,7 +366,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) const struct mc_firmware_header_v1_0 *hdr; const __le32 *fw_data = NULL; const __le32 *io_mc_regs = NULL; - u32 data, vbios_version; + u32 data; int i, ucode_size, regs_size; /* Skip MC ucode loading on SR-IOV capable boards. @@ -347,13 +377,6 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev) if (amdgpu_sriov_bios(adev)) return 0; - WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 0x9F); - data = RREG32(mmMC_SEQ_IO_DEBUG_DATA); - vbios_version = data & 0xf; - - if (vbios_version == 0) - return 0; - if (!adev->gmc.fw) return -EINVAL; @@ -410,8 +433,8 @@ static void gmc_v8_0_vram_gtt_location(struct amdgpu_device *adev, base = RREG32(mmMC_VM_FB_LOCATION) & 0xFFFF; base <<= 24; - amdgpu_device_vram_location(adev, &adev->gmc, base); - amdgpu_device_gart_location(adev, mc); + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); } /** @@ -610,7 +633,7 @@ static int gmc_v8_0_mc_init(struct amdgpu_device *adev) * Flush the TLB for the requested page table (CIK). */ static void gmc_v8_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) + uint32_t vmid, uint32_t flush_type) { /* bits 0-15 are the VM contexts0-15 */ WREG32(mmVM_INVALIDATE_REQUEST, 1 << vmid); @@ -806,16 +829,20 @@ static void gmc_v8_0_set_prt(struct amdgpu_device *adev, bool enable) */ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) { + uint64_t table_addr; int r, i; u32 tmp, field; - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } r = amdgpu_gart_table_vram_pin(adev); if (r) return r; + + table_addr = amdgpu_bo_gpu_offset(adev->gart.bo); + /* Setup TLB control */ tmp = RREG32(mmMC_VM_MX_L1_TLB_CNTL); tmp = REG_SET_FIELD(tmp, MC_VM_MX_L1_TLB_CNTL, ENABLE_L1_TLB, 1); @@ -863,7 +890,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) /* setup context0 */ WREG32(mmVM_CONTEXT0_PAGE_TABLE_START_ADDR, adev->gmc.gart_start >> 12); WREG32(mmVM_CONTEXT0_PAGE_TABLE_END_ADDR, adev->gmc.gart_end >> 12); - WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, adev->gart.table_addr >> 12); + WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR, table_addr >> 12); WREG32(mmVM_CONTEXT0_PROTECTION_FAULT_DEFAULT_ADDR, (u32)(adev->dummy_page_addr >> 12)); WREG32(mmVM_CONTEXT0_CNTL2, 0); @@ -887,10 +914,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) for (i = 1; i < 16; i++) { if (i < 8) WREG32(mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR + i, - adev->gart.table_addr >> 12); + table_addr >> 12); else WREG32(mmVM_CONTEXT8_PAGE_TABLE_BASE_ADDR + i - 8, - adev->gart.table_addr >> 12); + table_addr >> 12); } /* enable context1-15 */ @@ -915,10 +942,10 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev) else gmc_v8_0_set_fault_enable_default(adev, true); - gmc_v8_0_flush_gpu_tlb(adev, 0); + gmc_v8_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)adev->gart.table_addr); + (unsigned long long)table_addr); adev->gart.ready = true; return 0; } @@ -927,7 +954,7 @@ static int gmc_v8_0_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.robj) { + if (adev->gart.bo) { WARN(1, "R600 PCIE GART already initialized\n"); return 0; } @@ -1090,11 +1117,11 @@ static int gmc_v8_0_sw_init(void *handle) adev->gmc.vram_type = gmc_v8_0_convert_vram_type(tmp); } - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_PAGE_INV_FAULT, &adev->gmc.vm_fault); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_GFX_MEM_PROT_FAULT, &adev->gmc.vm_fault); if (r) return r; @@ -1728,8 +1755,7 @@ static const struct amdgpu_irq_src_funcs gmc_v8_0_irq_funcs = { static void gmc_v8_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v8_0_gmc_funcs; } static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev) diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c index 72f8018fa2a8..bacdaef77b6c 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c @@ -25,6 +25,7 @@ #include "amdgpu.h" #include "gmc_v9_0.h" #include "amdgpu_atomfirmware.h" +#include "amdgpu_gem.h" #include "hdp/hdp_4_0_offset.h" #include "hdp/hdp_4_0_sh_mask.h" @@ -42,6 +43,7 @@ #include "gfxhub_v1_0.h" #include "mmhub_v1_0.h" +#include "gfxhub_v1_1.h" #include "ivsrcid/vmc/irqsrcs_vmc_1_0.h" @@ -242,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev, return 0; } +/** + * vega10_ih_prescreen_iv - prescreen an interrupt vector + * + * @adev: amdgpu_device pointer + * + * Returns true if the interrupt vector should be further processed. + */ +static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev, + struct amdgpu_iv_entry *entry, + uint64_t addr) +{ + struct amdgpu_vm *vm; + u64 key; + int r; + + /* No PASID, can't identify faulting process */ + if (!entry->pasid) + return true; + + /* Not a retry fault */ + if (!(entry->src_data[1] & 0x80)) + return true; + + /* Track retry faults in per-VM fault FIFO. */ + spin_lock(&adev->vm_manager.pasid_lock); + vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid); + if (!vm) { + /* VM not found, process it normally */ + spin_unlock(&adev->vm_manager.pasid_lock); + return true; + } + + key = AMDGPU_VM_FAULT(entry->pasid, addr); + r = amdgpu_vm_add_fault(vm->fault_hash, key); + + /* Hash table is full or the fault is already being processed, + * ignore further page faults + */ + if (r != 0) { + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + /* No locking required with single writer and single reader */ + r = kfifo_put(&vm->faults, key); + if (!r) { + /* FIFO is full. Ignore it until there is space */ + amdgpu_vm_clear_fault(vm->fault_hash, key); + spin_unlock(&adev->vm_manager.pasid_lock); + return false; + } + + spin_unlock(&adev->vm_manager.pasid_lock); + /* It's the first fault for this address, process it normally */ + return true; +} + static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) @@ -253,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, addr = (u64)entry->src_data[0] << 12; addr |= ((u64)entry->src_data[1] & 0xf) << 44; + if (!gmc_v9_0_prescreen_iv(adev, entry, addr)) + return 1; /* This also prevents sending it to KFD */ + if (!amdgpu_sriov_vf(adev)) { status = RREG32(hub->vm_l2_pro_fault_status); WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1); @@ -264,12 +325,12 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev, amdgpu_vm_get_task_info(adev, entry->pasid, &task_info); dev_err(adev->dev, - "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d\n)\n", + "[%s] VMC page fault (src_id:%u ring:%u vmid:%u pasid:%u, for process %s pid %d thread %s pid %d)\n", entry->vmid_src ? "mmhub" : "gfxhub", entry->src_id, entry->ring_id, entry->vmid, entry->pasid, task_info.process_name, task_info.tgid, task_info.task_name, task_info.pid); - dev_err(adev->dev, " at address 0x%016llx from %d\n", + dev_err(adev->dev, " in page starting at address 0x%016llx from %d\n", addr, entry->client_id); if (!amdgpu_sriov_vf(adev)) dev_err(adev->dev, @@ -291,14 +352,14 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev) adev->gmc.vm_fault.funcs = &gmc_v9_0_irq_funcs; } -static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) +static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid, + uint32_t flush_type) { u32 req = 0; - /* invalidate using legacy mode on vmid*/ req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, PER_VMID_INVALIDATE_REQ, 1 << vmid); - req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0); + req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, flush_type); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1); req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1); @@ -318,54 +379,52 @@ static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vmid) */ /** - * gmc_v9_0_flush_gpu_tlb - gart tlb flush callback + * gmc_v9_0_flush_gpu_tlb - tlb flush with certain type * * @adev: amdgpu_device pointer * @vmid: vm instance to flush + * @flush_type: the flush type * - * Flush the TLB for the requested page table. + * Flush the TLB for the requested page table using certain type. */ static void gmc_v9_0_flush_gpu_tlb(struct amdgpu_device *adev, - uint32_t vmid) + uint32_t vmid, uint32_t flush_type) { - /* Use register 17 for GART */ const unsigned eng = 17; unsigned i, j; - spin_lock(&adev->gmc.invalidate_lock); - for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) { struct amdgpu_vmhub *hub = &adev->vmhub[i]; - u32 tmp = gmc_v9_0_get_invalidate_req(vmid); - - WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); + u32 tmp = gmc_v9_0_get_invalidate_req(vmid, flush_type); - /* Busy wait for ACK.*/ - for (j = 0; j < 100; j++) { - tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - tmp &= 1 << vmid; - if (tmp) - break; - cpu_relax(); - } - if (j < 100) + /* This is necessary for a HW workaround under SRIOV as well + * as GFXOFF under bare metal + */ + if (adev->gfx.kiq.ring.sched.ready && + (amdgpu_sriov_runtime(adev) || !amdgpu_sriov_vf(adev)) && + !adev->in_gpu_reset) { + uint32_t req = hub->vm_inv_eng0_req + eng; + uint32_t ack = hub->vm_inv_eng0_ack + eng; + + amdgpu_virt_kiq_reg_write_reg_wait(adev, req, ack, tmp, + 1 << vmid); continue; + } - /* Wait for ACK with a delay.*/ + spin_lock(&adev->gmc.invalidate_lock); + WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp); for (j = 0; j < adev->usec_timeout; j++) { tmp = RREG32_NO_KIQ(hub->vm_inv_eng0_ack + eng); - tmp &= 1 << vmid; - if (tmp) + if (tmp & (1 << vmid)) break; udelay(1); } + spin_unlock(&adev->gmc.invalidate_lock); if (j < adev->usec_timeout) continue; DRM_ERROR("Timeout waiting for VM flush ACK!\n"); } - - spin_unlock(&adev->gmc.invalidate_lock); } static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, @@ -373,13 +432,9 @@ static uint64_t gmc_v9_0_emit_flush_gpu_tlb(struct amdgpu_ring *ring, { struct amdgpu_device *adev = ring->adev; struct amdgpu_vmhub *hub = &adev->vmhub[ring->funcs->vmhub]; - uint32_t req = gmc_v9_0_get_invalidate_req(vmid); - uint64_t flags = AMDGPU_PTE_VALID; + uint32_t req = gmc_v9_0_get_invalidate_req(vmid, 0); unsigned eng = ring->vm_inv_eng; - amdgpu_gmc_get_vm_pde(adev, -1, &pd_addr, &flags); - pd_addr |= flags; - amdgpu_ring_emit_wreg(ring, hub->ctx0_ptb_addr_lo32 + (2 * vmid), lower_32_bits(pd_addr)); @@ -509,7 +564,7 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev, static void gmc_v9_0_get_vm_pde(struct amdgpu_device *adev, int level, uint64_t *addr, uint64_t *flags) { - if (!(*flags & AMDGPU_PDE_PTE)) + if (!(*flags & AMDGPU_PDE_PTE) && !(*flags & AMDGPU_PTE_SYSTEM)) *addr = adev->vm_manager.vram_base_offset + *addr - adev->gmc.vram_start; BUG_ON(*addr & 0xFFFF00000000003FULL); @@ -541,8 +596,7 @@ static const struct amdgpu_gmc_funcs gmc_v9_0_gmc_funcs = { static void gmc_v9_0_set_gmc_funcs(struct amdgpu_device *adev) { - if (adev->gmc.gmc_funcs == NULL) - adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; + adev->gmc.gmc_funcs = &gmc_v9_0_gmc_funcs; } static int gmc_v9_0_early_init(void *handle) @@ -641,40 +695,69 @@ static int gmc_v9_0_ecc_available(struct amdgpu_device *adev) return lost_sheep == 0; } -static int gmc_v9_0_late_init(void *handle) +static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev) { - struct amdgpu_device *adev = (struct amdgpu_device *)handle; + /* - * The latest engine allocation on gfx9 is: - * Engine 0, 1: idle - * Engine 2, 3: firmware - * Engine 4~13: amdgpu ring, subject to change when ring number changes - * Engine 14~15: idle - * Engine 16: kfd tlb invalidation - * Engine 17: Gart flushes + * TODO: + * Currently there is a bug where some memory client outside + * of the driver writes to first 8M of VRAM on S3 resume, + * this overrides GART which by default gets placed in first 8M and + * causes VM_FAULTS once GTT is accessed. + * Keep the stolen memory reservation until the while this is not solved. + * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init */ - unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 }; + switch (adev->asic_type) { + case CHIP_VEGA10: + return true; + case CHIP_RAVEN: + case CHIP_VEGA12: + case CHIP_VEGA20: + default: + return false; + } +} + +static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring; + unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] = + {GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP}; unsigned i; - int r; + unsigned vmhub, inv_eng; - /* - * TODO - Uncomment once GART corruption issue is fixed. - */ - /* amdgpu_bo_late_init(adev); */ + for (i = 0; i < adev->num_rings; ++i) { + ring = adev->rings[i]; + vmhub = ring->funcs->vmhub; - for(i = 0; i < adev->num_rings; ++i) { - struct amdgpu_ring *ring = adev->rings[i]; - unsigned vmhub = ring->funcs->vmhub; + inv_eng = ffs(vm_inv_engs[vmhub]); + if (!inv_eng) { + dev_err(adev->dev, "no VM inv eng for ring %s\n", + ring->name); + return -EINVAL; + } + + ring->vm_inv_eng = inv_eng - 1; + change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub])); - ring->vm_inv_eng = vm_inv_eng[vmhub]++; - dev_info(adev->dev, "ring %u(%s) uses VM inv eng %u on hub %u\n", - ring->idx, ring->name, ring->vm_inv_eng, - ring->funcs->vmhub); + dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n", + ring->name, ring->vm_inv_eng, ring->funcs->vmhub); } - /* Engine 16 is used for KFD and 17 for GART flushes */ - for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i) - BUG_ON(vm_inv_eng[i] > 16); + return 0; +} + +static int gmc_v9_0_late_init(void *handle) +{ + struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; + + if (!gmc_v9_0_keep_stolen_memory(adev)) + amdgpu_bo_late_init(adev); + + r = gmc_v9_0_allocate_vm_inv_eng(adev); + if (r) + return r; if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) { r = gmc_v9_0_ecc_available(adev); @@ -698,10 +781,18 @@ static void gmc_v9_0_vram_gtt_location(struct amdgpu_device *adev, u64 base = 0; if (!amdgpu_sriov_vf(adev)) base = mmhub_v1_0_get_fb_location(adev); - amdgpu_device_vram_location(adev, &adev->gmc, base); - amdgpu_device_gart_location(adev, mc); + /* add the xgmi offset of the physical node */ + base += adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; + amdgpu_gmc_vram_location(adev, &adev->gmc, base); + amdgpu_gmc_gart_location(adev, mc); + if (!amdgpu_sriov_vf(adev)) + amdgpu_gmc_agp_location(adev, mc); /* base offset of vram pages */ adev->vm_manager.vram_base_offset = gfxhub_v1_0_get_mc_fb_offset(adev); + + /* XXX: add the xgmi offset of the physical node? */ + adev->vm_manager.vram_base_offset += + adev->gmc.xgmi.physical_node_id * adev->gmc.xgmi.node_segment_size; } /** @@ -781,7 +872,7 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) { int r; - if (adev->gart.robj) { + if (adev->gart.bo) { WARN(1, "VEGA10 PCIE GART already initialized\n"); return 0; } @@ -797,18 +888,16 @@ static int gmc_v9_0_gart_init(struct amdgpu_device *adev) static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) { -#if 0 u32 d1vga_control = RREG32_SOC15(DCE, 0, mmD1VGA_CONTROL); -#endif unsigned size; /* * TODO Remove once GART corruption is resolved * Check related code in gmc_v9_0_sw_fini * */ - size = 9 * 1024 * 1024; + if (gmc_v9_0_keep_stolen_memory(adev)) + return 9 * 1024 * 1024; -#if 0 if (REG_GET_FIELD(d1vga_control, D1VGA_CONTROL, D1VGA_MODE_ENABLE)) { size = 9 * 1024 * 1024; /* reserve 8MB for vga emulator and 1 MB for FB */ } else { @@ -825,6 +914,7 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) break; case CHIP_VEGA10: case CHIP_VEGA12: + case CHIP_VEGA20: default: viewport = RREG32_SOC15(DCE, 0, mmSCL0_VIEWPORT_SIZE); size = (REG_GET_FIELD(viewport, SCL0_VIEWPORT_SIZE, VIEWPORT_HEIGHT) * @@ -837,7 +927,6 @@ static unsigned gmc_v9_0_get_vbios_fb_size(struct amdgpu_device *adev) if ((adev->gmc.real_vram_size - size) < (8 * 1024 * 1024)) return 0; -#endif return size; } @@ -881,6 +970,9 @@ static int gmc_v9_0_sw_init(void *handle) /* This interrupt is VMC page fault.*/ r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT, &adev->gmc.vm_fault); + if (r) + return r; + r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT, &adev->gmc.vm_fault); @@ -913,6 +1005,12 @@ static int gmc_v9_0_sw_init(void *handle) } adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits); + if (adev->gmc.xgmi.supported) { + r = gfxhub_v1_1_get_xgmi_info(adev); + if (r) + return r; + } + r = gmc_v9_0_mc_init(adev); if (r) return r; @@ -949,16 +1047,8 @@ static int gmc_v9_0_sw_fini(void *handle) amdgpu_gem_force_release(adev); amdgpu_vm_manager_fini(adev); - /* - * TODO: - * Currently there is a bug where some memory client outside - * of the driver writes to first 8M of VRAM on S3 resume, - * this overrides GART which by default gets placed in first 8M and - * causes VM_FAULTS once GTT is accessed. - * Keep the stolen memory reservation until the while this is not solved. - * Also check code in gmc_v9_0_get_vbios_fb_size and gmc_v9_0_late_init - */ - amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); + if (gmc_v9_0_keep_stolen_memory(adev)) + amdgpu_bo_free_kernel(&adev->stolen_vga_memory, NULL, NULL); amdgpu_gart_table_vram_free(adev); amdgpu_bo_fini(adev); @@ -1007,7 +1097,7 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) golden_settings_vega10_hdp, ARRAY_SIZE(golden_settings_vega10_hdp)); - if (adev->gart.robj == NULL) { + if (adev->gart.bo == NULL) { dev_err(adev->dev, "No VRAM object for PCIE GART.\n"); return -EINVAL; } @@ -1017,7 +1107,6 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) switch (adev->asic_type) { case CHIP_RAVEN: - mmhub_v1_0_initialize_power_gating(adev); mmhub_v1_0_update_power_gating(adev, true); break; default: @@ -1047,11 +1136,11 @@ static int gmc_v9_0_gart_enable(struct amdgpu_device *adev) gfxhub_v1_0_set_fault_enable_default(adev, value); mmhub_v1_0_set_fault_enable_default(adev, value); - gmc_v9_0_flush_gpu_tlb(adev, 0); + gmc_v9_0_flush_gpu_tlb(adev, 0, 0); DRM_INFO("PCIE GART of %uM enabled (table at 0x%016llX).\n", (unsigned)(adev->gmc.gart_size >> 20), - (unsigned long long)adev->gart.table_addr); + (unsigned long long)amdgpu_bo_gpu_offset(adev->gart.bo)); adev->gart.ready = true; return 0; } diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h index b030ca5ea107..5c8deac65580 100644 --- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h @@ -24,6 +24,16 @@ #ifndef __GMC_V9_0_H__ #define __GMC_V9_0_H__ + /* + * The latest engine allocation on gfx9 is: + * Engine 2, 3: firmware + * Engine 0, 1, 4~16: amdgpu ring, + * subject to change when ring number changes + * Engine 17: Gart flushes + */ +#define GFXHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 +#define MMHUB_FREE_VM_INV_ENGS_BITMAP 0x1FFF3 + extern const struct amd_ip_funcs gmc_v9_0_ip_funcs; extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block; diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c index 842c4b677b4d..a3984d10b604 100644 --- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c @@ -208,34 +208,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev) } /** - * iceland_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * iceland_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -255,7 +227,7 @@ static void iceland_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -297,7 +269,7 @@ static int iceland_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -311,7 +283,7 @@ static int iceland_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -440,15 +412,13 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = { static const struct amdgpu_ih_funcs iceland_ih_funcs = { .get_wptr = iceland_ih_get_wptr, - .prescreen_iv = iceland_ih_prescreen_iv, .decode_iv = iceland_ih_decode_iv, .set_rptr = iceland_ih_set_rptr }; static void iceland_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &iceland_ih_funcs; + adev->irq.ih_funcs = &iceland_ih_funcs; } const struct amdgpu_ip_block_version iceland_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c index cb79a93c2eb7..0c9a2c03504e 100644 --- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c @@ -508,19 +508,19 @@ static int kv_enable_didt(struct amdgpu_device *adev, bool enable) pi->caps_db_ramping || pi->caps_td_ramping || pi->caps_tcp_ramping) { - adev->gfx.rlc.funcs->enter_safe_mode(adev); + amdgpu_gfx_rlc_enter_safe_mode(adev); if (enable) { ret = kv_program_pt_config_registers(adev, didt_config_kv); if (ret) { - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); return ret; } } kv_do_enable_didt(adev, enable); - adev->gfx.rlc.funcs->exit_safe_mode(adev); + amdgpu_gfx_rlc_exit_safe_mode(adev); } return 0; @@ -2995,12 +2995,12 @@ static int kv_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c index e70a0d4d6db4..d0d966d6080a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c @@ -38,33 +38,39 @@ u64 mmhub_v1_0_get_fb_location(struct amdgpu_device *adev) { u64 base = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_BASE); + u64 top = RREG32_SOC15(MMHUB, 0, mmMC_VM_FB_LOCATION_TOP); base &= MC_VM_FB_LOCATION_BASE__FB_BASE_MASK; base <<= 24; + top &= MC_VM_FB_LOCATION_TOP__FB_TOP_MASK; + top <<= 24; + + adev->gmc.fb_start = base; + adev->gmc.fb_end = top; + return base; } -static void mmhub_v1_0_init_gart_pt_regs(struct amdgpu_device *adev) +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base) { - uint64_t value; + /* two registers distance between mmVM_CONTEXT0_* to mmVM_CONTEXT1_* */ + int offset = mmVM_CONTEXT1_PAGE_TABLE_BASE_ADDR_LO32 + - mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32; - BUG_ON(adev->gart.table_addr & (~0x0000FFFFFFFFF000ULL)); - value = adev->gart.table_addr - adev->gmc.vram_start + - adev->vm_manager.vram_base_offset; - value &= 0x0000FFFFFFFFF000ULL; - value |= 0x1; /* valid bit */ - - WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, - lower_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32, + offset * vmid, lower_32_bits(page_table_base)); - WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, - upper_32_bits(value)); + WREG32_SOC15_OFFSET(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32, + offset * vmid, upper_32_bits(page_table_base)); } static void mmhub_v1_0_init_gart_aperture_regs(struct amdgpu_device *adev) { - mmhub_v1_0_init_gart_pt_regs(adev); + uint64_t pt_base = amdgpu_gmc_pd_addr(adev->gart.bo); + + mmhub_v1_0_setup_vm_pt_regs(adev, 0, pt_base); WREG32_SOC15(MMHUB, 0, mmVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32, (u32)(adev->gmc.gart_start >> 12)); @@ -82,16 +88,28 @@ static void mmhub_v1_0_init_system_aperture_regs(struct amdgpu_device *adev) uint64_t value; uint32_t tmp; - /* Disable AGP. */ + /* Program the AGP BAR */ WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BASE, 0); - WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, 0); - WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, 0x00FFFFFF); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_BOT, adev->gmc.agp_start >> 24); + WREG32_SOC15(MMHUB, 0, mmMC_VM_AGP_TOP, adev->gmc.agp_end >> 24); /* Program the system aperture low logical page number. */ WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_LOW_ADDR, - adev->gmc.vram_start >> 18); - WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, - adev->gmc.vram_end >> 18); + min(adev->gmc.fb_start, adev->gmc.agp_start) >> 18); + + if (adev->asic_type == CHIP_RAVEN && adev->rev_id >= 0x8) + /* + * Raven2 has a HW issue that it is unable to use the vram which + * is out of MC_VM_SYSTEM_APERTURE_HIGH_ADDR. So here is the + * workaround that increase system aperture high address (add 1) + * to get rid of the VM fault and hardware hang. + */ + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max((adev->gmc.fb_end >> 18) + 0x1, + adev->gmc.agp_end >> 18)); + else + WREG32_SOC15(MMHUB, 0, mmMC_VM_SYSTEM_APERTURE_HIGH_ADDR, + max(adev->gmc.fb_end, adev->gmc.agp_end) >> 18); /* Set default page address. */ value = adev->vram_scratch.gpu_addr - adev->gmc.vram_start + @@ -260,236 +278,16 @@ static void mmhub_v1_0_program_invalidation(struct amdgpu_device *adev) } } -struct pctl_data { - uint32_t index; - uint32_t data; -}; - -static const struct pctl_data pctl0_data[] = { - {0x0, 0x7a640}, - {0x9, 0x2a64a}, - {0xd, 0x2a680}, - {0x11, 0x6a684}, - {0x19, 0xea68e}, - {0x29, 0xa69e}, - {0x2b, 0x0010a6c0}, - {0x3d, 0x83a707}, - {0xc2, 0x8a7a4}, - {0xcc, 0x1a7b8}, - {0xcf, 0xfa7cc}, - {0xe0, 0x17a7dd}, - {0xf9, 0xa7dc}, - {0xfb, 0x12a7f5}, - {0x10f, 0xa808}, - {0x111, 0x12a810}, - {0x125, 0x7a82c} -}; -#define PCTL0_DATA_LEN (ARRAY_SIZE(pctl0_data)) - -#define PCTL0_RENG_EXEC_END_PTR 0x12d -#define PCTL0_STCTRL_REG_SAVE_RANGE0_BASE 0xa640 -#define PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa833 - -static const struct pctl_data pctl1_data[] = { - {0x0, 0x39a000}, - {0x3b, 0x44a040}, - {0x81, 0x2a08d}, - {0x85, 0x6ba094}, - {0xf2, 0x18a100}, - {0x10c, 0x4a132}, - {0x112, 0xca141}, - {0x120, 0x2fa158}, - {0x151, 0x17a1d0}, - {0x16a, 0x1a1e9}, - {0x16d, 0x13a1ec}, - {0x182, 0x7a201}, - {0x18b, 0x3a20a}, - {0x190, 0x7a580}, - {0x199, 0xa590}, - {0x19b, 0x4a594}, - {0x1a1, 0x1a59c}, - {0x1a4, 0x7a82c}, - {0x1ad, 0xfa7cc}, - {0x1be, 0x17a7dd}, - {0x1d7, 0x12a810}, - {0x1eb, 0x4000a7e1}, - {0x1ec, 0x5000a7f5}, - {0x1ed, 0x4000a7e2}, - {0x1ee, 0x5000a7dc}, - {0x1ef, 0x4000a7e3}, - {0x1f0, 0x5000a7f6}, - {0x1f1, 0x5000a7e4} -}; -#define PCTL1_DATA_LEN (ARRAY_SIZE(pctl1_data)) - -#define PCTL1_RENG_EXEC_END_PTR 0x1f1 -#define PCTL1_STCTRL_REG_SAVE_RANGE0_BASE 0xa000 -#define PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT 0xa20d -#define PCTL1_STCTRL_REG_SAVE_RANGE1_BASE 0xa580 -#define PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT 0xa59d -#define PCTL1_STCTRL_REG_SAVE_RANGE2_BASE 0xa82c -#define PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT 0xa833 - -static void mmhub_v1_0_power_gating_write_save_ranges(struct amdgpu_device *adev) -{ - uint32_t tmp = 0; - - /* PCTL0_STCTRL_REGISTER_SAVE_RANGE0 */ - tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, - STCTRL_REGISTER_SAVE_BASE, - PCTL0_STCTRL_REG_SAVE_RANGE0_BASE); - tmp = REG_SET_FIELD(tmp, PCTL0_STCTRL_REGISTER_SAVE_RANGE0, - STCTRL_REGISTER_SAVE_LIMIT, - PCTL0_STCTRL_REG_SAVE_RANGE0_LIMIT); - WREG32_SOC15(MMHUB, 0, mmPCTL0_STCTRL_REGISTER_SAVE_RANGE0, tmp); - - /* PCTL1_STCTRL_REGISTER_SAVE_RANGE0 */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, - STCTRL_REGISTER_SAVE_BASE, - PCTL1_STCTRL_REG_SAVE_RANGE0_BASE); - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE0, - STCTRL_REGISTER_SAVE_LIMIT, - PCTL1_STCTRL_REG_SAVE_RANGE0_LIMIT); - WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE0, tmp); - - /* PCTL1_STCTRL_REGISTER_SAVE_RANGE1 */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, - STCTRL_REGISTER_SAVE_BASE, - PCTL1_STCTRL_REG_SAVE_RANGE1_BASE); - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE1, - STCTRL_REGISTER_SAVE_LIMIT, - PCTL1_STCTRL_REG_SAVE_RANGE1_LIMIT); - WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE1, tmp); - - /* PCTL1_STCTRL_REGISTER_SAVE_RANGE2 */ - tmp = 0; - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, - STCTRL_REGISTER_SAVE_BASE, - PCTL1_STCTRL_REG_SAVE_RANGE2_BASE); - tmp = REG_SET_FIELD(tmp, PCTL1_STCTRL_REGISTER_SAVE_RANGE2, - STCTRL_REGISTER_SAVE_LIMIT, - PCTL1_STCTRL_REG_SAVE_RANGE2_LIMIT); - WREG32_SOC15(MMHUB, 0, mmPCTL1_STCTRL_REGISTER_SAVE_RANGE2, tmp); -} - -void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev) -{ - uint32_t pctl0_misc = 0; - uint32_t pctl0_reng_execute = 0; - uint32_t pctl1_misc = 0; - uint32_t pctl1_reng_execute = 0; - int i = 0; - - if (amdgpu_sriov_vf(adev)) - return; - - /****************** pctl0 **********************/ - pctl0_misc = RREG32_SOC15(MMHUB, 0, mmPCTL0_MISC); - pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); - - /* Light sleep must be disabled before writing to pctl0 registers */ - pctl0_misc &= ~PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); - - /* Write data used to access ram of register engine */ - for (i = 0; i < PCTL0_DATA_LEN; i++) { - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_INDEX, - pctl0_data[i].index); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_RAM_DATA, - pctl0_data[i].data); - } - - /* Re-enable light sleep */ - pctl0_misc |= PCTL0_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL0_MISC, pctl0_misc); - - /****************** pctl1 **********************/ - pctl1_misc = RREG32_SOC15(MMHUB, 0, mmPCTL1_MISC); - pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); - - /* Light sleep must be disabled before writing to pctl1 registers */ - pctl1_misc &= ~PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); - - /* Write data used to access ram of register engine */ - for (i = 0; i < PCTL1_DATA_LEN; i++) { - WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_INDEX, - pctl1_data[i].index); - WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_RAM_DATA, - pctl1_data[i].data); - } - - /* Re-enable light sleep */ - pctl1_misc |= PCTL1_MISC__RENG_MEM_LS_ENABLE_MASK; - WREG32_SOC15(MMHUB, 0, mmPCTL1_MISC, pctl1_misc); - - mmhub_v1_0_power_gating_write_save_ranges(adev); - - /* Set the reng execute end ptr for pctl0 */ - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_END_PTR, - PCTL0_RENG_EXEC_END_PTR); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); - - /* Set the reng execute end ptr for pctl1 */ - pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, - PCTL1_RENG_EXECUTE, - RENG_EXECUTE_END_PTR, - PCTL1_RENG_EXEC_END_PTR); - WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); -} - void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, bool enable) { - uint32_t pctl0_reng_execute = 0; - uint32_t pctl1_reng_execute = 0; - if (amdgpu_sriov_vf(adev)) return; - pctl0_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE); - pctl1_reng_execute = RREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE); - if (enable && adev->pg_flags & AMD_PG_SUPPORT_MMHUB) { - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_ON_PWR_UP, 1); - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_ON_REG_UPDATE, 1); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); - - pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, - PCTL1_RENG_EXECUTE, - RENG_EXECUTE_ON_PWR_UP, 1); - pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, - PCTL1_RENG_EXECUTE, - RENG_EXECUTE_ON_REG_UPDATE, 1); - WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); - - if (adev->powerplay.pp_funcs->set_powergating_by_smu) + if (adev->powerplay.pp_funcs && adev->powerplay.pp_funcs->set_powergating_by_smu) amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GMC, true); - } else { - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_ON_PWR_UP, 0); - pctl0_reng_execute = REG_SET_FIELD(pctl0_reng_execute, - PCTL0_RENG_EXECUTE, - RENG_EXECUTE_ON_REG_UPDATE, 0); - WREG32_SOC15(MMHUB, 0, mmPCTL0_RENG_EXECUTE, pctl0_reng_execute); - - pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, - PCTL1_RENG_EXECUTE, - RENG_EXECUTE_ON_PWR_UP, 0); - pctl1_reng_execute = REG_SET_FIELD(pctl1_reng_execute, - PCTL1_RENG_EXECUTE, - RENG_EXECUTE_ON_REG_UPDATE, 0); - WREG32_SOC15(MMHUB, 0, mmPCTL1_RENG_EXECUTE, pctl1_reng_execute); } } diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h index 5d38229baf69..0de0fdf98c00 100644 --- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h +++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.h @@ -32,8 +32,9 @@ void mmhub_v1_0_init(struct amdgpu_device *adev); int mmhub_v1_0_set_clockgating(struct amdgpu_device *adev, enum amd_clockgating_state state); void mmhub_v1_0_get_clockgating(struct amdgpu_device *adev, u32 *flags); -void mmhub_v1_0_initialize_power_gating(struct amdgpu_device *adev); void mmhub_v1_0_update_power_gating(struct amdgpu_device *adev, bool enable); +void mmhub_v1_0_setup_vm_pt_regs(struct amdgpu_device *adev, uint32_t vmid, + uint64_t page_table_base); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c index 078f70faedcb..8cbb4655896a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c @@ -266,8 +266,8 @@ flr_done: } /* Trigger recovery for world switch failure if no TDR */ - if (amdgpu_lockup_timeout == 0) - amdgpu_device_gpu_recover(adev, NULL, true); + if (amdgpu_device_should_recover_gpu(adev)) + amdgpu_device_gpu_recover(adev, NULL); } static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev, diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c index 9fc1c37344ce..6a0fcd67662a 100644 --- a/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c +++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_vi.c @@ -37,7 +37,6 @@ #include "gmc/gmc_8_2_sh_mask.h" #include "oss/oss_3_0_d.h" #include "oss/oss_3_0_sh_mask.h" -#include "gca/gfx_8_0_sh_mask.h" #include "dce/dce_10_0_d.h" #include "dce/dce_10_0_sh_mask.h" #include "smu/smu_7_1_3_d.h" @@ -521,7 +520,8 @@ static void xgpu_vi_mailbox_flr_work(struct work_struct *work) } /* Trigger recovery due to world switch failure */ - amdgpu_device_gpu_recover(adev, NULL, false); + if (amdgpu_device_should_recover_gpu(adev)) + amdgpu_device_gpu_recover(adev, NULL); } static int xgpu_vi_set_mailbox_rcv_irq(struct amdgpu_device *adev, @@ -579,11 +579,11 @@ int xgpu_vi_mailbox_add_irq_id(struct amdgpu_device *adev) { int r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq); if (r) return r; - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 138, &adev->virt.ack_irq); if (r) { amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0); return r; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c index 6f9c54978cc1..accdedd63c98 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c @@ -32,6 +32,7 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 #define smnPCIE_CONFIG_CNTL 0x11180044 +#define smnPCIE_CI_CNTL 0x11180080 static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev) { @@ -270,6 +271,12 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev) if (def != data) WREG32_PCIE(smnPCIE_CONFIG_CNTL, data); + + def = data = RREG32_PCIE(smnPCIE_CI_CNTL); + data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CI_CNTL, data); } const struct amdgpu_nbio_funcs nbio_v6_1_funcs = { diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c index 365517c0121e..df34dc79d444 100644 --- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_0.c @@ -34,19 +34,10 @@ #define smnCPM_CONTROL 0x11180460 #define smnPCIE_CNTL2 0x11180070 -/* vega20 */ -#define mmRCC_DEV0_EPF0_STRAP0_VG20 0x0011 -#define mmRCC_DEV0_EPF0_STRAP0_VG20_BASE_IDX 2 - static u32 nbio_v7_0_get_rev_id(struct amdgpu_device *adev) { u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); - if (adev->asic_type == CHIP_VEGA20) - tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0_VG20); - else - tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); - tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; @@ -84,14 +75,10 @@ static void nbio_v7_0_sdma_doorbell_range(struct amdgpu_device *adev, int instan SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); u32 doorbell_range = RREG32(reg); - u32 range = 2; - - if (adev->asic_type == CHIP_VEGA20) - range = 8; if (use_doorbell) { doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); - doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, range); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); } else doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); @@ -146,9 +133,6 @@ static void nbio_v7_0_update_medium_grain_clock_gating(struct amdgpu_device *ade { uint32_t def, data; - if (adev->asic_type == CHIP_VEGA20) - return; - /* NBIF_MGCG_CTRL_LCLK */ def = data = RREG32_PCIE(smnNBIF_MGCG_CTRL_LCLK); diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c new file mode 100644 index 000000000000..4cd31a276dcd --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c @@ -0,0 +1,255 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#include "amdgpu.h" +#include "amdgpu_atombios.h" +#include "nbio_v7_4.h" + +#include "nbio/nbio_7_4_offset.h" +#include "nbio/nbio_7_4_sh_mask.h" + +#define smnNBIF_MGCG_CTRL_LCLK 0x1013a21c + +#define smnCPM_CONTROL 0x11180460 +#define smnPCIE_CNTL2 0x11180070 +#define smnPCIE_CI_CNTL 0x11180080 + +static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev) +{ + u32 tmp = RREG32_SOC15(NBIO, 0, mmRCC_DEV0_EPF0_STRAP0); + + tmp &= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0_MASK; + tmp >>= RCC_DEV0_EPF0_STRAP0__STRAP_ATI_REV_ID_DEV0_F0__SHIFT; + + return tmp; +} + +static void nbio_v7_4_mc_access_enable(struct amdgpu_device *adev, bool enable) +{ + if (enable) + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, + BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK); + else + WREG32_SOC15(NBIO, 0, mmBIF_FB_EN, 0); +} + +static void nbio_v7_4_hdp_flush(struct amdgpu_device *adev, + struct amdgpu_ring *ring) +{ + if (!ring || !ring->funcs->emit_wreg) + WREG32_SOC15_NO_KIQ(NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL, 0); + else + amdgpu_ring_emit_wreg(ring, SOC15_REG_OFFSET( + NBIO, 0, mmHDP_MEM_COHERENCY_FLUSH_CNTL), 0); +} + +static u32 nbio_v7_4_get_memsize(struct amdgpu_device *adev) +{ + return RREG32_SOC15(NBIO, 0, mmRCC_CONFIG_MEMSIZE); +} + +static void nbio_v7_4_sdma_doorbell_range(struct amdgpu_device *adev, int instance, + bool use_doorbell, int doorbell_index) +{ + u32 reg = instance == 0 ? SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA0_DOORBELL_RANGE) : + SOC15_REG_OFFSET(NBIO, 0, mmBIF_SDMA1_DOORBELL_RANGE); + + u32 doorbell_range = RREG32(reg); + + if (use_doorbell) { + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, OFFSET, doorbell_index); + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 2); + } else + doorbell_range = REG_SET_FIELD(doorbell_range, BIF_SDMA0_DOORBELL_RANGE, SIZE, 0); + + WREG32(reg, doorbell_range); +} + +static void nbio_v7_4_enable_doorbell_aperture(struct amdgpu_device *adev, + bool enable) +{ + WREG32_FIELD15(NBIO, 0, RCC_DOORBELL_APER_EN, BIF_DOORBELL_APER_EN, enable ? 1 : 0); +} + +static void nbio_v7_4_enable_doorbell_selfring_aperture(struct amdgpu_device *adev, + bool enable) +{ + +} + +static void nbio_v7_4_ih_doorbell_range(struct amdgpu_device *adev, + bool use_doorbell, int doorbell_index) +{ + u32 ih_doorbell_range = RREG32_SOC15(NBIO, 0 , mmBIF_IH_DOORBELL_RANGE); + + if (use_doorbell) { + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, OFFSET, doorbell_index); + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 2); + } else + ih_doorbell_range = REG_SET_FIELD(ih_doorbell_range, BIF_IH_DOORBELL_RANGE, SIZE, 0); + + WREG32_SOC15(NBIO, 0, mmBIF_IH_DOORBELL_RANGE, ih_doorbell_range); +} + + +static void nbio_v7_4_update_medium_grain_clock_gating(struct amdgpu_device *adev, + bool enable) +{ + //TODO: Add support for v7.4 +} + +static void nbio_v7_4_update_medium_grain_light_sleep(struct amdgpu_device *adev, + bool enable) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CNTL2); + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) { + data |= (PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } else { + data &= ~(PCIE_CNTL2__SLV_MEM_LS_EN_MASK | + PCIE_CNTL2__MST_MEM_LS_EN_MASK | + PCIE_CNTL2__REPLAY_MEM_LS_EN_MASK); + } + + if (def != data) + WREG32_PCIE(smnPCIE_CNTL2, data); +} + +static void nbio_v7_4_get_clockgating_state(struct amdgpu_device *adev, + u32 *flags) +{ + int data; + + /* AMD_CG_SUPPORT_BIF_MGCG */ + data = RREG32_PCIE(smnCPM_CONTROL); + if (data & CPM_CONTROL__LCLK_DYN_GATE_ENABLE_MASK) + *flags |= AMD_CG_SUPPORT_BIF_MGCG; + + /* AMD_CG_SUPPORT_BIF_LS */ + data = RREG32_PCIE(smnPCIE_CNTL2); + if (data & PCIE_CNTL2__SLV_MEM_LS_EN_MASK) + *flags |= AMD_CG_SUPPORT_BIF_LS; +} + +static void nbio_v7_4_ih_control(struct amdgpu_device *adev) +{ + u32 interrupt_cntl; + + /* setup interrupt control */ + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL2, adev->dummy_page_addr >> 8); + interrupt_cntl = RREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL); + /* INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=0 - dummy read disabled with msi, enabled without msi + * INTERRUPT_CNTL__IH_DUMMY_RD_OVERRIDE_MASK=1 - dummy read controlled by IH_DUMMY_RD_EN + */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_DUMMY_RD_OVERRIDE, 0); + /* INTERRUPT_CNTL__IH_REQ_NONSNOOP_EN_MASK=1 if ring is in non-cacheable memory, e.g., vram */ + interrupt_cntl = REG_SET_FIELD(interrupt_cntl, INTERRUPT_CNTL, IH_REQ_NONSNOOP_EN, 0); + WREG32_SOC15(NBIO, 0, mmINTERRUPT_CNTL, interrupt_cntl); +} + +static u32 nbio_v7_4_get_hdp_flush_req_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_REQ); +} + +static u32 nbio_v7_4_get_hdp_flush_done_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmGPU_HDP_FLUSH_DONE); +} + +static u32 nbio_v7_4_get_pcie_index_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2); +} + +static u32 nbio_v7_4_get_pcie_data_offset(struct amdgpu_device *adev) +{ + return SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2); +} + +static const struct nbio_hdp_flush_reg nbio_v7_4_hdp_flush_reg = { + .ref_and_mask_cp0 = GPU_HDP_FLUSH_DONE__CP0_MASK, + .ref_and_mask_cp1 = GPU_HDP_FLUSH_DONE__CP1_MASK, + .ref_and_mask_cp2 = GPU_HDP_FLUSH_DONE__CP2_MASK, + .ref_and_mask_cp3 = GPU_HDP_FLUSH_DONE__CP3_MASK, + .ref_and_mask_cp4 = GPU_HDP_FLUSH_DONE__CP4_MASK, + .ref_and_mask_cp5 = GPU_HDP_FLUSH_DONE__CP5_MASK, + .ref_and_mask_cp6 = GPU_HDP_FLUSH_DONE__CP6_MASK, + .ref_and_mask_cp7 = GPU_HDP_FLUSH_DONE__CP7_MASK, + .ref_and_mask_cp8 = GPU_HDP_FLUSH_DONE__CP8_MASK, + .ref_and_mask_cp9 = GPU_HDP_FLUSH_DONE__CP9_MASK, + .ref_and_mask_sdma0 = GPU_HDP_FLUSH_DONE__SDMA0_MASK, + .ref_and_mask_sdma1 = GPU_HDP_FLUSH_DONE__SDMA1_MASK, +}; + +static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev) +{ + uint32_t reg; + + reg = RREG32_SOC15(NBIO, 0, mmRCC_IOV_FUNC_IDENTIFIER); + if (reg & 1) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_IS_VF; + + if (reg & 0x80000000) + adev->virt.caps |= AMDGPU_SRIOV_CAPS_ENABLE_IOV; + + if (!reg) { + if (is_virtual_machine()) /* passthrough mode exclus sriov mod */ + adev->virt.caps |= AMDGPU_PASSTHROUGH_MODE; + } +} + +static void nbio_v7_4_init_registers(struct amdgpu_device *adev) +{ + uint32_t def, data; + + def = data = RREG32_PCIE(smnPCIE_CI_CNTL); + data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1); + + if (def != data) + WREG32_PCIE(smnPCIE_CI_CNTL, data); +} + +const struct amdgpu_nbio_funcs nbio_v7_4_funcs = { + .hdp_flush_reg = &nbio_v7_4_hdp_flush_reg, + .get_hdp_flush_req_offset = nbio_v7_4_get_hdp_flush_req_offset, + .get_hdp_flush_done_offset = nbio_v7_4_get_hdp_flush_done_offset, + .get_pcie_index_offset = nbio_v7_4_get_pcie_index_offset, + .get_pcie_data_offset = nbio_v7_4_get_pcie_data_offset, + .get_rev_id = nbio_v7_4_get_rev_id, + .mc_access_enable = nbio_v7_4_mc_access_enable, + .hdp_flush = nbio_v7_4_hdp_flush, + .get_memsize = nbio_v7_4_get_memsize, + .sdma_doorbell_range = nbio_v7_4_sdma_doorbell_range, + .enable_doorbell_aperture = nbio_v7_4_enable_doorbell_aperture, + .enable_doorbell_selfring_aperture = nbio_v7_4_enable_doorbell_selfring_aperture, + .ih_doorbell_range = nbio_v7_4_ih_doorbell_range, + .update_medium_grain_clock_gating = nbio_v7_4_update_medium_grain_clock_gating, + .update_medium_grain_light_sleep = nbio_v7_4_update_medium_grain_light_sleep, + .get_clockgating_state = nbio_v7_4_get_clockgating_state, + .ih_control = nbio_v7_4_ih_control, + .init_registers = nbio_v7_4_init_registers, + .detect_hw_virt = nbio_v7_4_detect_hw_virt, +}; diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h new file mode 100644 index 000000000000..c442865bac4f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.h @@ -0,0 +1,31 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef __NBIO_V7_4_H__ +#define __NBIO_V7_4_H__ + +#include "soc15_common.h" + +extern const struct amdgpu_nbio_funcs nbio_v7_4_funcs; + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h index 0cf48d26c676..0de00fbe9233 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h +++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h @@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id GFX_CTRL_CMD_ID_ENABLE_INT = 0x00050000, /* enable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_DISABLE_INT = 0x00060000, /* disable PSP-to-Gfx interrupt */ GFX_CTRL_CMD_ID_MODE1_RST = 0x00070000, /* trigger the Mode 1 reset */ + GFX_CTRL_CMD_ID_CONSUME_CMD = 0x000A0000, /* send interrupt to psp for updating write pointer of vf */ + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */ GFX_CTRL_CMD_ID_MAX = 0x000F0000, /* max command ID */ }; @@ -89,7 +91,8 @@ enum psp_gfx_cmd_id GFX_CMD_ID_LOAD_IP_FW = 0x00000006, /* load HW IP FW */ GFX_CMD_ID_DESTROY_TMR = 0x00000007, /* destroy TMR region */ GFX_CMD_ID_SAVE_RESTORE = 0x00000008, /* save/restore HW IP FW */ - + GFX_CMD_ID_SETUP_VMR = 0x00000009, /* setup VMR region */ + GFX_CMD_ID_DESTROY_VMR = 0x0000000A, /* destroy VMR region */ }; @@ -189,7 +192,8 @@ enum psp_gfx_fw_type GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM = 20, GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM = 21, GFX_FW_TYPE_RLC_RESTORE_LIST_CNTL = 22, - GFX_FW_TYPE_MAX = 23 + GFX_FW_TYPE_UVD1 = 23, + GFX_FW_TYPE_MAX = 24 }; /* Command to load HW IP FW. */ diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c index 02be34e72ed9..d78b4306a36f 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c @@ -35,6 +35,8 @@ #include "sdma0/sdma0_4_1_offset.h" MODULE_FIRMWARE("amdgpu/raven_asd.bin"); +MODULE_FIRMWARE("amdgpu/picasso_asd.bin"); +MODULE_FIRMWARE("amdgpu/raven2_asd.bin"); static int psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) @@ -91,6 +93,12 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type * case AMDGPU_UCODE_ID_VCN: *type = GFX_FW_TYPE_VCN; break; + case AMDGPU_UCODE_ID_DMCU_ERAM: + *type = GFX_FW_TYPE_DMCU_ERAM; + break; + case AMDGPU_UCODE_ID_DMCU_INTV: + *type = GFX_FW_TYPE_DMCU_ISR; + break; case AMDGPU_UCODE_ID_MAXIMUM: default: return -EINVAL; @@ -111,7 +119,12 @@ static int psp_v10_0_init_microcode(struct psp_context *psp) switch (adev->asic_type) { case CHIP_RAVEN: - chip_name = "raven"; + if (adev->rev_id >= 0x8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; break; default: BUG(); } @@ -227,12 +240,9 @@ static int psp_v10_0_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - struct psp_ring *ring; unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - ring = &psp->km_ring; - /* Write the ring destroy command to C2PMSG_64 */ psp_ring_reg = 3 << 16; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c new file mode 100644 index 000000000000..0c6e7f9b143f --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c @@ -0,0 +1,774 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/firmware.h> +#include "amdgpu.h" +#include "amdgpu_psp.h" +#include "amdgpu_ucode.h" +#include "soc15_common.h" +#include "psp_v11_0.h" + +#include "mp/mp_11_0_offset.h" +#include "mp/mp_11_0_sh_mask.h" +#include "gc/gc_9_0_offset.h" +#include "sdma0/sdma0_4_0_offset.h" +#include "nbio/nbio_7_4_offset.h" + +MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); +MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); +MODULE_FIRMWARE("amdgpu/vega20_ta.bin"); + +/* address block */ +#define smnMP1_FIRMWARE_FLAGS 0x3010024 + +static int +psp_v11_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *type) +{ + switch (ucode->ucode_id) { + case AMDGPU_UCODE_ID_SDMA0: + *type = GFX_FW_TYPE_SDMA0; + break; + case AMDGPU_UCODE_ID_SDMA1: + *type = GFX_FW_TYPE_SDMA1; + break; + case AMDGPU_UCODE_ID_CP_CE: + *type = GFX_FW_TYPE_CP_CE; + break; + case AMDGPU_UCODE_ID_CP_PFP: + *type = GFX_FW_TYPE_CP_PFP; + break; + case AMDGPU_UCODE_ID_CP_ME: + *type = GFX_FW_TYPE_CP_ME; + break; + case AMDGPU_UCODE_ID_CP_MEC1: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC1_JT: + *type = GFX_FW_TYPE_CP_MEC_ME1; + break; + case AMDGPU_UCODE_ID_CP_MEC2: + *type = GFX_FW_TYPE_CP_MEC; + break; + case AMDGPU_UCODE_ID_CP_MEC2_JT: + *type = GFX_FW_TYPE_CP_MEC_ME2; + break; + case AMDGPU_UCODE_ID_RLC_G: + *type = GFX_FW_TYPE_RLC_G; + break; + case AMDGPU_UCODE_ID_SMC: + *type = GFX_FW_TYPE_SMU; + break; + case AMDGPU_UCODE_ID_UVD: + *type = GFX_FW_TYPE_UVD; + break; + case AMDGPU_UCODE_ID_VCE: + *type = GFX_FW_TYPE_VCE; + break; + case AMDGPU_UCODE_ID_UVD1: + *type = GFX_FW_TYPE_UVD1; + break; + case AMDGPU_UCODE_ID_MAXIMUM: + default: + return -EINVAL; + } + + return 0; +} + +static int psp_v11_0_init_microcode(struct psp_context *psp) +{ + struct amdgpu_device *adev = psp->adev; + const char *chip_name; + char fw_name[30]; + int err = 0; + const struct psp_firmware_header_v1_0 *sos_hdr; + const struct psp_firmware_header_v1_0 *asd_hdr; + const struct ta_firmware_header_v1_0 *ta_hdr; + + DRM_DEBUG("\n"); + + switch (adev->asic_type) { + case CHIP_VEGA20: + chip_name = "vega20"; + break; + default: + BUG(); + } + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_sos.bin", chip_name); + err = request_firmware(&adev->psp.sos_fw, fw_name, adev->dev); + if (err) + goto out; + + err = amdgpu_ucode_validate(adev->psp.sos_fw); + if (err) + goto out; + + sos_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.sos_fw->data; + adev->psp.sos_fw_version = le32_to_cpu(sos_hdr->header.ucode_version); + adev->psp.sos_feature_version = le32_to_cpu(sos_hdr->ucode_feature_version); + adev->psp.sos_bin_size = le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_bin_size = le32_to_cpu(sos_hdr->header.ucode_size_bytes) - + le32_to_cpu(sos_hdr->sos_size_bytes); + adev->psp.sys_start_addr = (uint8_t *)sos_hdr + + le32_to_cpu(sos_hdr->header.ucode_array_offset_bytes); + adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr + + le32_to_cpu(sos_hdr->sos_offset_bytes); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name); + err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev); + if (err) + goto out1; + + err = amdgpu_ucode_validate(adev->psp.asd_fw); + if (err) + goto out1; + + asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data; + adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version); + adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version); + adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes); + adev->psp.asd_start_addr = (uint8_t *)asd_hdr + + le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes); + + snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name); + err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev); + if (err) + goto out2; + + err = amdgpu_ucode_validate(adev->psp.ta_fw); + if (err) + goto out2; + + ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data; + adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version); + adev->psp.ta_xgmi_ucode_size = le32_to_cpu(ta_hdr->ta_xgmi_size_bytes); + adev->psp.ta_xgmi_start_addr = (uint8_t *)ta_hdr + + le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes); + + return 0; + +out2: + release_firmware(adev->psp.ta_fw); + adev->psp.ta_fw = NULL; +out1: + release_firmware(adev->psp.asd_fw); + adev->psp.asd_fw = NULL; +out: + dev_err(adev->dev, + "psp v11.0: Failed to load firmware \"%s\"\n", fw_name); + release_firmware(adev->psp.sos_fw); + adev->psp.sos_fw = NULL; + + return err; +} + +static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp) +{ + int ret; + uint32_t psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); + return 0; + } + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy PSP System Driver binary to memory */ + memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); + + /* Provide the sys driver to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 1 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v11_0_bootloader_load_sos(struct psp_context *psp) +{ + int ret; + unsigned int psp_gfxdrv_command_reg = 0; + struct amdgpu_device *adev = psp->adev; + uint32_t sol_reg; + + /* Check sOS sign of life register to confirm sys driver and sOS + * are already been loaded. + */ + sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); + if (sol_reg) + return 0; + + /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), + 0x80000000, 0x80000000, false); + if (ret) + return ret; + + memset(psp->fw_pri_buf, 0, PSP_1_MEG); + + /* Copy Secure OS binary to PSP memory */ + memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); + + /* Provide the PSP secure OS to bootloader */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, + (uint32_t)(psp->fw_pri_mc_addr >> 20)); + psp_gfxdrv_command_reg = 2 << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_35, + psp_gfxdrv_command_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_81), + RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81), + 0, true); + + return ret; +} + +static int psp_v11_0_prep_cmd_buf(struct amdgpu_firmware_info *ucode, + struct psp_gfx_cmd_resp *cmd) +{ + int ret; + uint64_t fw_mem_mc_addr = ucode->mc_addr; + + memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp)); + + cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW; + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr); + cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size; + + ret = psp_v11_0_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type); + if (ret) + DRM_ERROR("Unknown firmware type\n"); + + return ret; +} + +static int psp_v11_0_ring_init(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring; + struct amdgpu_device *adev = psp->adev; + + ring = &psp->km_ring; + + ring->ring_type = ring_type; + + /* allocate 4k Page of Local Frame Buffer memory for ring */ + ring->ring_size = 0x1000; + ret = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE, + AMDGPU_GEM_DOMAIN_VRAM, + &adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + if (ret) { + ring->ring_size = 0; + return ret; + } + + return 0; +} + +static bool psp_v11_0_support_vmr_ring(struct psp_context *psp) +{ + if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045) + return true; + return false; +} + +static int psp_v11_0_ring_create(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + unsigned int psp_ring_reg = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + if (psp_v11_0_support_vmr_ring(psp)) { + /* Write low address of the ring to C2PMSG_102 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg); + /* Write high address of the ring to C2PMSG_103 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg); + + /* Write the ring initialization command to C2PMSG_101 */ + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_INIT_GPCOM_RING); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_101 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x8000FFFF, false); + + } else { + /* Write low address of the ring to C2PMSG_69 */ + psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg); + /* Write high address of the ring to C2PMSG_70 */ + psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg); + /* Write size of ring to C2PMSG_71 */ + psp_ring_reg = ring->ring_size; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg); + /* Write the ring initialization command to C2PMSG_64 */ + psp_ring_reg = ring_type; + psp_ring_reg = psp_ring_reg << 16; + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) in C2PMSG_64 */ + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x8000FFFF, false); + } + + return ret; +} + +static int psp_v11_0_ring_stop(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct amdgpu_device *adev = psp->adev; + + /* Write the ring destroy command*/ + if (psp_v11_0_support_vmr_ring(psp)) + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, + GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING); + else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, + GFX_CTRL_CMD_ID_DESTROY_RINGS); + + /* there might be handshake issue with hardware which needs delay */ + mdelay(20); + + /* Wait for response flag (bit 31) */ + if (psp_v11_0_support_vmr_ring(psp)) + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101), + 0x80000000, 0x80000000, false); + else + ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64), + 0x80000000, 0x80000000, false); + + return ret; +} + +static int psp_v11_0_ring_destroy(struct psp_context *psp, + enum psp_ring_type ring_type) +{ + int ret = 0; + struct psp_ring *ring = &psp->km_ring; + struct amdgpu_device *adev = psp->adev; + + ret = psp_v11_0_ring_stop(psp, ring_type); + if (ret) + DRM_ERROR("Fail to stop psp ring\n"); + + amdgpu_bo_free_kernel(&adev->firmware.rbuf, + &ring->ring_mem_mc_addr, + (void **)&ring->ring_mem); + + return ret; +} + +static int psp_v11_0_cmd_submit(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + uint64_t cmd_buf_mc_addr, uint64_t fence_mc_addr, + int index) +{ + unsigned int psp_write_ptr_reg = 0; + struct psp_gfx_rb_frame *write_frame = psp->km_ring.ring_mem; + struct psp_ring *ring = &psp->km_ring; + struct psp_gfx_rb_frame *ring_buffer_start = ring->ring_mem; + struct psp_gfx_rb_frame *ring_buffer_end = ring_buffer_start + + ring->ring_size / sizeof(struct psp_gfx_rb_frame) - 1; + struct amdgpu_device *adev = psp->adev; + uint32_t ring_size_dw = ring->ring_size / 4; + uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4; + + /* KM (GPCOM) prepare write pointer */ + if (psp_v11_0_support_vmr_ring(psp)) + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102); + else + psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67); + + /* Update KM RB frame pointer to new frame */ + /* write_frame ptr increments by size of rb_frame in bytes */ + /* psp_write_ptr_reg increments by size of rb_frame in DWORDs */ + if ((psp_write_ptr_reg % ring_size_dw) == 0) + write_frame = ring_buffer_start; + else + write_frame = ring_buffer_start + (psp_write_ptr_reg / rb_frame_size_dw); + /* Check invalid write_frame ptr address */ + if ((write_frame < ring_buffer_start) || (ring_buffer_end < write_frame)) { + DRM_ERROR("ring_buffer_start = %p; ring_buffer_end = %p; write_frame = %p\n", + ring_buffer_start, ring_buffer_end, write_frame); + DRM_ERROR("write_frame is pointing to address out of bounds\n"); + return -EINVAL; + } + + /* Initialize KM RB frame */ + memset(write_frame, 0, sizeof(struct psp_gfx_rb_frame)); + + /* Update KM RB frame */ + write_frame->cmd_buf_addr_hi = upper_32_bits(cmd_buf_mc_addr); + write_frame->cmd_buf_addr_lo = lower_32_bits(cmd_buf_mc_addr); + write_frame->fence_addr_hi = upper_32_bits(fence_mc_addr); + write_frame->fence_addr_lo = lower_32_bits(fence_mc_addr); + write_frame->fence_value = index; + + /* Update the write Pointer in DWORDs */ + psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw; + if (psp_v11_0_support_vmr_ring(psp)) { + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg); + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD); + } else + WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg); + + return 0; +} + +static int +psp_v11_0_sram_map(struct amdgpu_device *adev, + unsigned int *sram_offset, unsigned int *sram_addr_reg_offset, + unsigned int *sram_data_reg_offset, + enum AMDGPU_UCODE_ID ucode_id) +{ + int ret = 0; + + switch (ucode_id) { +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SMC: + *sram_offset = 0; + *sram_addr_reg_offset = 0; + *sram_data_reg_offset = 0; + break; +#endif + + case AMDGPU_UCODE_ID_CP_CE: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_CE_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_PFP: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_PFP_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_ME: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_ME_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC1: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_MEC_ME1_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_CP_MEC2: + *sram_offset = 0x10000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmCP_HYP_MEC2_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_RLC_G: + *sram_offset = 0x2000; + *sram_addr_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_UCODE_DATA); + break; + + case AMDGPU_UCODE_ID_SDMA0: + *sram_offset = 0x0; + *sram_addr_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_ADDR); + *sram_data_reg_offset = SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_UCODE_DATA); + break; + +/* TODO: needs to confirm */ +#if 0 + case AMDGPU_UCODE_ID_SDMA1: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_UVD: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; + + case AMDGPU_UCODE_ID_VCE: + *sram_offset = ; + *sram_addr_reg_offset = ; + break; +#endif + + case AMDGPU_UCODE_ID_MAXIMUM: + default: + ret = -EINVAL; + break; + } + + return ret; +} + +static bool psp_v11_0_compare_sram_data(struct psp_context *psp, + struct amdgpu_firmware_info *ucode, + enum AMDGPU_UCODE_ID ucode_type) +{ + int err = 0; + unsigned int fw_sram_reg_val = 0; + unsigned int fw_sram_addr_reg_offset = 0; + unsigned int fw_sram_data_reg_offset = 0; + unsigned int ucode_size; + uint32_t *ucode_mem = NULL; + struct amdgpu_device *adev = psp->adev; + + err = psp_v11_0_sram_map(adev, &fw_sram_reg_val, &fw_sram_addr_reg_offset, + &fw_sram_data_reg_offset, ucode_type); + if (err) + return false; + + WREG32(fw_sram_addr_reg_offset, fw_sram_reg_val); + + ucode_size = ucode->ucode_size; + ucode_mem = (uint32_t *)ucode->kaddr; + while (ucode_size) { + fw_sram_reg_val = RREG32(fw_sram_data_reg_offset); + + if (*ucode_mem != fw_sram_reg_val) + return false; + + ucode_mem++; + /* 4 bytes */ + ucode_size -= 4; + } + + return true; +} + +static int psp_v11_0_mode1_reset(struct psp_context *psp) +{ + int ret; + uint32_t offset; + struct amdgpu_device *adev = psp->adev; + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x8000FFFF, false); + + if (ret) { + DRM_INFO("psp is not working correctly before mode1 reset!\n"); + return -EINVAL; + } + + /*send the mode 1 reset command*/ + WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); + + msleep(500); + + offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); + + ret = psp_wait_for(psp, offset, 0x80000000, 0x80000000, false); + + if (ret) { + DRM_INFO("psp mode 1 reset failed!\n"); + return -EINVAL; + } + + DRM_INFO("psp mode1 reset succeed \n"); + + return 0; +} + +/* TODO: Fill in follow functions once PSP firmware interface for XGMI is ready. + * For now, return success and hack the hive_id so high level code can + * start testing + */ +static int psp_v11_0_xgmi_get_topology_info(struct psp_context *psp, + int number_devices, struct psp_xgmi_topology_info *topology) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + struct ta_xgmi_cmd_get_topology_info_output *topology_info_output; + int i; + int ret; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + /* Fill in the shared memory with topology information as input */ + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to get the topology information */ + ret = psp_xgmi_invoke(psp, TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO); + if (ret) + return ret; + + /* Read the output topology information from the shared memory */ + topology_info_output = &xgmi_cmd->xgmi_out_message.get_topology_info; + topology->num_nodes = xgmi_cmd->xgmi_out_message.get_topology_info.num_nodes; + for (i = 0; i < topology->num_nodes; i++) { + topology->nodes[i].node_id = topology_info_output->nodes[i].node_id; + topology->nodes[i].num_hops = topology_info_output->nodes[i].num_hops; + topology->nodes[i].is_sharing_enabled = topology_info_output->nodes[i].is_sharing_enabled; + topology->nodes[i].sdma_engine = topology_info_output->nodes[i].sdma_engine; + } + + return 0; +} + +static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp, + int number_devices, struct psp_xgmi_topology_info *topology) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + struct ta_xgmi_cmd_get_topology_info_input *topology_info_input; + int i; + + if (!topology || topology->num_nodes > TA_XGMI__MAX_CONNECTED_NODES) + return -EINVAL; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + topology_info_input = &xgmi_cmd->xgmi_in_message.get_topology_info; + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__SET_TOPOLOGY_INFO; + topology_info_input->num_nodes = number_devices; + + for (i = 0; i < topology_info_input->num_nodes; i++) { + topology_info_input->nodes[i].node_id = topology->nodes[i].node_id; + topology_info_input->nodes[i].num_hops = topology->nodes[i].num_hops; + topology_info_input->nodes[i].is_sharing_enabled = topology->nodes[i].is_sharing_enabled; + topology_info_input->nodes[i].sdma_engine = topology->nodes[i].sdma_engine; + } + + /* Invoke xgmi ta to set topology information */ + return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO); +} + +static int psp_v11_0_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_HIVE_ID; + + /* Invoke xgmi ta to get hive id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return ret; + + *hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id; + + return 0; +} + +static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id) +{ + struct ta_xgmi_shared_memory *xgmi_cmd; + int ret; + + xgmi_cmd = (struct ta_xgmi_shared_memory*)psp->xgmi_context.xgmi_shared_buf; + memset(xgmi_cmd, 0, sizeof(struct ta_xgmi_shared_memory)); + + xgmi_cmd->cmd_id = TA_COMMAND_XGMI__GET_NODE_ID; + + /* Invoke xgmi ta to get the node id */ + ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id); + if (ret) + return ret; + + *node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id; + + return 0; +} + +static const struct psp_funcs psp_v11_0_funcs = { + .init_microcode = psp_v11_0_init_microcode, + .bootloader_load_sysdrv = psp_v11_0_bootloader_load_sysdrv, + .bootloader_load_sos = psp_v11_0_bootloader_load_sos, + .prep_cmd_buf = psp_v11_0_prep_cmd_buf, + .ring_init = psp_v11_0_ring_init, + .ring_create = psp_v11_0_ring_create, + .ring_stop = psp_v11_0_ring_stop, + .ring_destroy = psp_v11_0_ring_destroy, + .cmd_submit = psp_v11_0_cmd_submit, + .compare_sram_data = psp_v11_0_compare_sram_data, + .mode1_reset = psp_v11_0_mode1_reset, + .xgmi_get_topology_info = psp_v11_0_xgmi_get_topology_info, + .xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info, + .xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id, + .xgmi_get_node_id = psp_v11_0_xgmi_get_node_id, + .support_vmr_ring = psp_v11_0_support_vmr_ring, +}; + +void psp_v11_0_set_psp_funcs(struct psp_context *psp) +{ + psp->funcs = &psp_v11_0_funcs; +} diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h new file mode 100644 index 000000000000..082c16c887bf --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.h @@ -0,0 +1,30 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ +#ifndef __PSP_V11_0_H__ +#define __PSP_V11_0_H__ + +#include "amdgpu_psp.h" + +void psp_v11_0_set_psp_funcs(struct psp_context *psp); + +#endif diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c index 727071fee6f6..79694ff16969 100644 --- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c +++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c @@ -41,8 +41,6 @@ MODULE_FIRMWARE("amdgpu/vega10_sos.bin"); MODULE_FIRMWARE("amdgpu/vega10_asd.bin"); MODULE_FIRMWARE("amdgpu/vega12_sos.bin"); MODULE_FIRMWARE("amdgpu/vega12_asd.bin"); -MODULE_FIRMWARE("amdgpu/vega20_sos.bin"); -MODULE_FIRMWARE("amdgpu/vega20_asd.bin"); #define smnMP1_FIRMWARE_FLAGS 0x3010028 @@ -196,7 +194,7 @@ static int psp_v3_1_bootloader_load_sysdrv(struct psp_context *psp) /* Copy PSP System Driver binary to memory */ memcpy(psp->fw_pri_buf, psp->sys_start_addr, psp->sys_bin_size); - /* Provide the sys driver to bootrom */ + /* Provide the sys driver to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 1 << 16; @@ -242,8 +240,11 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) * are already been loaded. */ sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81); - if (sol_reg) + if (sol_reg) { + psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58); + printk("sos fw version = 0x%x.\n", psp->sos_fw_version); return 0; + } /* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */ ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35), @@ -256,7 +257,7 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp) /* Copy Secure OS binary to PSP memory */ memcpy(psp->fw_pri_buf, psp->sos_start_addr, psp->sos_bin_size); - /* Provide the PSP secure OS to bootrom */ + /* Provide the PSP secure OS to bootloader */ WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_36, (uint32_t)(psp->fw_pri_mc_addr >> 20)); psp_gfxdrv_command_reg = 2 << 16; @@ -358,12 +359,9 @@ static int psp_v3_1_ring_stop(struct psp_context *psp, enum psp_ring_type ring_type) { int ret = 0; - struct psp_ring *ring; unsigned int psp_ring_reg = 0; struct amdgpu_device *adev = psp->adev; - ring = &psp->km_ring; - /* Write the ring destroy command to C2PMSG_64 */ psp_ring_reg = 3 << 16; WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg); @@ -595,9 +593,9 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp) } /*send the mode 1 reset command*/ - WREG32(offset, 0x70000); + WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST); - mdelay(1000); + msleep(500); offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33); diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c index 15ae4bc9c072..9f3cb2aec7c2 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c @@ -225,7 +225,7 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -245,9 +245,12 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v2_4_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -349,8 +352,8 @@ static void sdma_v2_4_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -471,17 +474,15 @@ static int sdma_v2_4_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } sdma_v2_4_enable(adev, true); for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -504,41 +505,6 @@ static int sdma_v2_4_rlc_resume(struct amdgpu_device *adev) return 0; } -/** - * sdma_v2_4_load_microcode - load the sDMA ME ucode - * - * @adev: amdgpu_device pointer - * - * Loads the sDMA0/1 ucode. - * Returns 0 for success, -EINVAL if the ucode is not available. - */ -static int sdma_v2_4_load_microcode(struct amdgpu_device *adev) -{ - const struct sdma_firmware_header_v1_0 *hdr; - const __le32 *fw_data; - u32 fw_size; - int i, j; - - /* halt the MEs */ - sdma_v2_4_enable(adev, false); - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (!adev->sdma.instance[i].fw) - return -EINVAL; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - amdgpu_ucode_print_sdma_hdr(&hdr->header); - fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - fw_data = (const __le32 *) - (adev->sdma.instance[i].fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); - for (j = 0; j < fw_size; j++) - WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version); - } - - return 0; -} /** * sdma_v2_4_start - setup and start the async dma engines @@ -552,13 +518,6 @@ static int sdma_v2_4_start(struct amdgpu_device *adev) { int r; - - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = sdma_v2_4_load_microcode(adev); - if (r) - return r; - } - /* halt the engine before programing */ sdma_v2_4_enable(adev, false); @@ -592,21 +551,16 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -623,15 +577,11 @@ static int sdma_v2_4_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -654,20 +604,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -686,21 +632,16 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -802,7 +743,7 @@ static void sdma_v2_4_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -898,19 +839,19 @@ static int sdma_v2_4_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1147,8 +1088,14 @@ static int sdma_v2_4_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id, queue_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + queue_id = (entry->ring_id & 0xc) >> 2; + + if (instance_id <= 1 && queue_id == 0) + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } @@ -1296,10 +1243,8 @@ static const struct amdgpu_buffer_funcs sdma_v2_4_buffer_funcs = { static void sdma_v2_4_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &sdma_v2_4_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { @@ -1312,16 +1257,16 @@ static const struct amdgpu_vm_pte_funcs sdma_v2_4_vm_pte_funcs = { static void sdma_v2_4_set_vm_pte_funcs(struct amdgpu_device *adev) { + struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) - adev->vm_manager.vm_pte_rings[i] = - &adev->sdma.instance[i].ring; - - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v2_4_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v2_4_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 1e07ff274d73..1bccc5fe2d9d 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -318,14 +318,13 @@ static int sdma_v3_0_init_microcode(struct amdgpu_device *adev) if (adev->sdma.instance[i].feature_version >= 20) adev->sdma.instance[i].burst_nop = true; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_SMU) { - info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; - info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; - info->fw = adev->sdma.instance[i].fw; - header = (const struct common_firmware_header *)info->fw->data; - adev->firmware.fw_size += - ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); - } + info = &adev->firmware.ucode[AMDGPU_UCODE_ID_SDMA0 + i]; + info->ucode_id = AMDGPU_UCODE_ID_SDMA0 + i; + info->fw = adev->sdma.instance[i].fw; + header = (const struct common_firmware_header *)info->fw->data; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(header->ucode_size_bytes), PAGE_SIZE); + } out: if (err) { @@ -400,7 +399,7 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring) static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -420,9 +419,12 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VI). */ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v3_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -524,8 +526,8 @@ static void sdma_v3_0_gfx_stop(struct amdgpu_device *adev) ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -740,7 +742,7 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } /* unhalt the MEs */ @@ -750,11 +752,9 @@ static int sdma_v3_0_gfx_resume(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -778,42 +778,6 @@ static int sdma_v3_0_rlc_resume(struct amdgpu_device *adev) } /** - * sdma_v3_0_load_microcode - load the sDMA ME ucode - * - * @adev: amdgpu_device pointer - * - * Loads the sDMA0/1 ucode. - * Returns 0 for success, -EINVAL if the ucode is not available. - */ -static int sdma_v3_0_load_microcode(struct amdgpu_device *adev) -{ - const struct sdma_firmware_header_v1_0 *hdr; - const __le32 *fw_data; - u32 fw_size; - int i, j; - - /* halt the MEs */ - sdma_v3_0_enable(adev, false); - - for (i = 0; i < adev->sdma.num_instances; i++) { - if (!adev->sdma.instance[i].fw) - return -EINVAL; - hdr = (const struct sdma_firmware_header_v1_0 *)adev->sdma.instance[i].fw->data; - amdgpu_ucode_print_sdma_hdr(&hdr->header); - fw_size = le32_to_cpu(hdr->header.ucode_size_bytes) / 4; - fw_data = (const __le32 *) - (adev->sdma.instance[i].fw->data + - le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], 0); - for (j = 0; j < fw_size; j++) - WREG32(mmSDMA0_UCODE_DATA + sdma_offsets[i], le32_to_cpup(fw_data++)); - WREG32(mmSDMA0_UCODE_ADDR + sdma_offsets[i], adev->sdma.instance[i].fw_version); - } - - return 0; -} - -/** * sdma_v3_0_start - setup and start the async dma engines * * @adev: amdgpu_device pointer @@ -825,12 +789,6 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) { int r; - if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) { - r = sdma_v3_0_load_microcode(adev); - if (r) - return r; - } - /* disable sdma engine before programing it */ sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); @@ -865,21 +823,16 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -896,15 +849,11 @@ static int sdma_v3_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -927,20 +876,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -959,21 +904,16 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -1074,7 +1014,7 @@ static void sdma_v3_0_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1177,19 +1117,19 @@ static int sdma_v3_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* SDMA trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_TRAP, &adev->sdma.trap_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 241, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 241, &adev->sdma.illegal_inst_irq); if (r) return r; /* SDMA Privileged inst */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_SDMA_SRBM_WRITE, &adev->sdma.illegal_inst_irq); if (r) return r; @@ -1206,7 +1146,7 @@ static int sdma_v3_0_sw_init(void *handle) if (!amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; ring->doorbell_index = (i == 0) ? - AMDGPU_DOORBELL_sDMA_ENGINE0 : AMDGPU_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine0 : adev->doorbell_index.sdma_engine1; } else { ring->use_pollmem = true; } @@ -1483,8 +1423,14 @@ static int sdma_v3_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id, queue_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + queue_id = (entry->ring_id & 0xc) >> 2; + + if (instance_id <= 1 && queue_id == 0) + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } @@ -1736,10 +1682,8 @@ static const struct amdgpu_buffer_funcs sdma_v3_0_buffer_funcs = { static void sdma_v3_0_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &sdma_v3_0_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { @@ -1752,16 +1696,16 @@ static const struct amdgpu_vm_pte_funcs sdma_v3_0_vm_pte_funcs = { static void sdma_v3_0_set_vm_pte_funcs(struct amdgpu_device *adev) { + struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) - adev->vm_manager.vm_pte_rings[i] = - &adev->sdma.instance[i].ring; - - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v3_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v3_0_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c index 7c3b634d8d5f..fd0bfe140ee0 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c @@ -27,10 +27,10 @@ #include "amdgpu_ucode.h" #include "amdgpu_trace.h" -#include "sdma0/sdma0_4_0_offset.h" -#include "sdma0/sdma0_4_0_sh_mask.h" -#include "sdma1/sdma1_4_0_offset.h" -#include "sdma1/sdma1_4_0_sh_mask.h" +#include "sdma0/sdma0_4_2_offset.h" +#include "sdma0/sdma0_4_2_sh_mask.h" +#include "sdma1/sdma1_4_2_offset.h" +#include "sdma1/sdma1_4_2_sh_mask.h" #include "hdp/hdp_4_0_offset.h" #include "sdma0/sdma0_4_1_default.h" @@ -48,10 +48,17 @@ MODULE_FIRMWARE("amdgpu/vega12_sdma1.bin"); MODULE_FIRMWARE("amdgpu/vega20_sdma.bin"); MODULE_FIRMWARE("amdgpu/vega20_sdma1.bin"); MODULE_FIRMWARE("amdgpu/raven_sdma.bin"); +MODULE_FIRMWARE("amdgpu/picasso_sdma.bin"); +MODULE_FIRMWARE("amdgpu/raven2_sdma.bin"); #define SDMA0_POWER_CNTL__ON_OFF_CONDITION_HOLD_TIME_MASK 0x000000F8L #define SDMA0_POWER_CNTL__ON_OFF_STATUS_DURATION_TIME_MASK 0xFC000000L +#define WREG32_SDMA(instance, offset, value) \ + WREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset)), value) +#define RREG32_SDMA(instance, offset) \ + RREG32(sdma_v4_0_get_reg_offset(adev, (instance), (offset))) + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev); static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev); @@ -100,8 +107,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg12[] = { SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001) }; -static const struct soc15_reg_golden golden_settings_sdma_4_1[] = -{ +static const struct soc15_reg_golden golden_settings_sdma_4_1[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_IB_CNTL, 0x800f0111, 0x00000100), @@ -115,26 +121,69 @@ static const struct soc15_reg_golden golden_settings_sdma_4_1[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000) }; -static const struct soc15_reg_golden golden_settings_sdma_4_2[] = +static const struct soc15_reg_golden golden_settings_sdma0_4_2_init[] = { + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), +}; + +static const struct soc15_reg_golden golden_settings_sdma0_4_2[] = { SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RD_BURST_CNTL, 0x0000000f, 0x00000003), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xFE000000, 0x00000000), +}; + +static const struct soc15_reg_golden golden_settings_sdma1_4_2[] = { SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0000773f, 0x00004002), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0000773f, 0x00004002), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_PAGE_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RD_BURST_CNTL, 0x0000000f, 0x00000003), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC0_RB_WPTR_POLL_CNTL, 0xfffffff0, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC1_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), - SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0) + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC2_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC3_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC4_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC5_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC6_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_RPTR_ADDR_LO, 0xfffffffd, 0x00000001), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_RLC7_RB_WPTR_POLL_CNTL, 0xfffffff7, 0x00403000), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_PAGE, 0x000003ff, 0x000003c0), + SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_UTCL1_WATERMK, 0xFE000000, 0x00000000), }; static const struct soc15_reg_golden golden_settings_sdma_rv1[] = @@ -143,6 +192,12 @@ static const struct soc15_reg_golden golden_settings_sdma_rv1[] = SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00000002) }; +static const struct soc15_reg_golden golden_settings_sdma_rv2[] = +{ + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00003001), + SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00003001) +}; + static u32 sdma_v4_0_get_reg_offset(struct amdgpu_device *adev, u32 instance, u32 offset) { @@ -171,16 +226,27 @@ static void sdma_v4_0_init_golden_registers(struct amdgpu_device *adev) break; case CHIP_VEGA20: soc15_program_register_sequence(adev, - golden_settings_sdma_4_2, - ARRAY_SIZE(golden_settings_sdma_4_2)); + golden_settings_sdma0_4_2_init, + ARRAY_SIZE(golden_settings_sdma0_4_2_init)); + soc15_program_register_sequence(adev, + golden_settings_sdma0_4_2, + ARRAY_SIZE(golden_settings_sdma0_4_2)); + soc15_program_register_sequence(adev, + golden_settings_sdma1_4_2, + ARRAY_SIZE(golden_settings_sdma1_4_2)); break; case CHIP_RAVEN: soc15_program_register_sequence(adev, - golden_settings_sdma_4_1, - ARRAY_SIZE(golden_settings_sdma_4_1)); - soc15_program_register_sequence(adev, - golden_settings_sdma_rv1, - ARRAY_SIZE(golden_settings_sdma_rv1)); + golden_settings_sdma_4_1, + ARRAY_SIZE(golden_settings_sdma_4_1)); + if (adev->rev_id >= 8) + soc15_program_register_sequence(adev, + golden_settings_sdma_rv2, + ARRAY_SIZE(golden_settings_sdma_rv2)); + else + soc15_program_register_sequence(adev, + golden_settings_sdma_rv1, + ARRAY_SIZE(golden_settings_sdma_rv1)); break; default: break; @@ -221,7 +287,12 @@ static int sdma_v4_0_init_microcode(struct amdgpu_device *adev) chip_name = "vega20"; break; case CHIP_RAVEN: - chip_name = "raven"; + if (adev->rev_id >= 8) + chip_name = "raven2"; + else if (adev->pdev->device == 0x15d8) + chip_name = "picasso"; + else + chip_name = "raven"; break; default: BUG(); @@ -301,16 +372,11 @@ static uint64_t sdma_v4_0_ring_get_wptr(struct amdgpu_ring *ring) wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); DRM_DEBUG("wptr/doorbell before shift == 0x%016llx\n", wptr); } else { - u32 lowbit, highbit; - - lowbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR)) >> 2; - highbit = RREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI)) >> 2; - - DRM_DEBUG("wptr [%i]high== 0x%08x low==0x%08x\n", - ring->me, highbit, lowbit); - wptr = highbit; + wptr = RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI); wptr = wptr << 32; - wptr |= lowbit; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR); + DRM_DEBUG("wptr before shift [%i] wptr == 0x%016llx\n", + ring->me, wptr); } return wptr >> 2; @@ -351,14 +417,67 @@ static void sdma_v4_0_ring_set_wptr(struct amdgpu_ring *ring) lower_32_bits(ring->wptr << 2), ring->me, upper_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr << 2)); - WREG32(sdma_v4_0_get_reg_offset(adev, ring->me, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR, + lower_32_bits(ring->wptr << 2)); + WREG32_SDMA(ring->me, mmSDMA0_GFX_RB_WPTR_HI, + upper_32_bits(ring->wptr << 2)); + } +} + +/** + * sdma_v4_0_page_ring_get_wptr - get the current write pointer + * + * @ring: amdgpu ring pointer + * + * Get the current wptr from the hardware (VEGA10+). + */ +static uint64_t sdma_v4_0_page_ring_get_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + u64 wptr; + + if (ring->use_doorbell) { + /* XXX check if swapping is necessary on BE */ + wptr = READ_ONCE(*((u64 *)&adev->wb.wb[ring->wptr_offs])); + } else { + wptr = RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI); + wptr = wptr << 32; + wptr |= RREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR); + } + + return wptr >> 2; +} + +/** + * sdma_v4_0_ring_set_wptr - commit the write pointer + * + * @ring: amdgpu ring pointer + * + * Write the wptr back to the hardware (VEGA10+). + */ +static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring) +{ + struct amdgpu_device *adev = ring->adev; + + if (ring->use_doorbell) { + u64 *wb = (u64 *)&adev->wb.wb[ring->wptr_offs]; + + /* XXX check if swapping is necessary on BE */ + WRITE_ONCE(*wb, (ring->wptr << 2)); + WDOORBELL64(ring->doorbell_index, ring->wptr << 2); + } else { + uint64_t wptr = ring->wptr << 2; + + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR, + lower_32_bits(wptr)); + WREG32_SDMA(ring->me, mmSDMA0_PAGE_RB_WPTR_HI, + upper_32_bits(wptr)); } } static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -378,9 +497,12 @@ static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (VEGA10). */ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + /* IB packet must end on a 8 DW boundary */ sdma_v4_0_ring_insert_nop(ring, (10 - (lower_32_bits(ring->wptr) & 7)) % 8); @@ -502,16 +624,16 @@ static void sdma_v4_0_gfx_stop(struct amdgpu_device *adev) amdgpu_ttm_set_buffer_funcs_status(adev, false); for (i = 0; i < adev->sdma.num_instances; i++) { - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -527,6 +649,39 @@ static void sdma_v4_0_rlc_stop(struct amdgpu_device *adev) } /** + * sdma_v4_0_page_stop - stop the page async dma engines + * + * @adev: amdgpu_device pointer + * + * Stop the page async dma ring buffers (VEGA10). + */ +static void sdma_v4_0_page_stop(struct amdgpu_device *adev) +{ + struct amdgpu_ring *sdma0 = &adev->sdma.instance[0].page; + struct amdgpu_ring *sdma1 = &adev->sdma.instance[1].page; + u32 rb_cntl, ib_cntl; + int i; + + if ((adev->mman.buffer_funcs_ring == sdma0) || + (adev->mman.buffer_funcs_ring == sdma1)) + amdgpu_ttm_set_buffer_funcs_status(adev, false); + + for (i = 0; i < adev->sdma.num_instances; i++) { + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, + IB_ENABLE, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); + } + + sdma0->sched.ready = false; + sdma1->sched.ready = false; +} + +/** * sdma_v_0_ctx_switch_enable - stop the async dma engines context switch * * @adev: amdgpu_device pointer @@ -564,18 +719,15 @@ static void sdma_v4_0_ctx_switch_enable(struct amdgpu_device *adev, bool enable) } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL, AUTO_CTXSW_ENABLE, enable ? 1 : 0); if (enable && amdgpu_sdma_phase_quantum) { - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM), - phase_quantum); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM), - phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE0_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE1_QUANTUM, phase_quantum); + WREG32_SDMA(i, mmSDMA0_PHASE2_QUANTUM, phase_quantum); } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_CNTL, f32_cntl); } } @@ -596,156 +748,215 @@ static void sdma_v4_0_enable(struct amdgpu_device *adev, bool enable) if (enable == false) { sdma_v4_0_gfx_stop(adev); sdma_v4_0_rlc_stop(adev); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_stop(adev); } for (i = 0; i < adev->sdma.num_instances; i++) { - f32_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); + f32_cntl = RREG32_SDMA(i, mmSDMA0_F32_CNTL); f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, f32_cntl); } } /** + * sdma_v4_0_rb_cntl - get parameters for rb_cntl + */ +static uint32_t sdma_v4_0_rb_cntl(struct amdgpu_ring *ring, uint32_t rb_cntl) +{ + /* Set ring buffer size in dwords */ + uint32_t rb_bufsz = order_base_2(ring->ring_size / 4); + + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); +#ifdef __BIG_ENDIAN + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_SWAP_ENABLE, 1); +#endif + return rb_cntl; +} + +/** * sdma_v4_0_gfx_resume - setup and start the async dma engines * * @adev: amdgpu_device pointer + * @i: instance to resume * * Set up the gfx DMA ring buffers and enable them (VEGA10). * Returns 0 for success, error for failure. */ -static int sdma_v4_0_gfx_resume(struct amdgpu_device *adev) +static void sdma_v4_0_gfx_resume(struct amdgpu_device *adev, unsigned int i) { - struct amdgpu_ring *ring; + struct amdgpu_ring *ring = &adev->sdma.instance[i].ring; u32 rb_cntl, ib_cntl, wptr_poll_cntl; - u32 rb_bufsz; u32 wb_offset; u32 doorbell; u32 doorbell_offset; - u32 temp; u64 wptr_gpu_addr; - int i, r; - - for (i = 0; i < adev->sdma.num_instances; i++) { - ring = &adev->sdma.instance[i].ring; - wb_offset = (ring->rptr_offs * 4); - - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL), 0); - /* Set ring buffer size in dwords */ - rb_bufsz = order_base_2(ring->ring_size / 4); - rb_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL)); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SIZE, rb_bufsz); -#ifdef __BIG_ENDIAN - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_SWAP_ENABLE, 1); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, - RPTR_WRITEBACK_SWAP_ENABLE, 1); -#endif - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + wb_offset = (ring->rptr_offs * 4); - /* Initialize the ring buffer's read and write pointers */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_HI), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), 0); + rb_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); - /* set the wb address whether it's enabled or not */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_HI), - upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_RPTR_ADDR_LO), - lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_HI, 0); - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RPTR_WRITEBACK_ENABLE, 1); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_GFX_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE), ring->gpu_addr >> 8); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_BASE_HI), ring->gpu_addr >> 40); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - ring->wptr = 0; + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_GFX_RB_BASE_HI, ring->gpu_addr >> 40); - /* before programing wptr to a less value, need set minor_ptr_update first */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 1); + ring->wptr = 0; - if (!amdgpu_sriov_vf(adev)) { /* only bare-metal use register write for wptr */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR), lower_32_bits(ring->wptr) << 2); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_HI), upper_32_bits(ring->wptr) << 2); - } + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 1); - doorbell = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL)); - doorbell_offset = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET)); + doorbell = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET); - if (ring->use_doorbell) { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 1); - doorbell_offset = REG_SET_FIELD(doorbell_offset, SDMA0_GFX_DOORBELL_OFFSET, + doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA0_GFX_DOORBELL_OFFSET, OFFSET, ring->doorbell_index); - } else { - doorbell = REG_SET_FIELD(doorbell, SDMA0_GFX_DOORBELL, ENABLE, 0); - } - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL), doorbell); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_DOORBELL_OFFSET), doorbell_offset); - adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, - ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_GFX_DOORBELL_OFFSET, doorbell_offset); + adev->nbio_funcs->sdma_doorbell_range(adev, i, ring->use_doorbell, + ring->doorbell_index); + + sdma_v4_0_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_GFX_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_GFX_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_GFX_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_GFX_IB_CNTL, ib_cntl); - if (amdgpu_sriov_vf(adev)) - sdma_v4_0_ring_set_wptr(ring); + ring->sched.ready = true; +} - /* set minor_ptr_update to 0 after wptr programed */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_MINOR_PTR_UPDATE), 0); +/** + * sdma_v4_0_page_resume - setup and start the async dma engines + * + * @adev: amdgpu_device pointer + * @i: instance to resume + * + * Set up the page DMA ring buffers and enable them (VEGA10). + * Returns 0 for success, error for failure. + */ +static void sdma_v4_0_page_resume(struct amdgpu_device *adev, unsigned int i) +{ + struct amdgpu_ring *ring = &adev->sdma.instance[i].page; + u32 rb_cntl, ib_cntl, wptr_poll_cntl; + u32 wb_offset; + u32 doorbell; + u32 doorbell_offset; + u64 wptr_gpu_addr; - /* set utc l1 enable flag always to 1 */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_CNTL), temp); + wb_offset = (ring->rptr_offs * 4); - if (!amdgpu_sriov_vf(adev)) { - /* unhalt engine */ - temp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL)); - temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), temp); - } + rb_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL); + rb_cntl = sdma_v4_0_rb_cntl(ring, rb_cntl); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); - /* setup the wptr shadow polling */ - wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_LO), - lower_32_bits(wptr_gpu_addr)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_ADDR_HI), - upper_32_bits(wptr_gpu_addr)); - wptr_poll_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL)); - if (amdgpu_sriov_vf(adev)) - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 1); - else - wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, SDMA0_GFX_RB_WPTR_POLL_CNTL, F32_POLL_ENABLE, 0); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_WPTR_POLL_CNTL), wptr_poll_cntl); + /* Initialize the ring buffer's read and write pointers */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_HI, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR, 0); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_HI, 0); - /* enable DMA RB */ - rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_GFX_RB_CNTL, RB_ENABLE, 1); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_RB_CNTL), rb_cntl); + /* set the wb address whether it's enabled or not */ + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_HI, + upper_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFF); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_RPTR_ADDR_LO, + lower_32_bits(adev->wb.gpu_addr + wb_offset) & 0xFFFFFFFC); - ib_cntl = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL)); - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_ENABLE, 1); -#ifdef __BIG_ENDIAN - ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_GFX_IB_CNTL, IB_SWAP_ENABLE, 1); -#endif - /* enable DMA IBs */ - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_GFX_IB_CNTL), ib_cntl); + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, + RPTR_WRITEBACK_ENABLE, 1); - ring->ready = true; + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE, ring->gpu_addr >> 8); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_BASE_HI, ring->gpu_addr >> 40); - if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need below to lines */ - sdma_v4_0_ctx_switch_enable(adev, true); - sdma_v4_0_enable(adev, true); - } + ring->wptr = 0; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; - return r; - } + /* before programing wptr to a less value, need set minor_ptr_update first */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 1); - if (adev->mman.buffer_funcs_ring == ring) - amdgpu_ttm_set_buffer_funcs_status(adev, true); + doorbell = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL); + doorbell_offset = RREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET); - } + doorbell = REG_SET_FIELD(doorbell, SDMA0_PAGE_DOORBELL, ENABLE, + ring->use_doorbell); + doorbell_offset = REG_SET_FIELD(doorbell_offset, + SDMA0_PAGE_DOORBELL_OFFSET, + OFFSET, ring->doorbell_index); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL, doorbell); + WREG32_SDMA(i, mmSDMA0_PAGE_DOORBELL_OFFSET, doorbell_offset); + + /* paging queue doorbell range is setup at sdma_v4_0_gfx_resume */ + sdma_v4_0_page_ring_set_wptr(ring); + + /* set minor_ptr_update to 0 after wptr programed */ + WREG32_SDMA(i, mmSDMA0_PAGE_MINOR_PTR_UPDATE, 0); + + /* setup the wptr shadow polling */ + wptr_gpu_addr = adev->wb.gpu_addr + (ring->wptr_offs * 4); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_LO, + lower_32_bits(wptr_gpu_addr)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_ADDR_HI, + upper_32_bits(wptr_gpu_addr)); + wptr_poll_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL); + wptr_poll_cntl = REG_SET_FIELD(wptr_poll_cntl, + SDMA0_PAGE_RB_WPTR_POLL_CNTL, + F32_POLL_ENABLE, amdgpu_sriov_vf(adev)); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_WPTR_POLL_CNTL, wptr_poll_cntl); + + /* enable DMA RB */ + rb_cntl = REG_SET_FIELD(rb_cntl, SDMA0_PAGE_RB_CNTL, RB_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_PAGE_RB_CNTL, rb_cntl); + + ib_cntl = RREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL); + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_ENABLE, 1); +#ifdef __BIG_ENDIAN + ib_cntl = REG_SET_FIELD(ib_cntl, SDMA0_PAGE_IB_CNTL, IB_SWAP_ENABLE, 1); +#endif + /* enable DMA IBs */ + WREG32_SDMA(i, mmSDMA0_PAGE_IB_CNTL, ib_cntl); - return 0; + ring->sched.ready = true; } static void @@ -754,7 +965,7 @@ sdma_v4_1_update_power_gating(struct amdgpu_device *adev, bool enable) uint32_t def, data; if (enable && (adev->pg_flags & AMD_PG_SUPPORT_SDMA)) { - /* disable idle interrupt */ + /* enable idle interrupt */ def = data = RREG32(SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_CNTL)); data |= SDMA0_CNTL__CTXEMPTY_INT_ENABLE_MASK; @@ -856,12 +1067,14 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) (adev->sdma.instance[i].fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), 0); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, 0); for (j = 0; j < fw_size; j++) - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_DATA), le32_to_cpup(fw_data++)); + WREG32_SDMA(i, mmSDMA0_UCODE_DATA, + le32_to_cpup(fw_data++)); - WREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_UCODE_ADDR), adev->sdma.instance[i].fw_version); + WREG32_SDMA(i, mmSDMA0_UCODE_ADDR, + adev->sdma.instance[i].fw_version); } return 0; @@ -877,33 +1090,78 @@ static int sdma_v4_0_load_microcode(struct amdgpu_device *adev) */ static int sdma_v4_0_start(struct amdgpu_device *adev) { - int r = 0; + struct amdgpu_ring *ring; + int i, r; if (amdgpu_sriov_vf(adev)) { sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); + } else { - /* set RB registers */ - r = sdma_v4_0_gfx_resume(adev); - return r; + if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { + r = sdma_v4_0_load_microcode(adev); + if (r) + return r; + } + + /* unhalt the MEs */ + sdma_v4_0_enable(adev, true); + /* enable sdma ring preemption */ + sdma_v4_0_ctx_switch_enable(adev, true); } - if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) { - r = sdma_v4_0_load_microcode(adev); + /* start the gfx rings and rlc compute queues */ + for (i = 0; i < adev->sdma.num_instances; i++) { + uint32_t temp; + + WREG32_SDMA(i, mmSDMA0_SEM_WAIT_FAIL_TIMER_CNTL, 0); + sdma_v4_0_gfx_resume(adev, i); + if (adev->sdma.has_page_queue) + sdma_v4_0_page_resume(adev, i); + + /* set utc l1 enable flag always to 1 */ + temp = RREG32_SDMA(i, mmSDMA0_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_CNTL, UTC_L1_ENABLE, 1); + WREG32_SDMA(i, mmSDMA0_CNTL, temp); + + if (!amdgpu_sriov_vf(adev)) { + /* unhalt engine */ + temp = RREG32_SDMA(i, mmSDMA0_F32_CNTL); + temp = REG_SET_FIELD(temp, SDMA0_F32_CNTL, HALT, 0); + WREG32_SDMA(i, mmSDMA0_F32_CNTL, temp); + } + } + + if (amdgpu_sriov_vf(adev)) { + sdma_v4_0_ctx_switch_enable(adev, true); + sdma_v4_0_enable(adev, true); + } else { + r = sdma_v4_0_rlc_resume(adev); if (r) return r; } - /* unhalt the MEs */ - sdma_v4_0_enable(adev, true); - /* enable sdma ring preemption */ - sdma_v4_0_ctx_switch_enable(adev, true); + for (i = 0; i < adev->sdma.num_instances; i++) { + ring = &adev->sdma.instance[i].ring; - /* start the gfx rings and rlc compute queues */ - r = sdma_v4_0_gfx_resume(adev); - if (r) - return r; - r = sdma_v4_0_rlc_resume(adev); + r = amdgpu_ring_test_helper(ring); + if (r) + return r; + + if (adev->sdma.has_page_queue) { + struct amdgpu_ring *page = &adev->sdma.instance[i].page; + + r = amdgpu_ring_test_helper(page); + if (r) + return r; + + if (adev->mman.buffer_funcs_ring == page) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } + + if (adev->mman.buffer_funcs_ring == ring) + amdgpu_ttm_set_buffer_funcs_status(adev, true); + } return r; } @@ -927,21 +1185,16 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR)); @@ -958,15 +1211,11 @@ static int sdma_v4_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -989,20 +1238,16 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PKT_HEADER_OP(SDMA_OP_WRITE) | SDMA_PKT_HEADER_SUB_OP(SDMA_SUBOP_WRITE_LINEAR); @@ -1021,21 +1266,17 @@ static int sdma_v4_0_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } + err1: amdgpu_ib_free(adev, &ib, NULL); dma_fence_put(f); @@ -1140,7 +1381,7 @@ static void sdma_v4_0_vm_set_pte_pde(struct amdgpu_ib *ib, */ static void sdma_v4_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; @@ -1206,15 +1447,45 @@ static void sdma_v4_0_ring_emit_reg_wait(struct amdgpu_ring *ring, uint32_t reg, sdma_v4_0_wait_reg_mem(ring, 0, 0, reg, 0, val, mask, 10); } +static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev) +{ + uint fw_version = adev->sdma.instance[0].fw_version; + + switch (adev->asic_type) { + case CHIP_VEGA10: + return fw_version >= 430; + case CHIP_VEGA12: + /*return fw_version >= 31;*/ + return false; + case CHIP_VEGA20: + return fw_version >= 123; + default: + return false; + } +} + static int sdma_v4_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; + int r; if (adev->asic_type == CHIP_RAVEN) adev->sdma.num_instances = 1; else adev->sdma.num_instances = 2; + r = sdma_v4_0_init_microcode(adev); + if (r) { + DRM_ERROR("Failed to load sdma firmware!\n"); + return r; + } + + /* TODO: Page queue breaks driver reload under SRIOV */ + if ((adev->asic_type == CHIP_VEGA10) && amdgpu_sriov_vf((adev))) + adev->sdma.has_page_queue = false; + else if (sdma_v4_0_fw_support_paging_queue(adev)) + adev->sdma.has_page_queue = true; + sdma_v4_0_set_ring_funcs(adev); sdma_v4_0_set_buffer_funcs(adev); sdma_v4_0_set_vm_pte_funcs(adev); @@ -1223,7 +1494,6 @@ static int sdma_v4_0_early_init(void *handle) return 0; } - static int sdma_v4_0_sw_init(void *handle) { struct amdgpu_ring *ring; @@ -1242,12 +1512,6 @@ static int sdma_v4_0_sw_init(void *handle) if (r) return r; - r = sdma_v4_0_init_microcode(adev); - if (r) { - DRM_ERROR("Failed to load sdma firmware!\n"); - return r; - } - for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; ring->ring_obj = NULL; @@ -1256,9 +1520,10 @@ static int sdma_v4_0_sw_init(void *handle) DRM_INFO("use_doorbell being set to: [%s]\n", ring->use_doorbell?"true":"false"); + /* doorbell size is 2 dwords, get DWORD offset */ ring->doorbell_index = (i == 0) ? - (AMDGPU_DOORBELL64_sDMA_ENGINE0 << 1) //get DWORD offset - : (AMDGPU_DOORBELL64_sDMA_ENGINE1 << 1); // get DWORD offset + (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); sprintf(ring->name, "sdma%d", i); r = amdgpu_ring_init(adev, ring, 1024, @@ -1268,6 +1533,29 @@ static int sdma_v4_0_sw_init(void *handle) AMDGPU_SDMA_IRQ_TRAP1); if (r) return r; + + if (adev->sdma.has_page_queue) { + ring = &adev->sdma.instance[i].page; + ring->ring_obj = NULL; + ring->use_doorbell = true; + + /* paging queue use same doorbell index/routing as gfx queue + * with 0x400 (4096 dwords) offset on second doorbell page + */ + ring->doorbell_index = (i == 0) ? + (adev->doorbell_index.sdma_engine0 << 1) + : (adev->doorbell_index.sdma_engine1 << 1); + ring->doorbell_index += 0x400; + + sprintf(ring->name, "page%d", i); + r = amdgpu_ring_init(adev, ring, 1024, + &adev->sdma.trap_irq, + (i == 0) ? + AMDGPU_SDMA_IRQ_TRAP0 : + AMDGPU_SDMA_IRQ_TRAP1); + if (r) + return r; + } } return r; @@ -1278,8 +1566,11 @@ static int sdma_v4_0_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int i; - for (i = 0; i < adev->sdma.num_instances; i++) + for (i = 0; i < adev->sdma.num_instances; i++) { amdgpu_ring_fini(&adev->sdma.instance[i].ring); + if (adev->sdma.has_page_queue) + amdgpu_ring_fini(&adev->sdma.instance[i].page); + } for (i = 0; i < adev->sdma.num_instances; i++) { release_firmware(adev->sdma.instance[i].fw); @@ -1294,6 +1585,10 @@ static int sdma_v4_0_hw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs && + adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, false); + sdma_v4_0_init_golden_registers(adev); r = sdma_v4_0_start(adev); @@ -1311,6 +1606,10 @@ static int sdma_v4_0_hw_fini(void *handle) sdma_v4_0_ctx_switch_enable(adev, false); sdma_v4_0_enable(adev, false); + if (adev->asic_type == CHIP_RAVEN && adev->powerplay.pp_funcs + && adev->powerplay.pp_funcs->set_powergating_by_smu) + amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_SDMA, true); + return 0; } @@ -1334,7 +1633,7 @@ static bool sdma_v4_0_is_idle(void *handle) u32 i; for (i = 0; i < adev->sdma.num_instances; i++) { - u32 tmp = RREG32(sdma_v4_0_get_reg_offset(adev, i, mmSDMA0_STATUS_REG)); + u32 tmp = RREG32_SDMA(i, mmSDMA0_STATUS_REG); if (!(tmp & SDMA0_STATUS_REG__IDLE_MASK)) return false; @@ -1350,8 +1649,8 @@ static int sdma_v4_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; for (i = 0; i < adev->usec_timeout; i++) { - sdma0 = RREG32(sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_STATUS_REG)); - sdma1 = RREG32(sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_STATUS_REG)); + sdma0 = RREG32_SDMA(0, mmSDMA0_STATUS_REG); + sdma1 = RREG32_SDMA(1, mmSDMA0_STATUS_REG); if (sdma0 & sdma1 & SDMA0_STATUS_REG__IDLE_MASK) return 0; @@ -1372,16 +1671,13 @@ static int sdma_v4_0_set_trap_irq_state(struct amdgpu_device *adev, unsigned type, enum amdgpu_interrupt_state state) { + unsigned int instance = (type == AMDGPU_SDMA_IRQ_TRAP0) ? 0 : 1; u32 sdma_cntl; - u32 reg_offset = (type == AMDGPU_SDMA_IRQ_TRAP0) ? - sdma_v4_0_get_reg_offset(adev, 0, mmSDMA0_CNTL) : - sdma_v4_0_get_reg_offset(adev, 1, mmSDMA0_CNTL); - - sdma_cntl = RREG32(reg_offset); + sdma_cntl = RREG32_SDMA(instance, mmSDMA0_CNTL); sdma_cntl = REG_SET_FIELD(sdma_cntl, SDMA0_CNTL, TRAP_ENABLE, state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0); - WREG32(reg_offset, sdma_cntl); + WREG32_SDMA(instance, mmSDMA0_CNTL, sdma_cntl); return 0; } @@ -1390,39 +1686,34 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + uint32_t instance; + DRM_DEBUG("IH: SDMA trap\n"); switch (entry->client_id) { case SOC15_IH_CLIENTID_SDMA0: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[0].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 0; break; case SOC15_IH_CLIENTID_SDMA1: - switch (entry->ring_id) { - case 0: - amdgpu_fence_process(&adev->sdma.instance[1].ring); - break; - case 1: - /* XXX compute */ - break; - case 2: - /* XXX compute */ - break; - case 3: - /* XXX page queue*/ - break; - } + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + amdgpu_fence_process(&adev->sdma.instance[instance].ring); + break; + case 1: + if (adev->asic_type == CHIP_VEGA20) + amdgpu_fence_process(&adev->sdma.instance[instance].page); + break; + case 2: + /* XXX compute */ + break; + case 3: + if (adev->asic_type != CHIP_VEGA20) + amdgpu_fence_process(&adev->sdma.instance[instance].page); break; } return 0; @@ -1432,12 +1723,29 @@ static int sdma_v4_0_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + int instance; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + + switch (entry->client_id) { + case SOC15_IH_CLIENTID_SDMA0: + instance = 0; + break; + case SOC15_IH_CLIENTID_SDMA1: + instance = 1; + break; + default: + return 0; + } + + switch (entry->ring_id) { + case 0: + drm_sched_fault(&adev->sdma.instance[instance].ring.sched); + break; + } return 0; } - static void sdma_v4_0_update_medium_grain_clock_gating( struct amdgpu_device *adev, bool enable) @@ -1650,6 +1958,38 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = { .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; +static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = { + .type = AMDGPU_RING_TYPE_SDMA, + .align_mask = 0xf, + .nop = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP), + .support_64bit_ptrs = true, + .vmhub = AMDGPU_MMHUB, + .get_rptr = sdma_v4_0_ring_get_rptr, + .get_wptr = sdma_v4_0_page_ring_get_wptr, + .set_wptr = sdma_v4_0_page_ring_set_wptr, + .emit_frame_size = + 6 + /* sdma_v4_0_ring_emit_hdp_flush */ + 3 + /* hdp invalidate */ + 6 + /* sdma_v4_0_ring_emit_pipeline_sync */ + /* sdma_v4_0_ring_emit_vm_flush */ + SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 + + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 6 + + 10 + 10 + 10, /* sdma_v4_0_ring_emit_fence x3 for user fence, vm fence */ + .emit_ib_size = 7 + 6, /* sdma_v4_0_ring_emit_ib */ + .emit_ib = sdma_v4_0_ring_emit_ib, + .emit_fence = sdma_v4_0_ring_emit_fence, + .emit_pipeline_sync = sdma_v4_0_ring_emit_pipeline_sync, + .emit_vm_flush = sdma_v4_0_ring_emit_vm_flush, + .emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush, + .test_ring = sdma_v4_0_ring_test_ring, + .test_ib = sdma_v4_0_ring_test_ib, + .insert_nop = sdma_v4_0_ring_insert_nop, + .pad_ib = sdma_v4_0_ring_pad_ib, + .emit_wreg = sdma_v4_0_ring_emit_wreg, + .emit_reg_wait = sdma_v4_0_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, +}; + static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) { int i; @@ -1657,6 +1997,10 @@ static void sdma_v4_0_set_ring_funcs(struct amdgpu_device *adev) for (i = 0; i < adev->sdma.num_instances; i++) { adev->sdma.instance[i].ring.funcs = &sdma_v4_0_ring_funcs; adev->sdma.instance[i].ring.me = i; + if (adev->sdma.has_page_queue) { + adev->sdma.instance[i].page.funcs = &sdma_v4_0_page_ring_funcs; + adev->sdma.instance[i].page.me = i; + } } } @@ -1737,10 +2081,11 @@ static const struct amdgpu_buffer_funcs sdma_v4_0_buffer_funcs = { static void sdma_v4_0_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; + adev->mman.buffer_funcs = &sdma_v4_0_buffer_funcs; + if (adev->sdma.has_page_queue) + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].page; + else adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } } static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { @@ -1753,16 +2098,19 @@ static const struct amdgpu_vm_pte_funcs sdma_v4_0_vm_pte_funcs = { static void sdma_v4_0_set_vm_pte_funcs(struct amdgpu_device *adev) { + struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) - adev->vm_manager.vm_pte_rings[i] = - &adev->sdma.instance[i].ring; - - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &sdma_v4_0_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + if (adev->sdma.has_page_queue) + sched = &adev->sdma.instance[i].page.sched; + else + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version sdma_v4_0_ip_block = { diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c index c364ef94cc36..f8408f88cd37 100644 --- a/drivers/gpu/drm/amd/amdgpu/si.c +++ b/drivers/gpu/drm/amd/amdgpu/si.c @@ -2057,13 +2057,13 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else amdgpu_device_ip_block_add(adev, &dce_v6_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_device_ip_block_add(adev, &si_dma_ip_block); /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; @@ -2071,13 +2071,14 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); else amdgpu_device_ip_block_add(adev, &dce_v6_4_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_device_ip_block_add(adev, &si_dma_ip_block); + /* amdgpu_device_ip_block_add(adev, &uvd_v3_1_ip_block); */ /* amdgpu_device_ip_block_add(adev, &vce_v1_0_ip_block); */ break; @@ -2085,11 +2086,11 @@ int si_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &si_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &si_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); + amdgpu_device_ip_block_add(adev, &si_dma_ip_block); amdgpu_device_ip_block_add(adev, &si_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v6_0_ip_block); - amdgpu_device_ip_block_add(adev, &si_dma_ip_block); break; default: BUG(); diff --git a/drivers/gpu/drm/amd/amdgpu/si_dma.c b/drivers/gpu/drm/amd/amdgpu/si_dma.c index b75d901ba3c4..b6e473134e19 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dma.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dma.c @@ -61,9 +61,11 @@ static void si_dma_ring_set_wptr(struct amdgpu_ring *ring) } static void si_dma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); /* The indirect buffer packet must end on an 8 DW boundary in the DMA ring. * Pad as necessary with NOPs. */ @@ -122,7 +124,7 @@ static void si_dma_stop(struct amdgpu_device *adev) if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, false); - ring->ready = false; + ring->sched.ready = false; } } @@ -175,13 +177,11 @@ static int si_dma_start(struct amdgpu_device *adev) WREG32(DMA_RB_WPTR + sdma_offsets[i], lower_32_bits(ring->wptr) << 2); WREG32(DMA_RB_CNTL + sdma_offsets[i], rb_cntl | DMA_RB_ENABLE); - ring->ready = true; + ring->sched.ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); @@ -209,21 +209,16 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 4); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; amdgpu_ring_write(ring, DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); @@ -238,15 +233,11 @@ static int si_dma_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } @@ -269,20 +260,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = DMA_PACKET(DMA_PACKET_WRITE, 0, 0, 0, 1); ib.ptr[1] = lower_32_bits(gpu_addr); @@ -295,21 +282,16 @@ static int si_dma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); @@ -502,12 +484,14 @@ static int si_dma_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* DMA0 trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 224, &adev->sdma.trap_irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 224, + &adev->sdma.trap_irq); if (r) return r; /* DMA1 trap event */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 244, &adev->sdma.trap_irq_1); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 244, + &adev->sdma.trap_irq); if (r) return r; @@ -649,26 +633,10 @@ static int si_dma_process_trap_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { - amdgpu_fence_process(&adev->sdma.instance[0].ring); - - return 0; -} - -static int si_dma_process_trap_irq_1(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - amdgpu_fence_process(&adev->sdma.instance[1].ring); - - return 0; -} - -static int si_dma_process_illegal_inst_irq(struct amdgpu_device *adev, - struct amdgpu_irq_src *source, - struct amdgpu_iv_entry *entry) -{ - DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + if (entry->src_id == 224) + amdgpu_fence_process(&adev->sdma.instance[0].ring); + else + amdgpu_fence_process(&adev->sdma.instance[1].ring); return 0; } @@ -786,21 +754,10 @@ static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs = { .process = si_dma_process_trap_irq, }; -static const struct amdgpu_irq_src_funcs si_dma_trap_irq_funcs_1 = { - .set = si_dma_set_trap_irq_state, - .process = si_dma_process_trap_irq_1, -}; - -static const struct amdgpu_irq_src_funcs si_dma_illegal_inst_irq_funcs = { - .process = si_dma_process_illegal_inst_irq, -}; - static void si_dma_set_irq_funcs(struct amdgpu_device *adev) { adev->sdma.trap_irq.num_types = AMDGPU_SDMA_IRQ_LAST; adev->sdma.trap_irq.funcs = &si_dma_trap_irq_funcs; - adev->sdma.trap_irq_1.funcs = &si_dma_trap_irq_funcs_1; - adev->sdma.illegal_inst_irq.funcs = &si_dma_illegal_inst_irq_funcs; } /** @@ -863,10 +820,8 @@ static const struct amdgpu_buffer_funcs si_dma_buffer_funcs = { static void si_dma_set_buffer_funcs(struct amdgpu_device *adev) { - if (adev->mman.buffer_funcs == NULL) { - adev->mman.buffer_funcs = &si_dma_buffer_funcs; - adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; - } + adev->mman.buffer_funcs = &si_dma_buffer_funcs; + adev->mman.buffer_funcs_ring = &adev->sdma.instance[0].ring; } static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { @@ -879,16 +834,16 @@ static const struct amdgpu_vm_pte_funcs si_dma_vm_pte_funcs = { static void si_dma_set_vm_pte_funcs(struct amdgpu_device *adev) { + struct drm_gpu_scheduler *sched; unsigned i; - if (adev->vm_manager.vm_pte_funcs == NULL) { - adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; - for (i = 0; i < adev->sdma.num_instances; i++) - adev->vm_manager.vm_pte_rings[i] = - &adev->sdma.instance[i].ring; - - adev->vm_manager.vm_pte_num_rings = adev->sdma.num_instances; + adev->vm_manager.vm_pte_funcs = &si_dma_vm_pte_funcs; + for (i = 0; i < adev->sdma.num_instances; i++) { + sched = &adev->sdma.instance[i].ring.sched; + adev->vm_manager.vm_pte_rqs[i] = + &sched->sched_rq[DRM_SCHED_PRIORITY_KERNEL]; } + adev->vm_manager.vm_pte_num_rqs = adev->sdma.num_instances; } const struct amdgpu_ip_block_version si_dma_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/si_dpm.c b/drivers/gpu/drm/amd/amdgpu/si_dpm.c index 1de96995e690..da58040fdbdc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_dpm.c +++ b/drivers/gpu/drm/amd/amdgpu/si_dpm.c @@ -7687,11 +7687,11 @@ static int si_dpm_sw_init(void *handle) int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 230, &adev->pm.dpm.thermal.irq); if (ret) return ret; - ret = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); + ret = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 231, &adev->pm.dpm.thermal.irq); if (ret) return ret; diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h index dc9e0e6b4558..790ba46eaebb 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_enums.h +++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h @@ -46,6 +46,26 @@ #define GRPH_ENDIAN_8IN16 1 #define GRPH_ENDIAN_8IN32 2 #define GRPH_ENDIAN_8IN64 3 +#define GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) +#define GRPH_RED_SEL_R 0 +#define GRPH_RED_SEL_G 1 +#define GRPH_RED_SEL_B 2 +#define GRPH_RED_SEL_A 3 +#define GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) +#define GRPH_GREEN_SEL_G 0 +#define GRPH_GREEN_SEL_B 1 +#define GRPH_GREEN_SEL_A 2 +#define GRPH_GREEN_SEL_R 3 +#define GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) +#define GRPH_BLUE_SEL_B 0 +#define GRPH_BLUE_SEL_A 1 +#define GRPH_BLUE_SEL_R 2 +#define GRPH_BLUE_SEL_G 3 +#define GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) +#define GRPH_ALPHA_SEL_A 0 +#define GRPH_ALPHA_SEL_R 1 +#define GRPH_ALPHA_SEL_G 2 +#define GRPH_ALPHA_SEL_B 3 #define GRPH_DEPTH(x) (((x) & 0x3) << 0) #define GRPH_DEPTH_8BPP 0 diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c index 60dad63098a2..2938fb9f17cc 100644 --- a/drivers/gpu/drm/amd/amdgpu/si_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c @@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev) return (wptr & adev->irq.ih.ptr_mask); } -/** - * si_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool si_ih_prescreen_iv(struct amdgpu_device *adev) -{ - /* Process all interrupts */ - return true; -} - static void si_ih_decode_iv(struct amdgpu_device *adev, struct amdgpu_iv_entry *entry) { @@ -142,7 +129,7 @@ static void si_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -170,7 +157,7 @@ static int si_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, false); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, false); if (r) return r; @@ -182,7 +169,7 @@ static int si_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); return 0; } @@ -301,15 +288,13 @@ static const struct amd_ip_funcs si_ih_ip_funcs = { static const struct amdgpu_ih_funcs si_ih_funcs = { .get_wptr = si_ih_get_wptr, - .prescreen_iv = si_ih_prescreen_iv, .decode_iv = si_ih_decode_iv, .set_rptr = si_ih_set_rptr }; static void si_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &si_ih_funcs; + adev->irq.ih_funcs = &si_ih_funcs; } const struct amdgpu_ip_block_version si_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/sid.h b/drivers/gpu/drm/amd/amdgpu/sid.h index c57eff159374..7cf12adb3915 100644 --- a/drivers/gpu/drm/amd/amdgpu/sid.h +++ b/drivers/gpu/drm/amd/amdgpu/sid.h @@ -2201,6 +2201,26 @@ # define EVERGREEN_GRPH_ENDIAN_8IN16 1 # define EVERGREEN_GRPH_ENDIAN_8IN32 2 # define EVERGREEN_GRPH_ENDIAN_8IN64 3 +#define EVERGREEN_GRPH_RED_CROSSBAR(x) (((x) & 0x3) << 4) +# define EVERGREEN_GRPH_RED_SEL_R 0 +# define EVERGREEN_GRPH_RED_SEL_G 1 +# define EVERGREEN_GRPH_RED_SEL_B 2 +# define EVERGREEN_GRPH_RED_SEL_A 3 +#define EVERGREEN_GRPH_GREEN_CROSSBAR(x) (((x) & 0x3) << 6) +# define EVERGREEN_GRPH_GREEN_SEL_G 0 +# define EVERGREEN_GRPH_GREEN_SEL_B 1 +# define EVERGREEN_GRPH_GREEN_SEL_A 2 +# define EVERGREEN_GRPH_GREEN_SEL_R 3 +#define EVERGREEN_GRPH_BLUE_CROSSBAR(x) (((x) & 0x3) << 8) +# define EVERGREEN_GRPH_BLUE_SEL_B 0 +# define EVERGREEN_GRPH_BLUE_SEL_A 1 +# define EVERGREEN_GRPH_BLUE_SEL_R 2 +# define EVERGREEN_GRPH_BLUE_SEL_G 3 +#define EVERGREEN_GRPH_ALPHA_CROSSBAR(x) (((x) & 0x3) << 10) +# define EVERGREEN_GRPH_ALPHA_SEL_A 0 +# define EVERGREEN_GRPH_ALPHA_SEL_R 1 +# define EVERGREEN_GRPH_ALPHA_SEL_G 2 +# define EVERGREEN_GRPH_ALPHA_SEL_B 3 #define EVERGREEN_D3VGA_CONTROL 0xf8 #define EVERGREEN_D4VGA_CONTROL 0xf9 diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c index 83f2717fcf81..8849b74078d6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.c +++ b/drivers/gpu/drm/amd/amdgpu/soc15.c @@ -65,6 +65,13 @@ #define mmMP0_MISC_LIGHT_SLEEP_CTRL 0x01ba #define mmMP0_MISC_LIGHT_SLEEP_CTRL_BASE_IDX 0 +/* for Vega20 register name change */ +#define mmHDP_MEM_POWER_CTRL 0x00d4 +#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK 0x00000001L +#define HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK 0x00000002L +#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK 0x00010000L +#define HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK 0x00020000L +#define mmHDP_MEM_POWER_CTRL_BASE_IDX 0 /* * Indirect registers accessor */ @@ -479,6 +486,11 @@ static const struct amdgpu_ip_block_version vega10_common_ip_block = .funcs = &soc15_common_ip_funcs, }; +static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) +{ + return adev->nbio_funcs->get_rev_id(adev); +} + int soc15_set_ip_blocks(struct amdgpu_device *adev) { /* Set IP register base before any HW register access */ @@ -495,10 +507,13 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return -EINVAL; } + if (adev->asic_type == CHIP_VEGA20) + adev->gmc.xgmi.supported = true; + if (adev->flags & AMD_IS_APU) adev->nbio_funcs = &nbio_v7_0_funcs; else if (adev->asic_type == CHIP_VEGA20) - adev->nbio_funcs = &nbio_v7_0_funcs; + adev->nbio_funcs = &nbio_v7_4_funcs; else adev->nbio_funcs = &nbio_v6_1_funcs; @@ -506,6 +521,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) adev->df_funcs = &df_v3_6_funcs; else adev->df_funcs = &df_v1_7_funcs; + + adev->rev_id = soc15_get_rev_id(adev); adev->nbio_funcs->detect_hw_virt(adev); if (amdgpu_sriov_vf(adev)) @@ -518,11 +535,14 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); - if (adev->asic_type != CHIP_VEGA20) { + if (adev->asic_type == CHIP_VEGA20) + amdgpu_device_ip_block_add(adev, &psp_v11_0_ip_block); + else amdgpu_device_ip_block_add(adev, &psp_v3_1_ip_block); - if (!amdgpu_sriov_vf(adev)) - amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); - } + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); + if (!amdgpu_sriov_vf(adev)) + amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); #if defined(CONFIG_DRM_AMD_DC) @@ -531,16 +551,18 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); - amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); - amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); + if (!(adev->asic_type == CHIP_VEGA20 && amdgpu_sriov_vf(adev))) { + amdgpu_device_ip_block_add(adev, &uvd_v7_0_ip_block); + amdgpu_device_ip_block_add(adev, &vce_v4_0_ip_block); + } break; case CHIP_RAVEN: amdgpu_device_ip_block_add(adev, &vega10_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v9_0_ip_block); amdgpu_device_ip_block_add(adev, &vega10_ih_ip_block); amdgpu_device_ip_block_add(adev, &psp_v10_0_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -550,8 +572,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) #else # warning "Enable CONFIG_DRM_AMD_DC for display support on SOC15." #endif - amdgpu_device_ip_block_add(adev, &gfx_v9_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v4_0_ip_block); amdgpu_device_ip_block_add(adev, &vcn_v1_0_ip_block); break; default: @@ -561,11 +581,6 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev) return 0; } -static uint32_t soc15_get_rev_id(struct amdgpu_device *adev) -{ - return adev->nbio_funcs->get_rev_id(adev); -} - static void soc15_flush_hdp(struct amdgpu_device *adev, struct amdgpu_ring *ring) { adev->nbio_funcs->hdp_flush(adev, ring); @@ -601,6 +616,24 @@ static const struct amdgpu_asic_funcs soc15_asic_funcs = .flush_hdp = &soc15_flush_hdp, .invalidate_hdp = &soc15_invalidate_hdp, .need_full_reset = &soc15_need_full_reset, + .init_doorbell_index = &vega10_doorbell_index_init, +}; + +static const struct amdgpu_asic_funcs vega20_asic_funcs = +{ + .read_disabled_bios = &soc15_read_disabled_bios, + .read_bios_from_rom = &soc15_read_bios_from_rom, + .read_register = &soc15_read_register, + .reset = &soc15_asic_reset, + .set_vga_state = &soc15_vga_set_state, + .get_xclk = &soc15_get_xclk, + .set_uvd_clocks = &soc15_set_uvd_clocks, + .set_vce_clocks = &soc15_set_vce_clocks, + .get_config_memsize = &soc15_get_config_memsize, + .flush_hdp = &soc15_flush_hdp, + .invalidate_hdp = &soc15_invalidate_hdp, + .need_full_reset = &soc15_need_full_reset, + .init_doorbell_index = &vega20_doorbell_index_init, }; static int soc15_common_early_init(void *handle) @@ -620,12 +653,11 @@ static int soc15_common_early_init(void *handle) adev->se_cac_rreg = &soc15_se_cac_rreg; adev->se_cac_wreg = &soc15_se_cac_wreg; - adev->asic_funcs = &soc15_asic_funcs; - adev->rev_id = soc15_get_rev_id(adev); adev->external_rev_id = 0xFF; switch (adev->asic_type) { case CHIP_VEGA10: + adev->asic_funcs = &soc15_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_RLC_LS | @@ -649,6 +681,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = 0x1; break; case CHIP_VEGA12: + adev->asic_funcs = &soc15_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | @@ -671,6 +704,7 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x14; break; case CHIP_VEGA20: + adev->asic_funcs = &vega20_asic_funcs; adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | AMD_CG_SUPPORT_GFX_MGLS | AMD_CG_SUPPORT_GFX_CGCG | @@ -693,35 +727,80 @@ static int soc15_common_early_init(void *handle) adev->external_rev_id = adev->rev_id + 0x28; break; case CHIP_RAVEN: - adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | - AMD_CG_SUPPORT_GFX_MGLS | - AMD_CG_SUPPORT_GFX_RLC_LS | - AMD_CG_SUPPORT_GFX_CP_LS | - AMD_CG_SUPPORT_GFX_3D_CGCG | - AMD_CG_SUPPORT_GFX_3D_CGLS | - AMD_CG_SUPPORT_GFX_CGCG | - AMD_CG_SUPPORT_GFX_CGLS | - AMD_CG_SUPPORT_BIF_MGCG | - AMD_CG_SUPPORT_BIF_LS | - AMD_CG_SUPPORT_HDP_MGCG | - AMD_CG_SUPPORT_HDP_LS | - AMD_CG_SUPPORT_DRM_MGCG | - AMD_CG_SUPPORT_DRM_LS | - AMD_CG_SUPPORT_ROM_MGCG | - AMD_CG_SUPPORT_MC_MGCG | - AMD_CG_SUPPORT_MC_LS | - AMD_CG_SUPPORT_SDMA_MGCG | - AMD_CG_SUPPORT_SDMA_LS | - AMD_CG_SUPPORT_VCN_MGCG; - - adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + adev->asic_funcs = &soc15_asic_funcs; + if (adev->rev_id >= 0x8) + adev->external_rev_id = adev->rev_id + 0x81; + else if (adev->pdev->device == 0x15d8) + adev->external_rev_id = adev->rev_id + 0x41; + else + adev->external_rev_id = 0x1; + + if (adev->rev_id >= 0x8) { + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCN_MGCG; + + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + } else if (adev->pdev->device == 0x15d8) { + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS; + + adev->pg_flags = AMD_PG_SUPPORT_SDMA | + AMD_PG_SUPPORT_MMHUB | + AMD_PG_SUPPORT_VCN | + AMD_PG_SUPPORT_VCN_DPG; + } else { + adev->cg_flags = AMD_CG_SUPPORT_GFX_MGCG | + AMD_CG_SUPPORT_GFX_MGLS | + AMD_CG_SUPPORT_GFX_RLC_LS | + AMD_CG_SUPPORT_GFX_CP_LS | + AMD_CG_SUPPORT_GFX_3D_CGCG | + AMD_CG_SUPPORT_GFX_3D_CGLS | + AMD_CG_SUPPORT_GFX_CGCG | + AMD_CG_SUPPORT_GFX_CGLS | + AMD_CG_SUPPORT_BIF_MGCG | + AMD_CG_SUPPORT_BIF_LS | + AMD_CG_SUPPORT_HDP_MGCG | + AMD_CG_SUPPORT_HDP_LS | + AMD_CG_SUPPORT_DRM_MGCG | + AMD_CG_SUPPORT_DRM_LS | + AMD_CG_SUPPORT_ROM_MGCG | + AMD_CG_SUPPORT_MC_MGCG | + AMD_CG_SUPPORT_MC_LS | + AMD_CG_SUPPORT_SDMA_MGCG | + AMD_CG_SUPPORT_SDMA_LS | + AMD_CG_SUPPORT_VCN_MGCG; + + adev->pg_flags = AMD_PG_SUPPORT_SDMA | AMD_PG_SUPPORT_VCN; + } if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) adev->pg_flags |= AMD_PG_SUPPORT_GFX_PG | AMD_PG_SUPPORT_CP | AMD_PG_SUPPORT_RLC_SMU_HS; - - adev->external_rev_id = 0x1; break; default: /* FIXME: not supported yet */ @@ -822,15 +901,33 @@ static void soc15_update_hdp_light_sleep(struct amdgpu_device *adev, bool enable { uint32_t def, data; - def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); + if (adev->asic_type == CHIP_VEGA20) { + def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL)); + + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK; + else + data &= ~(HDP_MEM_POWER_CTRL__IPH_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__IPH_MEM_POWER_LS_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_CTRL_EN_MASK | + HDP_MEM_POWER_CTRL__RC_MEM_POWER_LS_EN_MASK); + + if (def != data) + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_CTRL), data); + } else { + def = data = RREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS)); - if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) - data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; - else - data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; + if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS)) + data |= HDP_MEM_POWER_LS__LS_ENABLE_MASK; + else + data &= ~HDP_MEM_POWER_LS__LS_ENABLE_MASK; - if (def != data) - WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data); + if (def != data) + WREG32(SOC15_REG_OFFSET(HDP, 0, mmHDP_MEM_POWER_LS), data); + } } static void soc15_update_drm_clock_gating(struct amdgpu_device *adev, bool enable) diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.h b/drivers/gpu/drm/amd/amdgpu/soc15.h index 1f714b7af520..a66c8bfbbaa6 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15.h @@ -26,6 +26,7 @@ #include "nbio_v6_1.h" #include "nbio_v7_0.h" +#include "nbio_v7_4.h" #define SOC15_FLUSH_GPU_TLB_NUM_WREG 4 #define SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT 1 @@ -57,4 +58,6 @@ void soc15_program_register_sequence(struct amdgpu_device *adev, int vega10_reg_base_init(struct amdgpu_device *adev); int vega20_reg_base_init(struct amdgpu_device *adev); +void vega10_doorbell_index_init(struct amdgpu_device *adev); +void vega20_doorbell_index_init(struct amdgpu_device *adev); #endif diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h index 0942f492d2e1..49c262540940 100644 --- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h +++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h @@ -49,19 +49,46 @@ #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \ do { \ + uint32_t old_ = 0; \ uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ uint32_t loop = adev->usec_timeout; \ while ((tmp_ & (mask)) != (expected_value)) { \ - udelay(2); \ + if (old_ != tmp_) { \ + loop = adev->usec_timeout; \ + old_ = tmp_; \ + } else \ + udelay(1); \ tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \ loop--; \ if (!loop) { \ + DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \ + inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \ ret = -ETIMEDOUT; \ break; \ } \ } \ } while (0) +#define RREG32_SOC15_DPG_MODE(ip, inst, reg, mask, sram_sel) \ + ({ WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__MASK_EN_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + RREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA); }) + +#define WREG32_SOC15_DPG_MODE(ip, inst, reg, value, mask, sram_sel) \ + do { \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_DATA, value); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_MASK, mask); \ + WREG32_SOC15(ip, inst, mmUVD_DPG_LMA_CTL, \ + UVD_DPG_LMA_CTL__READ_WRITE_MASK | \ + ((adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg) \ + << UVD_DPG_LMA_CTL__READ_WRITE_ADDR__SHIFT) | \ + (sram_sel << UVD_DPG_LMA_CTL__SRAM_SEL__SHIFT)); \ + } while (0) + #endif diff --git a/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h new file mode 100644 index 000000000000..ac2c27b7630c --- /dev/null +++ b/drivers/gpu/drm/amd/amdgpu/ta_xgmi_if.h @@ -0,0 +1,130 @@ +/* + * Copyright 2018 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#ifndef _TA_XGMI_IF_H +#define _TA_XGMI_IF_H + +/* Responses have bit 31 set */ +#define RSP_ID_MASK (1U << 31) +#define RSP_ID(cmdId) (((uint32_t)(cmdId)) | RSP_ID_MASK) + +enum ta_command_xgmi { + TA_COMMAND_XGMI__INITIALIZE = 0x00, + TA_COMMAND_XGMI__GET_NODE_ID = 0x01, + TA_COMMAND_XGMI__GET_HIVE_ID = 0x02, + TA_COMMAND_XGMI__GET_GET_TOPOLOGY_INFO = 0x03, + TA_COMMAND_XGMI__SET_TOPOLOGY_INFO = 0x04 +}; + +/* XGMI related enumerations */ +/**********************************************************/; +enum ta_xgmi_connected_nodes { + TA_XGMI__MAX_CONNECTED_NODES = 64 +}; + +enum ta_xgmi_status { + TA_XGMI_STATUS__SUCCESS = 0x00, + TA_XGMI_STATUS__GENERIC_FAILURE = 0x01, + TA_XGMI_STATUS__NULL_POINTER = 0x02, + TA_XGMI_STATUS__INVALID_PARAMETER = 0x03, + TA_XGMI_STATUS__NOT_INITIALIZED = 0x04, + TA_XGMI_STATUS__INVALID_NODE_NUM = 0x05, + TA_XGMI_STATUS__INVALID_NODE_ID = 0x06, + TA_XGMI_STATUS__INVALID_TOPOLOGY = 0x07, + TA_XGMI_STATUS__FAILED_ID_GEN = 0x08, + TA_XGMI_STATUS__FAILED_TOPOLOGY_INIT = 0x09, + TA_XGMI_STATUS__SET_SHARING_ERROR = 0x0A +}; + +enum ta_xgmi_assigned_sdma_engine { + TA_XGMI_ASSIGNED_SDMA_ENGINE__NOT_ASSIGNED = -1, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA0 = 0, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA1 = 1, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA2 = 2, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA3 = 3, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA4 = 4, + TA_XGMI_ASSIGNED_SDMA_ENGINE__SDMA5 = 5 +}; + +/* input/output structures for XGMI commands */ +/**********************************************************/ +struct ta_xgmi_node_info { + uint64_t node_id; + uint8_t num_hops; + uint8_t is_sharing_enabled; + enum ta_xgmi_assigned_sdma_engine sdma_engine; +}; + +struct ta_xgmi_cmd_initialize_output { + uint32_t status; +}; + +struct ta_xgmi_cmd_get_node_id_output { + uint64_t node_id; +}; + +struct ta_xgmi_cmd_get_hive_id_output { + uint64_t hive_id; +}; + +struct ta_xgmi_cmd_get_topology_info_input { + uint32_t num_nodes; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +struct ta_xgmi_cmd_get_topology_info_output { + uint32_t num_nodes; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +struct ta_xgmi_cmd_set_topology_info_input { + uint32_t num_nodes; + struct ta_xgmi_node_info nodes[TA_XGMI__MAX_CONNECTED_NODES]; +}; + +/**********************************************************/ +/* Common input structure for XGMI callbacks */ +union ta_xgmi_cmd_input { + struct ta_xgmi_cmd_get_topology_info_input get_topology_info; + struct ta_xgmi_cmd_set_topology_info_input set_topology_info; +}; + +/* Common output structure for XGMI callbacks */ +union ta_xgmi_cmd_output { + struct ta_xgmi_cmd_initialize_output initialize; + struct ta_xgmi_cmd_get_node_id_output get_node_id; + struct ta_xgmi_cmd_get_hive_id_output get_hive_id; + struct ta_xgmi_cmd_get_topology_info_output get_topology_info; +}; +/**********************************************************/ + +struct ta_xgmi_shared_memory { + uint32_t cmd_id; + uint32_t resp_id; + enum ta_xgmi_status xgmi_status; + uint32_t reserved; + union ta_xgmi_cmd_input xgmi_in_message; + union ta_xgmi_cmd_output xgmi_out_message; +}; + +#endif //_TA_XGMI_IF_H diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c index 52853d8a8fdd..15da06ddeb75 100644 --- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c @@ -219,34 +219,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev) } /** - * tonga_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u16 pasid; - - switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) { - case 146: - case 147: - pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16; - if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid)) - return true; - break; - default: - /* Not a VM fault */ - return true; - } - - adev->irq.ih.rptr += 16; - return false; -} - -/** * tonga_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -266,7 +238,7 @@ static void tonga_ih_decode_iv(struct amdgpu_device *adev, dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]); dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - entry->client_id = AMDGPU_IH_CLIENTID_LEGACY; + entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY; entry->src_id = dw[0] & 0xff; entry->src_data[0] = dw[1] & 0xfffffff; entry->ring_id = dw[2] & 0xff; @@ -317,12 +289,12 @@ static int tonga_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 64 * 1024, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 64 * 1024, true); if (r) return r; adev->irq.ih.use_doorbell = true; - adev->irq.ih.doorbell_index = AMDGPU_DOORBELL_IH; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih; r = amdgpu_irq_init(adev); @@ -334,7 +306,7 @@ static int tonga_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); + amdgpu_ih_ring_fini(adev, &adev->irq.ih); amdgpu_irq_remove_domain(adev); return 0; @@ -506,15 +478,13 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = { static const struct amdgpu_ih_funcs tonga_ih_funcs = { .get_wptr = tonga_ih_get_wptr, - .prescreen_iv = tonga_ih_prescreen_iv, .decode_iv = tonga_ih_decode_iv, .set_rptr = tonga_ih_set_rptr }; static void tonga_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &tonga_ih_funcs; + adev->irq.ih_funcs = &tonga_ih_funcs; } const struct amdgpu_ip_block_version tonga_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c index 8a926d1df939..d69c8f6daaf8 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c @@ -108,7 +108,7 @@ static int uvd_v4_2_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 124, &adev->uvd.inst->irq); if (r) return r; @@ -116,16 +116,16 @@ static int uvd_v4_2_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); return r; @@ -162,12 +162,9 @@ static int uvd_v4_2_hw_init(void *handle) uvd_v4_2_enable_mgcg(adev, true); amdgpu_asic_set_uvd_clocks(adev, 10000, 10000); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -218,7 +215,7 @@ static int uvd_v4_2_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v4_2_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -484,11 +481,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -499,14 +494,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -519,8 +509,9 @@ static int uvd_v4_2_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v4_2_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_RBC_IB_BASE, 0)); amdgpu_ring_write(ring, ib->gpu_addr); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c index 50248059412e..ee8cd06ddc38 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c @@ -105,7 +105,7 @@ static int uvd_v5_0_sw_init(void *handle) int r; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; @@ -113,16 +113,16 @@ static int uvd_v5_0_sw_init(void *handle) if (r) return r; - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); return r; @@ -158,12 +158,9 @@ static int uvd_v5_0_hw_init(void *handle) uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); uvd_v5_0_enable_mgcg(adev, true); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -215,7 +212,7 @@ static int uvd_v5_0_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v5_0_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -500,11 +497,8 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -515,14 +509,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -535,8 +524,9 @@ static int uvd_v5_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v5_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_64BIT_BAR_LOW, 0)); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c index 6ae82cc2e55e..d4f4a66f8324 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c @@ -175,11 +175,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) int r; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: uvd enc failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -189,14 +186,8 @@ static int uvd_v6_0_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed\n", - ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -274,7 +265,7 @@ err: */ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; @@ -310,11 +301,7 @@ static int uvd_v6_0_enc_get_destroy_msg(struct amdgpu_ring *ring, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) - r = amdgpu_job_submit_direct(job, ring, &f); - else - r = amdgpu_job_submit(job, &ring->adev->vce.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -340,31 +327,24 @@ static int uvd_v6_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = uvd_v6_0_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r); + if (r) goto error; - } - r = uvd_v6_0_enc_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r); + r = uvd_v6_0_enc_get_destroy_msg(ring, 1, &fence); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out.\n"); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); - } else { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; } + static int uvd_v6_0_early_init(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; @@ -393,14 +373,14 @@ static int uvd_v6_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* UVD TRAP */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_UVD_SYSTEM_MESSAGE, &adev->uvd.inst->irq); if (r) return r; /* UVD ENC TRAP */ if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, i + VISLANDS30_IV_SRCID_UVD_ENC_GEN_PURP, &adev->uvd.inst->irq); if (r) return r; } @@ -420,16 +400,16 @@ static int uvd_v6_0_sw_init(void *handle) DRM_INFO("UVD ENC is disabled\n"); } - r = amdgpu_uvd_resume(adev); - if (r) - return r; - ring = &adev->uvd.inst->ring; sprintf(ring->name, "uvd"); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0); if (r) return r; + r = amdgpu_uvd_resume(adev); + if (r) + return r; + if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; @@ -480,12 +460,9 @@ static int uvd_v6_0_hw_init(void *handle) uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE); uvd_v6_0_enable_mgcg(adev, true); - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -517,12 +494,9 @@ static int uvd_v6_0_hw_init(void *handle) if (uvd_v6_0_enc_support(adev)) { for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst->ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } } @@ -552,7 +526,7 @@ static int uvd_v6_0_hw_fini(void *handle) if (RREG32(mmUVD_STATUS) != 0) uvd_v6_0_stop(adev); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -973,11 +947,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) WREG32(mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n", - ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(mmUVD_CONTEXT_ID, 0)); amdgpu_ring_write(ring, 0xDEADBEEF); amdgpu_ring_commit(ring); @@ -988,14 +960,9 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", - ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -1008,9 +975,12 @@ static int uvd_v6_0_ring_test_ring(struct amdgpu_ring *ring) * Write ring commands to execute the indirect buffer */ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, PACKET0(mmUVD_LMI_RBC_IB_VMID, 0)); amdgpu_ring_write(ring, vmid); @@ -1031,8 +1001,12 @@ static void uvd_v6_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v6_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c index 9b7f8469bc5c..aef924026a28 100644 --- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c +++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c @@ -183,11 +183,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) return 0; r = amdgpu_ring_alloc(ring, 16); - if (r) { - DRM_ERROR("amdgpu: uvd enc failed to lock (%d)ring %d (%d).\n", - ring->me, ring->idx, r); + if (r) return r; - } amdgpu_ring_write(ring, HEVC_ENC_CMD_END); amdgpu_ring_commit(ring); @@ -197,14 +194,8 @@ static int uvd_v7_0_enc_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", - ring->me, ring->idx, i); - } else { - DRM_ERROR("amdgpu: (%d)ring %d test failed\n", - ring->me, ring->idx); + if (i >= adev->usec_timeout) r = -ETIMEDOUT; - } return r; } @@ -280,8 +271,8 @@ err: * * Close up a stream for HW test or if userspace failed to do so */ -int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, - bool direct, struct dma_fence **fence) +static int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, + struct dma_fence **fence) { const unsigned ib_size_dw = 16; struct amdgpu_job *job; @@ -317,11 +308,7 @@ int uvd_v7_0_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle, for (i = ib->length_dw; i < ib_size_dw; ++i) ib->ptr[i] = 0x0; - if (direct) - r = amdgpu_job_submit_direct(job, ring, &f); - else - r = amdgpu_job_submit(job, &ring->adev->vce.entity, - AMDGPU_FENCE_OWNER_UNDEFINED, &f); + r = amdgpu_job_submit_direct(job, ring, &f); if (r) goto err; @@ -347,27 +334,19 @@ static int uvd_v7_0_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = uvd_v7_0_enc_get_create_msg(ring, 1, NULL); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get create msg (%ld).\n", ring->me, r); + if (r) goto error; - } - r = uvd_v7_0_enc_get_destroy_msg(ring, 1, true, &fence); - if (r) { - DRM_ERROR("amdgpu: (%d)failed to get destroy ib (%ld).\n", ring->me, r); + r = uvd_v7_0_enc_get_destroy_msg(ring, 1, &fence); + if (r) goto error; - } r = dma_fence_wait_timeout(fence, false, timeout); - if (r == 0) { - DRM_ERROR("amdgpu: (%d)IB test timed out.\n", ring->me); + if (r == 0) r = -ETIMEDOUT; - } else if (r < 0) { - DRM_ERROR("amdgpu: (%d)fence wait failed (%ld).\n", ring->me, r); - } else { - DRM_DEBUG("ib test on (%d)ring %d succeeded\n", ring->me, ring->idx); + else if (r > 0) r = 0; - } + error: dma_fence_put(fence); return r; @@ -441,19 +420,22 @@ static int uvd_v7_0_sw_init(void *handle) adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].fw = adev->uvd.fw; adev->firmware.fw_size += ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + + if (adev->uvd.num_uvd_inst == UVD7_MAX_HW_INSTANCES_VEGA20) { + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].ucode_id = AMDGPU_UCODE_ID_UVD1; + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].fw = adev->uvd.fw; + adev->firmware.fw_size += + ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE); + } DRM_INFO("PSP loading UVD firmware\n"); } - r = amdgpu_uvd_resume(adev); - if (r) - return r; - for (j = 0; j < adev->uvd.num_uvd_inst; j++) { if (adev->uvd.harvest_config & (1 << j)) continue; if (!amdgpu_sriov_vf(adev)) { ring = &adev->uvd.inst[j].ring; - sprintf(ring->name, "uvd<%d>", j); + sprintf(ring->name, "uvd_%d", ring->me); r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); if (r) return r; @@ -461,7 +443,7 @@ static int uvd_v7_0_sw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - sprintf(ring->name, "uvd_enc%d<%d>", i, j); + sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i); if (amdgpu_sriov_vf(adev)) { ring->use_doorbell = true; @@ -469,9 +451,9 @@ static int uvd_v7_0_sw_init(void *handle) * sriov, so set unused location for other unused rings. */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING0_1 * 2; + ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_UVD_RING2_3 * 2 + 1; + ring->doorbell_index = adev->doorbell_index.uvd_vce.uvd_ring2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0); if (r) @@ -479,6 +461,10 @@ static int uvd_v7_0_sw_init(void *handle) } } + r = amdgpu_uvd_resume(adev); + if (r) + return r; + r = amdgpu_uvd_entity_init(adev); if (r) return r; @@ -537,12 +523,9 @@ static int uvd_v7_0_hw_init(void *handle) ring = &adev->uvd.inst[j].ring; if (!amdgpu_sriov_vf(adev)) { - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } r = amdgpu_ring_alloc(ring, 10); if (r) { @@ -579,12 +562,9 @@ static int uvd_v7_0_hw_init(void *handle) for (i = 0; i < adev->uvd.num_enc_rings; ++i) { ring = &adev->uvd.inst[j].ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } } done: @@ -616,7 +596,7 @@ static int uvd_v7_0_hw_fini(void *handle) for (i = 0; i < adev->uvd.num_uvd_inst; ++i) { if (adev->uvd.harvest_config & (1 << i)) continue; - adev->uvd.inst[i].ring.ready = false; + adev->uvd.inst[i].ring.sched.ready = false; } return 0; @@ -664,9 +644,14 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) continue; if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, - lower_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + i == 0 ? + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_lo: + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_lo); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, - upper_32_bits(adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].mc_addr)); + i == 0 ? + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD].tmr_mc_addr_hi: + adev->firmware.ucode[AMDGPU_UCODE_ID_UVD1].tmr_mc_addr_hi); + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, 0); offset = 0; } else { WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, @@ -674,10 +659,10 @@ static void uvd_v7_0_mc_resume(struct amdgpu_device *adev) WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, upper_32_bits(adev->uvd.inst[i].gpu_addr)); offset = size; + WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3); } - WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_OFFSET0, - AMDGPU_UVD_FIRMWARE_OFFSET >> 3); WREG32_SOC15(UVD, i, mmUVD_VCPU_CACHE_SIZE0, size); WREG32_SOC15(UVD, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, @@ -1227,11 +1212,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) WREG32_SOC15(UVD, ring->me, mmUVD_CONTEXT_ID, 0xCAFEDEAD); r = amdgpu_ring_alloc(ring, 3); - if (r) { - DRM_ERROR("amdgpu: (%d)cp failed to lock ring %d (%d).\n", - ring->me, ring->idx, r); + if (r) return r; - } + amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_CONTEXT_ID), 0)); amdgpu_ring_write(ring, 0xDEADBEEF); @@ -1243,14 +1226,9 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("(%d)ring test on %d succeeded in %d usecs\n", - ring->me, ring->idx, i); - } else { - DRM_ERROR("(%d)amdgpu: ring %d test failed (0x%08X)\n", - ring->me, ring->idx, tmp); - r = -EINVAL; - } + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; + return r; } @@ -1264,11 +1242,12 @@ static int uvd_v7_0_ring_test_ring(struct amdgpu_ring *ring) static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, uint32_t ib_idx) { + struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched); struct amdgpu_ib *ib = &p->job->ibs[ib_idx]; unsigned i; /* No patching necessary for the first instance */ - if (!p->ring->me) + if (!ring->me) return 0; for (i = 0; i < ib->length_dw; i += 2) { @@ -1291,10 +1270,12 @@ static int uvd_v7_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p, * Write ring commands to execute the indirect buffer */ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, ring->me, mmUVD_LMI_RBC_IB_VMID), 0)); @@ -1320,8 +1301,12 @@ static void uvd_v7_0_ring_emit_ib(struct amdgpu_ring *ring, * Write enc ring commands to execute the indirect buffer */ static void uvd_v7_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, HEVC_ENC_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c index 7eaa54ba016b..bed78a778e3f 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c @@ -417,7 +417,7 @@ static int vce_v2_0_sw_init(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; /* VCE */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 167, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, 167, &adev->vce.irq); if (r) return r; @@ -463,15 +463,11 @@ static int vce_v2_0_hw_init(void *handle) amdgpu_asic_set_vce_clocks(adev, 10000, 10000); vce_v2_0_enable_mgcg(adev, true, false); - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c index c8390f9adfd6..2668effadd27 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c @@ -37,7 +37,6 @@ #include "gca/gfx_8_0_d.h" #include "smu/smu_7_1_2_d.h" #include "smu/smu_7_1_2_sh_mask.h" -#include "gca/gfx_8_0_d.h" #include "gca/gfx_8_0_sh_mask.h" #include "ivsrcid/ivsrcid_vislands30.h" @@ -423,7 +422,7 @@ static int vce_v3_0_sw_init(void *handle) int r, i; /* VCE */ - r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq); + r = amdgpu_irq_add_id(adev, AMDGPU_IRQ_CLIENTID_LEGACY, VISLANDS30_IV_SRCID_VCE_TRAP, &adev->vce.irq); if (r) return r; @@ -474,15 +473,10 @@ static int vce_v3_0_hw_init(void *handle) amdgpu_asic_set_vce_clocks(adev, 10000, 10000); - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; - for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); @@ -838,8 +832,12 @@ out: } static void vce_v3_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCE_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c index 2e4d1b5f6243..9fb34b7d8e03 100644 --- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c @@ -466,9 +466,9 @@ static int vce_v4_0_sw_init(void *handle) * so set unused location for other unused rings. */ if (i == 0) - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING0_1 * 2; + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring0_1 * 2; else - ring->doorbell_index = AMDGPU_DOORBELL64_VCE_RING2_3 * 2 + 1; + ring->doorbell_index = adev->doorbell_index.uvd_vce.vce_ring2_3 * 2 + 1; } r = amdgpu_ring_init(adev, ring, 512, &adev->vce.irq, 0); if (r) @@ -519,15 +519,10 @@ static int vce_v4_0_hw_init(void *handle) if (r) return r; - for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; - for (i = 0; i < adev->vce.num_rings; i++) { - r = amdgpu_ring_test_ring(&adev->vce.ring[i]); + r = amdgpu_ring_test_helper(&adev->vce.ring[i]); if (r) return r; - else - adev->vce.ring[i].ready = true; } DRM_INFO("VCE initialized successfully.\n"); @@ -549,7 +544,7 @@ static int vce_v4_0_hw_fini(void *handle) } for (i = 0; i < adev->vce.num_rings; i++) - adev->vce.ring[i].ready = false; + adev->vce.ring[i].sched.ready = false; return 0; } @@ -601,6 +596,7 @@ static int vce_v4_0_resume(void *handle) static void vce_v4_0_mc_resume(struct amdgpu_device *adev) { uint32_t offset, size; + uint64_t tmr_mc_addr; WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_CLOCK_GATING_A), 0, ~(1 << 16)); WREG32_P(SOC15_REG_OFFSET(VCE, 0, mmVCE_UENC_CLOCK_GATING), 0x1FF000, ~0xFF9FF000); @@ -613,21 +609,25 @@ static void vce_v4_0_mc_resume(struct amdgpu_device *adev) WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_SWAP_CNTL1), 0); WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VM_CTRL), 0); + offset = AMDGPU_VCE_FIRMWARE_OFFSET; + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + tmr_mc_addr = (uint64_t)(adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_hi) << 32 | + adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].tmr_mc_addr_lo; WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 8)); + (tmr_mc_addr >> 8)); WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), - (adev->firmware.ucode[AMDGPU_UCODE_ID_VCE].mc_addr >> 40) & 0xff); + (tmr_mc_addr >> 40) & 0xff); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), 0); } else { WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR0), (adev->vce.gpu_addr >> 8)); WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_64BIT_BAR0), (adev->vce.gpu_addr >> 40) & 0xff); + WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); } - offset = AMDGPU_VCE_FIRMWARE_OFFSET; size = VCE_V4_0_FW_SIZE; - WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_OFFSET0), offset & ~0x0f000000); WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_VCPU_CACHE_SIZE0), size); WREG32(SOC15_REG_OFFSET(VCE, 0, mmVCE_LMI_VCPU_CACHE_40BIT_BAR1), (adev->vce.gpu_addr >> 8)); @@ -946,9 +946,11 @@ static int vce_v4_0_set_powergating_state(void *handle, } #endif -static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) +static void vce_v4_0_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job, + struct amdgpu_ib *ib, bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCE_CMD_IB_VM); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c index 072371ef5975..89bb2fef90eb 100644 --- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c +++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c @@ -37,12 +37,18 @@ #include "ivsrcid/vcn/irqsrcs_vcn_1_0.h" +#define mmUVD_RBC_XX_IB_REG_CHECK 0x05ab +#define mmUVD_RBC_XX_IB_REG_CHECK_BASE_IDX 1 +#define mmUVD_REG_XX_MASK 0x05ac +#define mmUVD_REG_XX_MASK_BASE_IDX 1 + static int vcn_v1_0_stop(struct amdgpu_device *adev); static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev); static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev); static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr); +static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state); /** * vcn_v1_0_early_init - set function pointers @@ -171,34 +177,27 @@ static int vcn_v1_0_hw_init(void *handle) struct amdgpu_ring *ring = &adev->vcn.ring_dec; int i, r; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } for (i = 0; i < adev->vcn.num_enc_rings; ++i) { ring = &adev->vcn.ring_enc[i]; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + ring->sched.ready = true; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } } ring = &adev->vcn.ring_jpeg; - ring->ready = true; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) goto done; - } done: if (!r) - DRM_INFO("VCN decode and encode initialized successfully.\n"); + DRM_INFO("VCN decode and encode initialized successfully(under %s).\n", + (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode"); return r; } @@ -215,10 +214,11 @@ static int vcn_v1_0_hw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; struct amdgpu_ring *ring = &adev->vcn.ring_dec; - if (RREG32_SOC15(VCN, 0, mmUVD_STATUS)) - vcn_v1_0_stop(adev); + if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) || + RREG32_SOC15(VCN, 0, mmUVD_STATUS)) + vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE); - ring->ready = false; + ring->sched.ready = false; return 0; } @@ -266,17 +266,18 @@ static int vcn_v1_0_resume(void *handle) } /** - * vcn_v1_0_mc_resume - memory controller programming + * vcn_v1_0_mc_resume_spg_mode - memory controller programming * * @adev: amdgpu_device pointer * * Let the VCN memory controller know it's offsets */ -static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) +static void vcn_v1_0_mc_resume_spg_mode(struct amdgpu_device *adev) { uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); uint32_t offset; + /* cache window 0: fw */ if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo)); @@ -296,20 +297,21 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size); + /* cache window 1: stack */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, lower_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, upper_32_bits(adev->vcn.gpu_addr + offset)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE); + /* cache window 2: context */ WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, - lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, - upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE)); + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE)); WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0); - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, - AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40)); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE); WREG32_SOC15(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, adev->gfx.config.gb_addr_config); @@ -317,6 +319,96 @@ static void vcn_v1_0_mc_resume(struct amdgpu_device *adev) adev->gfx.config.gb_addr_config); WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); + WREG32_SOC15(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config); +} + +static void vcn_v1_0_mc_resume_dpg_mode(struct amdgpu_device *adev) +{ + uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4); + uint32_t offset; + + /* cache window 0: fw */ + if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) { + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0, + 0xFFFFFFFF, 0); + offset = 0; + } else { + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr), 0xFFFFFFFF, 0); + offset = size; + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, + AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0xFFFFFFFF, 0); + } + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size, 0xFFFFFFFF, 0); + + /* cache window 1: stack */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset), 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0, + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE, + 0xFFFFFFFF, 0); + + /* cache window 2: context */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW, + lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH, + upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), + 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE, + 0xFFFFFFFF, 0); + + /* VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DB_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_UDEC_DBW_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_CURR_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_RECON1_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MIF_REF_UV_ADDR_CONFIG, + adev->gfx.config.gb_addr_config, 0xFFFFFFFF, 0); } /** @@ -519,6 +611,60 @@ static void vcn_v1_0_enable_clock_gating(struct amdgpu_device *adev) WREG32_SOC15(VCN, 0, mmUVD_SUVD_CGC_CTRL, data); } +static void vcn_v1_0_clock_gating_dpg_mode(struct amdgpu_device *adev, uint8_t sram_sel) +{ + uint32_t reg_data = 0; + + /* disable JPEG CGC */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << JPEG_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << JPEG_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << JPEG_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmJPEG_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + + /* enable sw clock gating control */ + if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG) + reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + else + reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT; + reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT; + reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT; + reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK | + UVD_CGC_CTRL__UDEC_CM_MODE_MASK | + UVD_CGC_CTRL__UDEC_IT_MODE_MASK | + UVD_CGC_CTRL__UDEC_DB_MODE_MASK | + UVD_CGC_CTRL__UDEC_MP_MODE_MASK | + UVD_CGC_CTRL__SYS_MODE_MASK | + UVD_CGC_CTRL__UDEC_MODE_MASK | + UVD_CGC_CTRL__MPEG2_MODE_MASK | + UVD_CGC_CTRL__REGS_MODE_MASK | + UVD_CGC_CTRL__RBC_MODE_MASK | + UVD_CGC_CTRL__LMI_MC_MODE_MASK | + UVD_CGC_CTRL__LMI_UMC_MODE_MASK | + UVD_CGC_CTRL__IDCT_MODE_MASK | + UVD_CGC_CTRL__MPRD_MODE_MASK | + UVD_CGC_CTRL__MPC_MODE_MASK | + UVD_CGC_CTRL__LBSI_MODE_MASK | + UVD_CGC_CTRL__LRBBM_MODE_MASK | + UVD_CGC_CTRL__WCB_MODE_MASK | + UVD_CGC_CTRL__VCPU_MODE_MASK | + UVD_CGC_CTRL__SCPU_MODE_MASK); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_CTRL, reg_data, 0xFFFFFFFF, sram_sel); + + /* turn off clock gating */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_CGC_GATE, 0, 0xFFFFFFFF, sram_sel); + + /* turn on SUVD clock gating */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_GATE, 1, 0xFFFFFFFF, sram_sel); + + /* turn on sw mode in UVD_SUVD_CGC_CTRL */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SUVD_CGC_CTRL, 0, 0xFFFFFFFF, sram_sel); +} + static void vcn_1_0_disable_static_power_gating(struct amdgpu_device *adev) { uint32_t data = 0; @@ -614,7 +760,7 @@ static void vcn_1_0_enable_static_power_gating(struct amdgpu_device *adev) * * Setup and start the VCN block */ -static int vcn_v1_0_start(struct amdgpu_device *adev) +static int vcn_v1_0_start_spg_mode(struct amdgpu_device *adev) { struct amdgpu_ring *ring = &adev->vcn.ring_dec; uint32_t rb_bufsz, tmp; @@ -625,41 +771,24 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) lmi_swap_cntl = 0; vcn_1_0_disable_static_power_gating(adev); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); + /* disable clock gating */ vcn_v1_0_disable_clock_gating(adev); - vcn_v1_0_mc_resume(adev); - /* disable interupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), 0, ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); - - /* put LMI, VCPU, RBC etc... into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__LMI_SOFT_RESET_MASK | - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | - UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK | - UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | - UVD_SOFT_RESET__CSM_SOFT_RESET_MASK | - UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | - UVD_SOFT_RESET__TAP_SOFT_RESET_MASK | - UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); - mdelay(5); - /* initialize VCN memory controller */ - WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, - (0x40 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | - UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | - UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | - UVD_LMI_CTRL__REQ_MODE_MASK | - 0x00100000L); + tmp = RREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL); + WREG32_SOC15(UVD, 0, mmUVD_LMI_CTRL, tmp | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK); #ifdef __BIG_ENDIAN /* swap (8 in 32) RB and IB */ @@ -667,41 +796,61 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) #endif WREG32_SOC15(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, 0x40c2040); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB1, 0x0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_ALU, 0); - WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, 0x88); + tmp = RREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL); + tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK; + tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT; + WREG32_SOC15(UVD, 0, mmUVD_MPC_CNTL, tmp); + + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT))); + + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT))); + + WREG32_SOC15(UVD, 0, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT))); + + vcn_v1_0_mc_resume_spg_mode(adev); - /* take all subblocks out of reset, except VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + WREG32_SOC15(UVD, 0, mmUVD_REG_XX_MASK, 0x10); + WREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, + RREG32_SOC15(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK) | 0x3); /* enable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, - UVD_VCPU_CNTL__CLK_EN_MASK); + WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, UVD_VCPU_CNTL__CLK_EN_MASK); + + /* boot up the VCPU */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), 0, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); /* enable UMC */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - /* boot up the VCPU */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, 0); - mdelay(10); + tmp = RREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET); + tmp &= ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK; + tmp &= ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK; + WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, tmp); for (i = 0; i < 10; ++i) { uint32_t status; for (j = 0; j < 100; ++j) { status = RREG32_SOC15(UVD, 0, mmUVD_STATUS); - if (status & 2) + if (status & UVD_STATUS__IDLE) break; mdelay(10); } r = 0; - if (status & 2) + if (status & UVD_STATUS__IDLE) break; DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n"); @@ -721,19 +870,22 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) } /* enable master interrupt */ WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_MASTINT_EN), - (UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK), - ~(UVD_MASTINT_EN__VCPU_EN_MASK|UVD_MASTINT_EN__SYS_EN_MASK)); + UVD_MASTINT_EN__VCPU_EN_MASK, ~UVD_MASTINT_EN__VCPU_EN_MASK); - /* clear the bit 4 of VCN_STATUS */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_STATUS), 0, - ~(2 << UVD_STATUS__VCPU_REPORT__SHIFT)); + /* enable system interrupt for JRBC, TODO: move to set interrupt*/ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SYS_INT_EN), + UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, + ~UVD_SYS_INT_EN__UVD_JRBC_EN_MASK); + + /* clear the busy bit of UVD_STATUS */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_STATUS) & ~UVD_STATUS__UVD_BUSY; + WREG32_SOC15(UVD, 0, mmUVD_STATUS, tmp); /* force RBC into idle state */ rb_bufsz = order_base_2(ring->ring_size); tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); - tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_WPTR_POLL_EN, 0); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); @@ -754,6 +906,8 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) /* Initialize the ring buffer's read and write pointers */ WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0); + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); @@ -777,12 +931,13 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) ring = &adev->vcn.ring_jpeg; WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_VMID, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, (0x00000001L | 0x00000002L)); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_NO_FETCH_MASK | + UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_LOW, lower_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_LMI_JRBC_RB_64BIT_BAR_HIGH, upper_32_bits(ring->gpu_addr)); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_RPTR, 0); WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR, 0); - WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, 0x00000002L); + WREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_CNTL, UVD_JRBC_RB_CNTL__RB_RPTR_WR_EN_MASK); /* initialize wptr */ ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); @@ -794,6 +949,167 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) return 0; } +static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev) +{ + struct amdgpu_ring *ring = &adev->vcn.ring_dec; + uint32_t rb_bufsz, tmp; + uint32_t lmi_swap_cntl; + + /* disable byte swapping */ + lmi_swap_cntl = 0; + + vcn_1_0_enable_static_power_gating(adev); + + /* enable dynamic power gating mode */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS); + tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK; + tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK; + WREG32_SOC15(UVD, 0, mmUVD_POWER_STATUS, tmp); + + /* enable clock gating */ + vcn_v1_0_clock_gating_dpg_mode(adev, 0); + + /* enable VCPU clock */ + tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT); + tmp |= UVD_VCPU_CNTL__CLK_EN_MASK; + tmp |= UVD_VCPU_CNTL__MIF_WR_LOW_THRESHOLD_BP_MASK; + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_VCPU_CNTL, tmp, 0xFFFFFFFF, 0); + + /* disable interupt */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + 0, UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + /* initialize VCN memory controller */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + 0x00100000L, 0xFFFFFFFF, 0); + +#ifdef __BIG_ENDIAN + /* swap (8 in 32) RB and IB */ + lmi_swap_cntl = 0xa; +#endif + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl, 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_CNTL, + 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXA0, + ((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUXB0, + ((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) | + (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) | + (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0xFFFFFFFF, 0); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MPC_SET_MUX, + ((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) | + (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) | + (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0xFFFFFFFF, 0); + + vcn_v1_0_mc_resume_dpg_mode(adev); + + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_REG_XX_MASK, 0x10, 0xFFFFFFFF, 0); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_RBC_XX_IB_REG_CHECK, 0x3, 0xFFFFFFFF, 0); + + /* boot up the VCPU */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SOFT_RESET, 0, 0xFFFFFFFF, 0); + + /* enable UMC */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL2, + 0x1F << UVD_LMI_CTRL2__RE_OFLD_MIF_WR_REQ_NUM__SHIFT, + 0xFFFFFFFF, 0); + + /* enable master interrupt */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_MASTINT_EN, + UVD_MASTINT_EN__VCPU_EN_MASK, UVD_MASTINT_EN__VCPU_EN_MASK, 0); + + vcn_v1_0_clock_gating_dpg_mode(adev, 1); + /* setup mmUVD_LMI_CTRL */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_LMI_CTRL, + (8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) | + UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK | + UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK | + UVD_LMI_CTRL__REQ_MODE_MASK | + UVD_LMI_CTRL__CRC_RESET_MASK | + UVD_LMI_CTRL__MASK_MC_URGENT_MASK | + 0x00100000L, 0xFFFFFFFF, 1); + + tmp = adev->gfx.config.gb_addr_config; + /* setup VCN global tiling registers */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_JPEG_UV_ADDR_CONFIG, tmp, 0xFFFFFFFF, 1); + + /* enable System Interrupt for JRBC */ + WREG32_SOC15_DPG_MODE(UVD, 0, mmUVD_SYS_INT_EN, + UVD_SYS_INT_EN__UVD_JRBC_EN_MASK, 0xFFFFFFFF, 1); + + /* force RBC into idle state */ + rb_bufsz = order_base_2(ring->ring_size); + tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1); + tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, tmp); + + /* set the write pointer delay */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR_CNTL, 0); + + /* set the wb address */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR_ADDR, + (upper_32_bits(ring->gpu_addr) >> 2)); + + /* programm the RB_BASE for ring buffer */ + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW, + lower_32_bits(ring->gpu_addr)); + WREG32_SOC15(UVD, 0, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH, + upper_32_bits(ring->gpu_addr)); + + /* Initialize the ring buffer's read and write pointers */ + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR, 0); + + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, 0); + + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_RPTR); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, + lower_32_bits(ring->wptr)); + + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0, + ~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK); + + /* initialize JPEG wptr */ + ring = &adev->vcn.ring_jpeg; + ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + + /* copy patch commands to the jpeg ring */ + vcn_v1_0_jpeg_ring_set_patch_ring(ring, + (ring->wptr + ring->max_dw * amdgpu_sched_hw_submission)); + + return 0; +} + +static int vcn_v1_0_start(struct amdgpu_device *adev) +{ + int r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + r = vcn_v1_0_start_dpg_mode(adev); + else + r = vcn_v1_0_start_spg_mode(adev); + return r; +} + /** * vcn_v1_0_stop - stop VCN block * @@ -801,41 +1117,98 @@ static int vcn_v1_0_start(struct amdgpu_device *adev) * * stop the VCN block */ -static int vcn_v1_0_stop(struct amdgpu_device *adev) +static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev) { - /* force RBC into idle state */ - WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_CNTL, 0x11010101); + int ret_code, tmp; - /* Stall UMC and register bus before resetting VCPU */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), - UVD_LMI_CTRL2__STALL_ARB_UMC_MASK, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); - mdelay(1); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, ret_code); + + tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK | + UVD_LMI_STATUS__READ_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_MASK | + UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code); /* put VCPU into reset */ - WREG32_SOC15(UVD, 0, mmUVD_SOFT_RESET, - UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); - mdelay(5); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK); + + tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK | + UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_LMI_STATUS, tmp, tmp, ret_code); /* disable VCPU clock */ - WREG32_SOC15(UVD, 0, mmUVD_VCPU_CNTL, 0x0); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_VCPU_CNTL), 0, + ~UVD_VCPU_CNTL__CLK_EN_MASK); - /* Unstall UMC and register bus */ - WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_CTRL2), 0, - ~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK); + /* reset LMI UMC/LMI */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK); - WREG32_SOC15(VCN, 0, mmUVD_STATUS, 0); + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_SOFT_RESET), + UVD_SOFT_RESET__LMI_SOFT_RESET_MASK, + ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK); + + WREG32_SOC15(UVD, 0, mmUVD_STATUS, 0); vcn_v1_0_enable_clock_gating(adev); vcn_1_0_enable_static_power_gating(adev); return 0; } +static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev) +{ + int ret_code = 0; + uint32_t tmp; + + /* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */ + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* wait for read ptr to be equal to write ptr */ + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR); + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF; + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code); + + SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS, + UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF, + UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code); + + /* disable dynamic power gating mode */ + WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0, + ~UVD_POWER_STATUS__UVD_PG_MODE_MASK); + + return 0; +} + +static int vcn_v1_0_stop(struct amdgpu_device *adev) +{ + int r; + + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + r = vcn_v1_0_stop_dpg_mode(adev); + else + r = vcn_v1_0_stop_spg_mode(adev); + + return r; +} + static bool vcn_v1_0_is_idle(void *handle) { struct amdgpu_device *adev = (struct amdgpu_device *)handle; - return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == 0x2); + return (RREG32_SOC15(VCN, 0, mmUVD_STATUS) == UVD_STATUS__IDLE); } static int vcn_v1_0_wait_for_idle(void *handle) @@ -843,7 +1216,8 @@ static int vcn_v1_0_wait_for_idle(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; int ret = 0; - SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, 0x2, 0x2, ret); + SOC15_WAIT_ON_RREG(VCN, 0, mmUVD_STATUS, UVD_STATUS__IDLE, + UVD_STATUS__IDLE, ret); return ret; } @@ -905,6 +1279,10 @@ static void vcn_v1_0_dec_ring_set_wptr(struct amdgpu_ring *ring) { struct amdgpu_device *adev = ring->adev; + if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) + WREG32_SOC15(UVD, 0, mmUVD_SCRATCH2, + lower_32_bits(ring->wptr) | 0x80000000); + WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr)); } @@ -991,10 +1369,12 @@ static void vcn_v1_0_dec_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 * Write ring commands to execute the indirect buffer */ static void vcn_v1_0_dec_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKET0(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_RBC_IB_VMID), 0)); @@ -1149,8 +1529,12 @@ static void vcn_v1_0_enc_ring_insert_end(struct amdgpu_ring *ring) * Write enc ring commands to execute the indirect buffer */ static void vcn_v1_0_enc_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, unsigned int vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); + amdgpu_ring_write(ring, VCN_ENC_CMD_IB); amdgpu_ring_write(ring, vmid); amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr)); @@ -1335,6 +1719,10 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE0)); amdgpu_ring_write(ring, 0x1); + + /* emit trap */ + amdgpu_ring_write(ring, PACKETJ(0, 0, 0, PACKETJ_TYPE7)); + amdgpu_ring_write(ring, 0); } /** @@ -1346,10 +1734,12 @@ static void vcn_v1_0_jpeg_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u6 * Write ring commands to execute the indirect buffer. */ static void vcn_v1_0_jpeg_ring_emit_ib(struct amdgpu_ring *ring, - struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + struct amdgpu_job *job, + struct amdgpu_ib *ib, + bool ctx_switch) { struct amdgpu_device *adev = ring->adev; + unsigned vmid = AMDGPU_JOB_GET_VMID(job); amdgpu_ring_write(ring, PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_LMI_JRBC_IB_VMID), 0, 0, PACKETJ_TYPE0)); @@ -1624,12 +2014,20 @@ static int vcn_v1_0_set_powergating_state(void *handle, * revisit this when there is a cleaner line between * the smc and the hw blocks */ + int ret; struct amdgpu_device *adev = (struct amdgpu_device *)handle; + if(state == adev->vcn.cur_state) + return 0; + if (state == AMD_PG_STATE_GATE) - return vcn_v1_0_stop(adev); + ret = vcn_v1_0_stop(adev); else - return vcn_v1_0_start(adev); + ret = vcn_v1_0_start(adev); + + if(!ret) + adev->vcn.cur_state = state; + return ret; } static const struct amd_ip_funcs vcn_v1_0_ip_funcs = { @@ -1729,10 +2127,10 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { 6 + 6 + /* hdp invalidate / flush */ SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 + SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 + - 8 + /* vcn_v1_0_dec_ring_emit_vm_flush */ - 14 + 14 + /* vcn_v1_0_dec_ring_emit_fence x2 vm fence */ + 8 + /* vcn_v1_0_jpeg_ring_emit_vm_flush */ + 26 + 26 + /* vcn_v1_0_jpeg_ring_emit_fence x2 vm fence */ 6, - .emit_ib_size = 22, /* vcn_v1_0_dec_ring_emit_ib */ + .emit_ib_size = 22, /* vcn_v1_0_jpeg_ring_emit_ib */ .emit_ib = vcn_v1_0_jpeg_ring_emit_ib, .emit_fence = vcn_v1_0_jpeg_ring_emit_fence, .emit_vm_flush = vcn_v1_0_jpeg_ring_emit_vm_flush, @@ -1746,6 +2144,7 @@ static const struct amdgpu_ring_funcs vcn_v1_0_jpeg_ring_vm_funcs = { .end_use = amdgpu_vcn_ring_end_use, .emit_wreg = vcn_v1_0_jpeg_ring_emit_wreg, .emit_reg_wait = vcn_v1_0_jpeg_ring_emit_reg_wait, + .emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper, }; static void vcn_v1_0_set_dec_ring_funcs(struct amdgpu_device *adev) @@ -1777,7 +2176,7 @@ static const struct amdgpu_irq_src_funcs vcn_v1_0_irq_funcs = { static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev) { - adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 1; + adev->vcn.irq.num_types = adev->vcn.num_enc_rings + 2; adev->vcn.irq.funcs = &vcn_v1_0_irq_funcs; } diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c index 5ae5ed2e62d6..2c250b01a903 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c @@ -129,7 +129,7 @@ static int vega10_ih_irq_init(struct amdgpu_device *adev) else wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4); WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off)); - WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF); + WREG32_SOC15(OSSSYS, 0, mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFFFF); /* set rptr, wptr to 0 */ WREG32_SOC15(OSSSYS, 0, mmIH_RB_RPTR, 0); @@ -220,89 +220,6 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev) } /** - * vega10_ih_prescreen_iv - prescreen an interrupt vector - * - * @adev: amdgpu_device pointer - * - * Returns true if the interrupt vector should be further processed. - */ -static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev) -{ - u32 ring_index = adev->irq.ih.rptr >> 2; - u32 dw0, dw3, dw4, dw5; - u16 pasid; - u64 addr, key; - struct amdgpu_vm *vm; - int r; - - dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]); - dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]); - dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]); - dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]); - - /* Filter retry page faults, let only the first one pass. If - * there are too many outstanding faults, ignore them until - * some faults get cleared. - */ - switch (dw0 & 0xff) { - case SOC15_IH_CLIENTID_VMC: - case SOC15_IH_CLIENTID_UTCL2: - break; - default: - /* Not a VM fault */ - return true; - } - - pasid = dw3 & 0xffff; - /* No PASID, can't identify faulting process */ - if (!pasid) - return true; - - /* Not a retry fault, check fault credit */ - if (!(dw5 & 0x80)) { - if (!amdgpu_vm_pasid_fault_credit(adev, pasid)) - goto ignore_iv; - return true; - } - - addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12); - key = AMDGPU_VM_FAULT(pasid, addr); - r = amdgpu_ih_add_fault(adev, key); - - /* Hash table is full or the fault is already being processed, - * ignore further page faults - */ - if (r != 0) - goto ignore_iv; - - /* Track retry faults in per-VM fault FIFO. */ - spin_lock(&adev->vm_manager.pasid_lock); - vm = idr_find(&adev->vm_manager.pasid_idr, pasid); - if (!vm) { - /* VM not found, process it normally */ - spin_unlock(&adev->vm_manager.pasid_lock); - amdgpu_ih_clear_fault(adev, key); - return true; - } - /* No locking required with single writer and single reader */ - r = kfifo_put(&vm->faults, key); - if (!r) { - /* FIFO is full. Ignore it until there is space */ - spin_unlock(&adev->vm_manager.pasid_lock); - amdgpu_ih_clear_fault(adev, key); - goto ignore_iv; - } - spin_unlock(&adev->vm_manager.pasid_lock); - - /* It's the first fault for this address, process it normally */ - return true; - -ignore_iv: - adev->irq.ih.rptr += 32; - return false; -} - -/** * vega10_ih_decode_iv - decode an interrupt vector * * @adev: amdgpu_device pointer @@ -379,20 +296,12 @@ static int vega10_ih_sw_init(void *handle) int r; struct amdgpu_device *adev = (struct amdgpu_device *)handle; - r = amdgpu_ih_ring_init(adev, 256 * 1024, true); + r = amdgpu_ih_ring_init(adev, &adev->irq.ih, 256 * 1024, true); if (r) return r; adev->irq.ih.use_doorbell = true; - adev->irq.ih.doorbell_index = AMDGPU_DOORBELL64_IH << 1; - - adev->irq.ih.faults = kmalloc(sizeof(*adev->irq.ih.faults), GFP_KERNEL); - if (!adev->irq.ih.faults) - return -ENOMEM; - INIT_CHASH_TABLE(adev->irq.ih.faults->hash, - AMDGPU_PAGEFAULT_HASH_BITS, 8, 0); - spin_lock_init(&adev->irq.ih.faults->lock); - adev->irq.ih.faults->count = 0; + adev->irq.ih.doorbell_index = adev->doorbell_index.ih << 1; r = amdgpu_irq_init(adev); @@ -404,10 +313,7 @@ static int vega10_ih_sw_fini(void *handle) struct amdgpu_device *adev = (struct amdgpu_device *)handle; amdgpu_irq_fini(adev); - amdgpu_ih_ring_fini(adev); - - kfree(adev->irq.ih.faults); - adev->irq.ih.faults = NULL; + amdgpu_ih_ring_fini(adev, &adev->irq.ih); return 0; } @@ -497,15 +403,13 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = { static const struct amdgpu_ih_funcs vega10_ih_funcs = { .get_wptr = vega10_ih_get_wptr, - .prescreen_iv = vega10_ih_prescreen_iv, .decode_iv = vega10_ih_decode_iv, .set_rptr = vega10_ih_set_rptr }; static void vega10_ih_set_interrupt_funcs(struct amdgpu_device *adev) { - if (adev->irq.ih_funcs == NULL) - adev->irq.ih_funcs = &vega10_ih_funcs; + adev->irq.ih_funcs = &vega10_ih_funcs; } const struct amdgpu_ip_block_version vega10_ih_ip_block = diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c index c5c9b2bc190d..422674bb3cdf 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega10_reg_init.c @@ -56,4 +56,32 @@ int vega10_reg_base_init(struct amdgpu_device *adev) return 0; } +void vega10_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_DOORBELL64_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL64_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL64_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL64_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL64_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL64_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL64_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL64_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL64_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_DOORBELL64_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_DOORBELL64_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL64_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL64_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL64_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_DOORBELL64_IH; + adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_DOORBELL64_UVD_RING0_1; + adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_DOORBELL64_UVD_RING2_3; + adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_DOORBELL64_UVD_RING4_5; + adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_DOORBELL64_UVD_RING6_7; + adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_DOORBELL64_VCE_RING0_1; + adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_DOORBELL64_VCE_RING2_3; + adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_DOORBELL64_VCE_RING4_5; + adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_DOORBELL64_VCE_RING6_7; + /* In unit of dword doorbell */ + adev->doorbell_index.max_assignment = AMDGPU_DOORBELL64_MAX_ASSIGNMENT << 1; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c index 2d4473557b0d..edce413fda9a 100644 --- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c +++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c @@ -49,8 +49,42 @@ int vega20_reg_base_init(struct amdgpu_device *adev) adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i])); adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i])); adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i])); + adev->reg_offset[CLK_HWIP][i] = (uint32_t *)(&(CLK_BASE.instance[i])); } return 0; } +void vega20_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_VEGA20_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_VEGA20_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_VEGA20_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_VEGA20_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_VEGA20_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_VEGA20_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_VEGA20_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_VEGA20_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_VEGA20_DOORBELL_MEC_RING7; + adev->doorbell_index.userqueue_start = AMDGPU_VEGA20_DOORBELL_USERQUEUE_START; + adev->doorbell_index.userqueue_end = AMDGPU_VEGA20_DOORBELL_USERQUEUE_END; + adev->doorbell_index.gfx_ring0 = AMDGPU_VEGA20_DOORBELL_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.sdma_engine2 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE2; + adev->doorbell_index.sdma_engine3 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE3; + adev->doorbell_index.sdma_engine4 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE4; + adev->doorbell_index.sdma_engine5 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE5; + adev->doorbell_index.sdma_engine6 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE6; + adev->doorbell_index.sdma_engine7 = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE7; + adev->doorbell_index.ih = AMDGPU_VEGA20_DOORBELL_IH; + adev->doorbell_index.uvd_vce.uvd_ring0_1 = AMDGPU_VEGA20_DOORBELL64_UVD_RING0_1; + adev->doorbell_index.uvd_vce.uvd_ring2_3 = AMDGPU_VEGA20_DOORBELL64_UVD_RING2_3; + adev->doorbell_index.uvd_vce.uvd_ring4_5 = AMDGPU_VEGA20_DOORBELL64_UVD_RING4_5; + adev->doorbell_index.uvd_vce.uvd_ring6_7 = AMDGPU_VEGA20_DOORBELL64_UVD_RING6_7; + adev->doorbell_index.uvd_vce.vce_ring0_1 = AMDGPU_VEGA20_DOORBELL64_VCE_RING0_1; + adev->doorbell_index.uvd_vce.vce_ring2_3 = AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3; + adev->doorbell_index.uvd_vce.vce_ring4_5 = AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5; + adev->doorbell_index.uvd_vce.vce_ring6_7 = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7; + adev->doorbell_index.max_assignment = AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT << 1; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c index 88b57a5e9489..77e367459101 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.c +++ b/drivers/gpu/drm/amd/amdgpu/vi.c @@ -87,9 +87,9 @@ static u32 vi_pcie_rreg(struct amdgpu_device *adev, u32 reg) u32 r; spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(mmPCIE_INDEX, reg); - (void)RREG32(mmPCIE_INDEX); - r = RREG32(mmPCIE_DATA); + WREG32_NO_KIQ(mmPCIE_INDEX, reg); + (void)RREG32_NO_KIQ(mmPCIE_INDEX); + r = RREG32_NO_KIQ(mmPCIE_DATA); spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); return r; } @@ -99,10 +99,10 @@ static void vi_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->pcie_idx_lock, flags); - WREG32(mmPCIE_INDEX, reg); - (void)RREG32(mmPCIE_INDEX); - WREG32(mmPCIE_DATA, v); - (void)RREG32(mmPCIE_DATA); + WREG32_NO_KIQ(mmPCIE_INDEX, reg); + (void)RREG32_NO_KIQ(mmPCIE_INDEX); + WREG32_NO_KIQ(mmPCIE_DATA, v); + (void)RREG32_NO_KIQ(mmPCIE_DATA); spin_unlock_irqrestore(&adev->pcie_idx_lock, flags); } @@ -123,8 +123,8 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v) unsigned long flags; spin_lock_irqsave(&adev->smc_idx_lock, flags); - WREG32(mmSMC_IND_INDEX_11, (reg)); - WREG32(mmSMC_IND_DATA_11, (v)); + WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg)); + WREG32_NO_KIQ(mmSMC_IND_DATA_11, (v)); spin_unlock_irqrestore(&adev->smc_idx_lock, flags); } @@ -955,6 +955,7 @@ static const struct amdgpu_asic_funcs vi_asic_funcs = .flush_hdp = &vi_flush_hdp, .invalidate_hdp = &vi_invalidate_hdp, .need_full_reset = &vi_need_full_reset, + .init_doorbell_index = &legacy_doorbell_index_init, }; #define CZ_REV_BRISTOL(rev) \ @@ -1596,16 +1597,18 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v7_4_ip_block); amdgpu_device_ip_block_add(adev, &iceland_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v2_4_ip_block); break; case CHIP_FIJI: amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_5_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -1615,8 +1618,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v10_1_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) { amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); @@ -1626,6 +1627,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display || amdgpu_sriov_vf(adev)) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -1635,8 +1638,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v10_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); if (!amdgpu_sriov_vf(adev)) { amdgpu_device_ip_block_add(adev, &uvd_v5_0_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_0_ip_block); @@ -1649,6 +1650,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_1_ip_block); amdgpu_device_ip_block_add(adev, &tonga_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -1658,8 +1661,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v11_2_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v3_1_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_3_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); break; @@ -1667,6 +1668,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); amdgpu_device_ip_block_add(adev, &cz_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -1676,8 +1679,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_0_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_0_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_1_ip_block); #if defined(CONFIG_DRM_AMD_ACP) @@ -1688,6 +1689,8 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) amdgpu_device_ip_block_add(adev, &vi_common_ip_block); amdgpu_device_ip_block_add(adev, &gmc_v8_0_ip_block); amdgpu_device_ip_block_add(adev, &cz_ih_ip_block); + amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block); + amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &pp_smu_ip_block); if (adev->enable_virtual_display) amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block); @@ -1697,8 +1700,6 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) #endif else amdgpu_device_ip_block_add(adev, &dce_v11_0_ip_block); - amdgpu_device_ip_block_add(adev, &gfx_v8_1_ip_block); - amdgpu_device_ip_block_add(adev, &sdma_v3_0_ip_block); amdgpu_device_ip_block_add(adev, &uvd_v6_2_ip_block); amdgpu_device_ip_block_add(adev, &vce_v3_4_ip_block); #if defined(CONFIG_DRM_AMD_ACP) @@ -1712,3 +1713,21 @@ int vi_set_ip_blocks(struct amdgpu_device *adev) return 0; } + +void legacy_doorbell_index_init(struct amdgpu_device *adev) +{ + adev->doorbell_index.kiq = AMDGPU_DOORBELL_KIQ; + adev->doorbell_index.mec_ring0 = AMDGPU_DOORBELL_MEC_RING0; + adev->doorbell_index.mec_ring1 = AMDGPU_DOORBELL_MEC_RING1; + adev->doorbell_index.mec_ring2 = AMDGPU_DOORBELL_MEC_RING2; + adev->doorbell_index.mec_ring3 = AMDGPU_DOORBELL_MEC_RING3; + adev->doorbell_index.mec_ring4 = AMDGPU_DOORBELL_MEC_RING4; + adev->doorbell_index.mec_ring5 = AMDGPU_DOORBELL_MEC_RING5; + adev->doorbell_index.mec_ring6 = AMDGPU_DOORBELL_MEC_RING6; + adev->doorbell_index.mec_ring7 = AMDGPU_DOORBELL_MEC_RING7; + adev->doorbell_index.gfx_ring0 = AMDGPU_DOORBELL_GFX_RING0; + adev->doorbell_index.sdma_engine0 = AMDGPU_DOORBELL_sDMA_ENGINE0; + adev->doorbell_index.sdma_engine1 = AMDGPU_DOORBELL_sDMA_ENGINE1; + adev->doorbell_index.ih = AMDGPU_DOORBELL_IH; + adev->doorbell_index.max_assignment = AMDGPU_DOORBELL_MAX_ASSIGNMENT; +} diff --git a/drivers/gpu/drm/amd/amdgpu/vi.h b/drivers/gpu/drm/amd/amdgpu/vi.h index 0429fe332269..8de0772f986c 100644 --- a/drivers/gpu/drm/amd/amdgpu/vi.h +++ b/drivers/gpu/drm/amd/amdgpu/vi.h @@ -30,4 +30,5 @@ void vi_srbm_select(struct amdgpu_device *adev, u32 me, u32 pipe, u32 queue, u32 vmid); int vi_set_ip_blocks(struct amdgpu_device *adev); +void legacy_doorbell_index_init(struct amdgpu_device *adev); #endif |