summaryrefslogtreecommitdiffstats
path: root/drivers/gpu/drm/amd/amdgpu
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu.h144
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c57
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c26
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c6
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c118
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_device.c168
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_display.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c398
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c69
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c39
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_job.c24
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_object.c16
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c35
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c4
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c27
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c58
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c88
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h19
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c103
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c11
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c9
-rw-r--r--drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c229
-rw-r--r--drivers/gpu/drm/amd/amdgpu/atombios_dp.c20
-rw-r--r--drivers/gpu/drm/amd/amdgpu/ci_dpm.c60
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik.c159
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cik_sdma.c45
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cikd.h3
-rw-r--r--drivers/gpu/drm/amd/amdgpu/cz_dpm.c13
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v10_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v11_0.c38
-rw-r--r--drivers/gpu/drm/amd/amdgpu/dce_v8_0.c33
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c166
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c64
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c12
-rw-r--r--drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c2
-rw-r--r--drivers/gpu/drm/amd/amdgpu/kv_dpm.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c46
-rw-r--r--drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c43
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c10
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c8
-rw-r--r--drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v2_0.c5
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vce_v3_0.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vi.c7
-rw-r--r--drivers/gpu/drm/amd/amdgpu/vid.h2
50 files changed, 1420 insertions, 1077 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index f5bac97a438b..c4a21c6428f5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -85,6 +85,8 @@ extern int amdgpu_vm_debug;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern int amdgpu_powerplay;
+extern unsigned amdgpu_pcie_gen_cap;
+extern unsigned amdgpu_pcie_lane_cap;
#define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS 3000
#define AMDGPU_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -127,47 +129,6 @@ extern int amdgpu_powerplay;
#define AMDGPU_RESET_VCE (1 << 13)
#define AMDGPU_RESET_VCE1 (1 << 14)
-/* CG block flags */
-#define AMDGPU_CG_BLOCK_GFX (1 << 0)
-#define AMDGPU_CG_BLOCK_MC (1 << 1)
-#define AMDGPU_CG_BLOCK_SDMA (1 << 2)
-#define AMDGPU_CG_BLOCK_UVD (1 << 3)
-#define AMDGPU_CG_BLOCK_VCE (1 << 4)
-#define AMDGPU_CG_BLOCK_HDP (1 << 5)
-#define AMDGPU_CG_BLOCK_BIF (1 << 6)
-
-/* CG flags */
-#define AMDGPU_CG_SUPPORT_GFX_MGCG (1 << 0)
-#define AMDGPU_CG_SUPPORT_GFX_MGLS (1 << 1)
-#define AMDGPU_CG_SUPPORT_GFX_CGCG (1 << 2)
-#define AMDGPU_CG_SUPPORT_GFX_CGLS (1 << 3)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS (1 << 4)
-#define AMDGPU_CG_SUPPORT_GFX_CGTS_LS (1 << 5)
-#define AMDGPU_CG_SUPPORT_GFX_CP_LS (1 << 6)
-#define AMDGPU_CG_SUPPORT_GFX_RLC_LS (1 << 7)
-#define AMDGPU_CG_SUPPORT_MC_LS (1 << 8)
-#define AMDGPU_CG_SUPPORT_MC_MGCG (1 << 9)
-#define AMDGPU_CG_SUPPORT_SDMA_LS (1 << 10)
-#define AMDGPU_CG_SUPPORT_SDMA_MGCG (1 << 11)
-#define AMDGPU_CG_SUPPORT_BIF_LS (1 << 12)
-#define AMDGPU_CG_SUPPORT_UVD_MGCG (1 << 13)
-#define AMDGPU_CG_SUPPORT_VCE_MGCG (1 << 14)
-#define AMDGPU_CG_SUPPORT_HDP_LS (1 << 15)
-#define AMDGPU_CG_SUPPORT_HDP_MGCG (1 << 16)
-
-/* PG flags */
-#define AMDGPU_PG_SUPPORT_GFX_PG (1 << 0)
-#define AMDGPU_PG_SUPPORT_GFX_SMG (1 << 1)
-#define AMDGPU_PG_SUPPORT_GFX_DMG (1 << 2)
-#define AMDGPU_PG_SUPPORT_UVD (1 << 3)
-#define AMDGPU_PG_SUPPORT_VCE (1 << 4)
-#define AMDGPU_PG_SUPPORT_CP (1 << 5)
-#define AMDGPU_PG_SUPPORT_GDS (1 << 6)
-#define AMDGPU_PG_SUPPORT_RLC_SMU_HS (1 << 7)
-#define AMDGPU_PG_SUPPORT_SDMA (1 << 8)
-#define AMDGPU_PG_SUPPORT_ACP (1 << 9)
-#define AMDGPU_PG_SUPPORT_SAMU (1 << 10)
-
/* GFX current status */
#define AMDGPU_GFX_NORMAL_MODE 0x00000000L
#define AMDGPU_GFX_SAFE_MODE 0x00000001L
@@ -180,7 +141,6 @@ extern int amdgpu_powerplay;
#define CIK_CURSOR_HEIGHT 128
struct amdgpu_device;
-struct amdgpu_fence;
struct amdgpu_ib;
struct amdgpu_vm;
struct amdgpu_ring;
@@ -326,9 +286,11 @@ struct amdgpu_ring_funcs {
struct amdgpu_ib *ib);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
+ void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
void (*emit_vm_flush)(struct amdgpu_ring *ring, unsigned vm_id,
uint64_t pd_addr);
void (*emit_hdp_flush)(struct amdgpu_ring *ring);
+ void (*emit_hdp_invalidate)(struct amdgpu_ring *ring);
void (*emit_gds_switch)(struct amdgpu_ring *ring, uint32_t vmid,
uint32_t gds_base, uint32_t gds_size,
uint32_t gws_base, uint32_t gws_size,
@@ -385,13 +347,15 @@ struct amdgpu_fence_driver {
uint64_t gpu_addr;
volatile uint32_t *cpu_addr;
/* sync_seq is protected by ring emission lock */
- uint64_t sync_seq;
- atomic64_t last_seq;
+ uint32_t sync_seq;
+ atomic_t last_seq;
bool initialized;
struct amdgpu_irq_src *irq_src;
unsigned irq_type;
struct timer_list fallback_timer;
- wait_queue_head_t fence_queue;
+ unsigned num_fences_mask;
+ spinlock_t lock;
+ struct fence **fences;
};
/* some special values for the owner field */
@@ -401,19 +365,6 @@ struct amdgpu_fence_driver {
#define AMDGPU_FENCE_FLAG_64BIT (1 << 0)
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
-struct amdgpu_fence {
- struct fence base;
-
- /* RB, DMA, etc. */
- struct amdgpu_ring *ring;
- uint64_t seq;
-
- /* filp or special value for fence creator */
- void *owner;
-
- wait_queue_t fence_wake;
-};
-
struct amdgpu_user_fence {
/* write-back bo */
struct amdgpu_bo *bo;
@@ -425,16 +376,15 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev);
void amdgpu_fence_driver_fini(struct amdgpu_device *adev);
void amdgpu_fence_driver_force_completion(struct amdgpu_device *adev);
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring);
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission);
int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src,
unsigned irq_type);
void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
- struct amdgpu_fence **fence);
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **fence);
void amdgpu_fence_process(struct amdgpu_ring *ring);
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring);
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
@@ -473,6 +423,8 @@ struct amdgpu_bo_list_entry {
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
+ struct page **user_pages;
+ int user_invalidated;
};
struct amdgpu_bo_va_mapping {
@@ -484,7 +436,6 @@ struct amdgpu_bo_va_mapping {
/* bo virtual addresses in a specific vm */
struct amdgpu_bo_va {
- struct mutex mutex;
/* protected by bo being reserved */
struct list_head bo_list;
struct fence *last_pt_update;
@@ -579,11 +530,14 @@ int amdgpu_gem_debugfs_init(struct amdgpu_device *adev);
* Assumption is that there won't be hole (all object on same
* alignment).
*/
+
+#define AMDGPU_SA_NUM_FENCE_LISTS 32
+
struct amdgpu_sa_manager {
wait_queue_head_t wq;
struct amdgpu_bo *bo;
struct list_head *hole;
- struct list_head flist[AMDGPU_MAX_RINGS];
+ struct list_head flist[AMDGPU_SA_NUM_FENCE_LISTS];
struct list_head olist;
unsigned size;
uint64_t gpu_addr;
@@ -592,8 +546,6 @@ struct amdgpu_sa_manager {
uint32_t align;
};
-struct amdgpu_sa_bo;
-
/* sub-allocation buffer */
struct amdgpu_sa_bo {
struct list_head olist;
@@ -637,6 +589,8 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
int amdgpu_sync_wait(struct amdgpu_sync *sync);
void amdgpu_sync_free(struct amdgpu_sync *sync);
+int amdgpu_sync_init(void);
+void amdgpu_sync_fini(void);
/*
* GART structures, functions & helpers
@@ -767,10 +721,10 @@ struct amdgpu_ib {
uint32_t length_dw;
uint64_t gpu_addr;
uint32_t *ptr;
- struct amdgpu_fence *fence;
struct amdgpu_user_fence *user;
- bool grabbed_vmid;
struct amdgpu_vm *vm;
+ unsigned vm_id;
+ uint64_t vm_pd_addr;
struct amdgpu_ctx *ctx;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
@@ -877,15 +831,14 @@ struct amdgpu_vm_pt {
};
struct amdgpu_vm_id {
- unsigned id;
- uint64_t pd_gpu_addr;
+ struct amdgpu_vm_manager_id *mgr_id;
+ uint64_t pd_gpu_addr;
/* last flushed PD/PT update */
- struct fence *flushed_updates;
+ struct fence *flushed_updates;
};
struct amdgpu_vm {
/* tree of virtual addresses mapped */
- spinlock_t it_lock;
struct rb_root va;
/* protecting invalidated */
@@ -922,6 +875,13 @@ struct amdgpu_vm_manager_id {
struct list_head list;
struct fence *active;
atomic_long_t owner;
+
+ uint32_t gds_base;
+ uint32_t gds_size;
+ uint32_t gws_base;
+ uint32_t gws_size;
+ uint32_t oa_base;
+ uint32_t oa_size;
};
struct amdgpu_vm_manager {
@@ -954,10 +914,14 @@ void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct fence *fence);
+ struct amdgpu_sync *sync, struct fence *fence,
+ unsigned *vm_id, uint64_t *vm_pd_addr);
void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates);
+ unsigned vm_id, uint64_t pd_addr,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size);
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
struct amdgpu_vm *vm);
@@ -1045,7 +1009,7 @@ struct amdgpu_bo_list {
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
- bool has_userptr;
+ unsigned first_userptr;
unsigned num_entries;
struct amdgpu_bo_list_entry *array;
};
@@ -1172,10 +1136,9 @@ struct amdgpu_gfx {
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib);
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f);
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
- struct amdgpu_ib *ib, void *owner,
- struct fence *last_vm_update,
+ struct amdgpu_ib *ib, struct fence *last_vm_update,
struct fence **f);
int amdgpu_ib_pool_init(struct amdgpu_device *adev);
void amdgpu_ib_pool_fini(struct amdgpu_device *adev);
@@ -1194,7 +1157,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
struct amdgpu_irq_src *irq_src, unsigned irq_type,
enum amdgpu_ring_type ring_type);
void amdgpu_ring_fini(struct amdgpu_ring *ring);
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f);
/*
* CS.
@@ -1236,6 +1198,7 @@ struct amdgpu_job {
struct amdgpu_ring *ring;
struct amdgpu_sync sync;
struct amdgpu_ib *ibs;
+ struct fence *fence; /* the hw fence */
uint32_t num_ibs;
void *owner;
struct amdgpu_user_fence uf;
@@ -2051,7 +2014,6 @@ struct amdgpu_device {
struct amdgpu_sdma sdma;
/* uvd */
- bool has_uvd;
struct amdgpu_uvd uvd;
/* vce */
@@ -2098,20 +2060,6 @@ u32 amdgpu_mm_rdoorbell(struct amdgpu_device *adev, u32 index);
void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
/*
- * Cast helper
- */
-extern const struct fence_ops amdgpu_fence_ops;
-static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
-{
- struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
-
- if (__f->base.ops == &amdgpu_fence_ops)
- return __f;
-
- return NULL;
-}
-
-/*
* Registers read & write functions.
*/
#define RREG32(reg) amdgpu_mm_rreg(adev, (reg), false)
@@ -2225,10 +2173,12 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, ib) (r)->funcs->emit_ib((r), (ib))
+#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))
#define amdgpu_ring_emit_gds_switch(r, v, db, ds, wb, ws, ab, as) (r)->funcs->emit_gds_switch((r), (v), (db), (ds), (wb), (ws), (ab), (as))
#define amdgpu_ring_emit_hdp_flush(r) (r)->funcs->emit_hdp_flush((r))
+#define amdgpu_ring_emit_hdp_invalidate(r) (r)->funcs->emit_hdp_invalidate((r))
#define amdgpu_ring_pad_ib(r, ib) ((r)->funcs->pad_ib((r), (ib)))
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
@@ -2353,11 +2303,15 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
struct amdgpu_ring **out_ring);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *rbo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
uint32_t flags);
+bool amdgpu_ttm_tt_has_userptr(struct ttm_tt *ttm);
struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm);
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end);
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated);
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm);
uint32_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
struct ttm_mem_reg *mem);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
index 9f8cfaab3004..d6b0bff510aa 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acp.c
@@ -240,12 +240,10 @@ static int acp_poweron(struct generic_pm_domain *genpd)
static struct device *get_mfd_cell_dev(const char *device_name, int r)
{
char auto_dev_name[25];
- char buf[8];
struct device *dev;
- sprintf(buf, ".%d.auto", r);
- strcpy(auto_dev_name, device_name);
- strcat(auto_dev_name, buf);
+ snprintf(auto_dev_name, sizeof(auto_dev_name),
+ "%s.%d.auto", device_name, r);
dev = bus_find_device_by_name(&platform_bus_type, NULL, auto_dev_name);
dev_info(dev, "device %s added to pm domain\n", auto_dev_name);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index 84d68d658f8a..32809f749903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -30,25 +30,38 @@ const struct kfd2kgd_calls *kfd2kgd;
const struct kgd2kfd_calls *kgd2kfd;
bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-bool amdgpu_amdkfd_init(void)
+int amdgpu_amdkfd_init(void)
{
+ int ret;
+
#if defined(CONFIG_HSA_AMD_MODULE)
- bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
+ int (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
kgd2kfd_init_p = symbol_request(kgd2kfd_init);
if (kgd2kfd_init_p == NULL)
- return false;
+ return -ENOENT;
+
+ ret = kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd);
+ if (ret) {
+ symbol_put(kgd2kfd_init);
+ kgd2kfd = NULL;
+ }
+
+#elif defined(CONFIG_HSA_AMD)
+ ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
+ if (ret)
+ kgd2kfd = NULL;
+
+#else
+ ret = -ENOENT;
#endif
- return true;
+
+ return ret;
}
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
{
-#if defined(CONFIG_HSA_AMD_MODULE)
- bool (*kgd2kfd_init_p)(unsigned, const struct kgd2kfd_calls**);
-#endif
-
switch (rdev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
@@ -62,35 +75,7 @@ bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev)
return false;
}
-#if defined(CONFIG_HSA_AMD_MODULE)
- kgd2kfd_init_p = symbol_request(kgd2kfd_init);
-
- if (kgd2kfd_init_p == NULL) {
- kfd2kgd = NULL;
- return false;
- }
-
- if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kgd2kfd)) {
- symbol_put(kgd2kfd_init);
- kfd2kgd = NULL;
- kgd2kfd = NULL;
-
- return false;
- }
-
return true;
-#elif defined(CONFIG_HSA_AMD)
- if (!kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd)) {
- kfd2kgd = NULL;
- kgd2kfd = NULL;
- return false;
- }
-
- return true;
-#else
- kfd2kgd = NULL;
- return false;
-#endif
}
void amdgpu_amdkfd_fini(void)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index a8be765542e6..de530f68d4e3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -36,7 +36,7 @@ struct kgd_mem {
void *cpu_ptr;
};
-bool amdgpu_amdkfd_init(void);
+int amdgpu_amdkfd_init(void);
void amdgpu_amdkfd_fini(void);
bool amdgpu_amdkfd_load_interface(struct amdgpu_device *rdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index fa948dcbdd5d..0020a0ea43ff 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -63,6 +63,10 @@ bool amdgpu_has_atpx(void) {
return amdgpu_atpx_priv.atpx_detected;
}
+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+ return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
/**
* amdgpu_atpx_call - call an ATPX method
*
@@ -142,10 +146,6 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
*/
static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
{
- /* make sure required functions are enabled */
- /* dGPU power control is required */
- atpx->functions.power_cntl = true;
-
if (atpx->functions.px_params) {
union acpi_object *info;
struct atpx_px_params output;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 90d6fc1618aa..eacd810fc09b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -91,7 +91,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
- bool has_userptr = false;
+ unsigned last_entry = 0, first_userptr = num_entries;
unsigned i;
int r;
@@ -101,8 +101,9 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
for (i = 0; i < num_entries; ++i) {
- struct amdgpu_bo_list_entry *entry = &array[i];
+ struct amdgpu_bo_list_entry *entry;
struct drm_gem_object *gobj;
+ struct amdgpu_bo *bo;
struct mm_struct *usermm;
gobj = drm_gem_object_lookup(adev->ddev, filp, info[i].bo_handle);
@@ -111,18 +112,24 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
goto error_free;
}
- entry->robj = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
+ bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
drm_gem_object_unreference_unlocked(gobj);
- entry->priority = min(info[i].bo_priority,
- AMDGPU_BO_LIST_MAX_PRIORITY);
- usermm = amdgpu_ttm_tt_get_usermm(entry->robj->tbo.ttm);
+
+ usermm = amdgpu_ttm_tt_get_usermm(bo->tbo.ttm);
if (usermm) {
if (usermm != current->mm) {
+ amdgpu_bo_unref(&bo);
r = -EPERM;
goto error_free;
}
- has_userptr = true;
+ entry = &array[--first_userptr];
+ } else {
+ entry = &array[last_entry++];
}
+
+ entry->robj = bo;
+ entry->priority = min(info[i].bo_priority,
+ AMDGPU_BO_LIST_MAX_PRIORITY);
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
@@ -144,13 +151,15 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
list->gds_obj = gds_obj;
list->gws_obj = gws_obj;
list->oa_obj = oa_obj;
- list->has_userptr = has_userptr;
+ list->first_userptr = first_userptr;
list->array = array;
list->num_entries = num_entries;
return 0;
error_free:
+ while (i--)
+ amdgpu_bo_unref(&array[i].robj);
drm_free_large(array);
return r;
}
@@ -191,6 +200,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
list_add_tail(&list->array[i].tv.head,
&bucket[priority]);
+ list->array[i].user_pages = NULL;
}
/* Connect the sorted buckets in the output list. */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index a081dda9fa2f..7a4b101e10c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -795,6 +795,12 @@ static int amdgpu_cgs_query_system_info(void *cgs_device,
case CGS_SYSTEM_INFO_PCIE_MLW:
sys_info->value = adev->pm.pcie_mlw_mask;
break;
+ case CGS_SYSTEM_INFO_CG_FLAGS:
+ sys_info->value = adev->cg_flags;
+ break;
+ case CGS_SYSTEM_INFO_PG_FLAGS:
+ sys_info->value = adev->pg_flags;
+ break;
default:
return -ENODEV;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
index 89c3dd62ba21..119cdc2c43e7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@@ -77,7 +77,7 @@ void amdgpu_connector_hotplug(struct drm_connector *connector)
} else if (amdgpu_atombios_dp_needs_link_train(amdgpu_connector)) {
/* Don't try to start link training before we
* have the dpcd */
- if (!amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
+ if (amdgpu_atombios_dp_get_dpcd(amdgpu_connector))
return;
/* set it to OFF so that drm_helper_connector_dpms()
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 52c3eb96b199..9392e50a7ba4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -25,6 +25,7 @@
* Jerome Glisse <glisse@freedesktop.org>
*/
#include <linux/list_sort.h>
+#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -111,6 +112,7 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.priority = 0;
p->uf_entry.tv.bo = &p->uf_entry.robj->tbo;
p->uf_entry.tv.shared = true;
+ p->uf_entry.user_pages = NULL;
drm_gem_object_unreference_unlocked(gobj);
return 0;
@@ -297,6 +299,7 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
list_for_each_entry(lobj, validated, tv.head) {
struct amdgpu_bo *bo = lobj->robj;
+ bool binding_userptr = false;
struct mm_struct *usermm;
uint32_t domain;
@@ -304,6 +307,15 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
+ /* Check if we have user pages and nobody bound the BO already */
+ if (lobj->user_pages && bo->tbo.ttm->state != tt_bound) {
+ size_t size = sizeof(struct page *);
+
+ size *= bo->tbo.ttm->num_pages;
+ memcpy(bo->tbo.ttm->pages, lobj->user_pages, size);
+ binding_userptr = true;
+ }
+
if (bo->pin_count)
continue;
@@ -334,6 +346,11 @@ int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
}
return r;
}
+
+ if (binding_userptr) {
+ drm_free_large(lobj->user_pages);
+ lobj->user_pages = NULL;
+ }
}
return 0;
}
@@ -342,15 +359,18 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
+ struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
bool need_mmap_lock = false;
+ unsigned i, tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
if (p->bo_list) {
- need_mmap_lock = p->bo_list->has_userptr;
+ need_mmap_lock = p->bo_list->first_userptr !=
+ p->bo_list->num_entries;
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
}
@@ -363,9 +383,81 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (need_mmap_lock)
down_read(&current->mm->mmap_sem);
- r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true, &duplicates);
- if (unlikely(r != 0))
- goto error_reserve;
+ while (1) {
+ struct list_head need_pages;
+ unsigned i;
+
+ r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
+ &duplicates);
+ if (unlikely(r != 0))
+ goto error_free_pages;
+
+ /* Without a BO list we don't have userptr BOs */
+ if (!p->bo_list)
+ break;
+
+ INIT_LIST_HEAD(&need_pages);
+ for (i = p->bo_list->first_userptr;
+ i < p->bo_list->num_entries; ++i) {
+
+ e = &p->bo_list->array[i];
+
+ if (amdgpu_ttm_tt_userptr_invalidated(e->robj->tbo.ttm,
+ &e->user_invalidated) && e->user_pages) {
+
+ /* We acquired a page array, but somebody
+ * invalidated it. Free it an try again
+ */
+ release_pages(e->user_pages,
+ e->robj->tbo.ttm->num_pages,
+ false);
+ drm_free_large(e->user_pages);
+ e->user_pages = NULL;
+ }
+
+ if (e->robj->tbo.ttm->state != tt_bound &&
+ !e->user_pages) {
+ list_del(&e->tv.head);
+ list_add(&e->tv.head, &need_pages);
+
+ amdgpu_bo_unreserve(e->robj);
+ }
+ }
+
+ if (list_empty(&need_pages))
+ break;
+
+ /* Unreserve everything again. */
+ ttm_eu_backoff_reservation(&p->ticket, &p->validated);
+
+ /* We tried to often, just abort */
+ if (!--tries) {
+ r = -EDEADLK;
+ goto error_free_pages;
+ }
+
+ /* Fill the page arrays for all useptrs. */
+ list_for_each_entry(e, &need_pages, tv.head) {
+ struct ttm_tt *ttm = e->robj->tbo.ttm;
+
+ e->user_pages = drm_calloc_large(ttm->num_pages,
+ sizeof(struct page*));
+ if (!e->user_pages) {
+ r = -ENOMEM;
+ goto error_free_pages;
+ }
+
+ r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
+ if (r) {
+ drm_free_large(e->user_pages);
+ e->user_pages = NULL;
+ goto error_free_pages;
+ }
+ }
+
+ /* And try again. */
+ list_splice(&need_pages, &p->validated);
+ }
amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
@@ -397,10 +489,26 @@ error_validate:
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
}
-error_reserve:
+error_free_pages:
+
if (need_mmap_lock)
up_read(&current->mm->mmap_sem);
+ if (p->bo_list) {
+ for (i = p->bo_list->first_userptr;
+ i < p->bo_list->num_entries; ++i) {
+ e = &p->bo_list->array[i];
+
+ if (!e->user_pages)
+ continue;
+
+ release_pages(e->user_pages,
+ e->robj->tbo.ttm->num_pages,
+ false);
+ drm_free_large(e->user_pages);
+ }
+ }
+
return r;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index db20d2783def..612117478b57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -62,6 +62,12 @@ static const char *amdgpu_asic_name[] = {
"LAST",
};
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+#else
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+#endif
+
bool amdgpu_device_is_px(struct drm_device *dev)
{
struct amdgpu_device *adev = dev->dev_private;
@@ -1479,7 +1485,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (amdgpu_runtime_pm == 1)
runtime = true;
- if (amdgpu_device_is_px(ddev))
+ if (amdgpu_device_is_px(ddev) && amdgpu_has_atpx_dgpu_power_cntl())
runtime = true;
vga_switcheroo_register_client(adev->pdev, &amdgpu_switcheroo_ops, runtime);
if (runtime)
@@ -1762,15 +1768,20 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
}
/* post card */
- amdgpu_atom_asic_init(adev->mode_info.atom_context);
+ if (!amdgpu_card_posted(adev))
+ amdgpu_atom_asic_init(adev->mode_info.atom_context);
r = amdgpu_resume(adev);
+ if (r)
+ DRM_ERROR("amdgpu_resume failed (%d).\n", r);
amdgpu_fence_driver_resume(adev);
- r = amdgpu_ib_ring_tests(adev);
- if (r)
- DRM_ERROR("ib ring test failed (%d).\n", r);
+ if (resume) {
+ r = amdgpu_ib_ring_tests(adev);
+ if (r)
+ DRM_ERROR("ib ring test failed (%d).\n", r);
+ }
r = amdgpu_late_init(adev);
if (r)
@@ -1903,80 +1914,97 @@ retry:
return r;
}
+#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007 /* gen: chipset 1/2, asic 1/2/3 */
+#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
+
void amdgpu_get_pcie_info(struct amdgpu_device *adev)
{
u32 mask;
int ret;
- if (pci_is_root_bus(adev->pdev->bus))
- return;
+ if (amdgpu_pcie_gen_cap)
+ adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
- if (amdgpu_pcie_gen2 == 0)
- return;
+ if (amdgpu_pcie_lane_cap)
+ adev->pm.pcie_mlw_mask = amdgpu_pcie_lane_cap;
- if (adev->flags & AMD_IS_APU)
+ /* covers APUs as well */
+ if (pci_is_root_bus(adev->pdev->bus)) {
+ if (adev->pm.pcie_gen_mask == 0)
+ adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ if (adev->pm.pcie_mlw_mask == 0)
+ adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
return;
+ }
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (!ret) {
- adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
- CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
- CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
-
- if (mask & DRM_PCIE_SPEED_25)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
- if (mask & DRM_PCIE_SPEED_50)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
- if (mask & DRM_PCIE_SPEED_80)
- adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
- }
- ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
- if (!ret) {
- switch (mask) {
- case 32:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 16:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 12:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 8:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 4:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 2:
- adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
- CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
- break;
- case 1:
- adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
- break;
- default:
- break;
+ if (adev->pm.pcie_gen_mask == 0) {
+ ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
+ if (!ret) {
+ adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
+ CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
+
+ if (mask & DRM_PCIE_SPEED_25)
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
+ if (mask & DRM_PCIE_SPEED_50)
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
+ if (mask & DRM_PCIE_SPEED_80)
+ adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
+ } else {
+ adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
+ }
+ }
+ if (adev->pm.pcie_mlw_mask == 0) {
+ ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
+ if (!ret) {
+ switch (mask) {
+ case 32:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 16:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 12:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 8:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 4:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 2:
+ adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
+ CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
+ break;
+ case 1:
+ adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
+ break;
+ default:
+ break;
+ }
+ } else {
+ adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
}
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 2cb53c24dec0..f0ed974bd4e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -70,8 +70,8 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
struct drm_crtc *crtc = &amdgpuCrtc->base;
unsigned long flags;
- unsigned i;
- int vpos, hpos, stat, min_udelay;
+ unsigned i, repcnt = 4;
+ int vpos, hpos, stat, min_udelay = 0;
struct drm_vblank_crtc *vblank = &crtc->dev->vblank[work->crtc_id];
if (amdgpu_flip_handle_fence(work, &work->excl))
@@ -97,7 +97,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
* In practice this won't execute very often unless on very fast
* machines because the time window for this to happen is very small.
*/
- for (;;) {
+ while (amdgpuCrtc->enabled && --repcnt) {
/* GET_DISTANCE_TO_VBLANKSTART returns distance to real vblank
* start in hpos, and to the "fudged earlier" vblank start in
* vpos.
@@ -113,12 +113,24 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
break;
/* Sleep at least until estimated real start of hw vblank */
- spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
min_udelay = (-hpos + 1) * max(vblank->linedur_ns / 1000, 5);
+ if (min_udelay > vblank->framedur_ns / 2000) {
+ /* Don't wait ridiculously long - something is wrong */
+ repcnt = 0;
+ break;
+ }
+ spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
usleep_range(min_udelay, 2 * min_udelay);
spin_lock_irqsave(&crtc->dev->event_lock, flags);
};
+ if (!repcnt)
+ DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
+ "framedur %d, linedur %d, stat %d, vpos %d, "
+ "hpos %d\n", work->crtc_id, min_udelay,
+ vblank->framedur_ns / 1000,
+ vblank->linedur_ns / 1000, stat, vpos, hpos);
+
/* set the flip status */
amdgpuCrtc->pflip_status = AMDGPU_FLIP_SUBMITTED;
spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index ce79a8b605a0..f1e17d60055a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -80,6 +80,8 @@ int amdgpu_exp_hw_support = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
int amdgpu_powerplay = -1;
+unsigned amdgpu_pcie_gen_cap = 0;
+unsigned amdgpu_pcie_lane_cap = 0;
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@@ -158,6 +160,12 @@ MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 =
module_param_named(powerplay, amdgpu_powerplay, int, 0444);
#endif
+MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
+module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
+
+MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
+module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
+
static struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_CIK
/* Kaveri */
@@ -310,6 +318,14 @@ static int amdgpu_pci_probe(struct pci_dev *pdev,
return -ENODEV;
}
+ /*
+ * Initialize amdkfd before starting radeon. If it was not loaded yet,
+ * defer radeon probing
+ */
+ ret = amdgpu_amdkfd_init();
+ if (ret == -EPROBE_DEFER)
+ return ret;
+
/* Get rid of things like offb */
ret = amdgpu_kick_out_firmware_fb(pdev);
if (ret)
@@ -539,6 +555,7 @@ static struct pci_driver amdgpu_kms_pci_driver = {
static int __init amdgpu_init(void)
{
+ amdgpu_sync_init();
#ifdef CONFIG_VGA_CONSOLE
if (vgacon_text_force()) {
DRM_ERROR("VGACON disables amdgpu kernel modesetting.\n");
@@ -552,8 +569,6 @@ static int __init amdgpu_init(void)
driver->num_ioctls = amdgpu_max_kms_ioctl;
amdgpu_register_atpx_handler();
- amdgpu_amdkfd_init();
-
/* let modprobe override vga console setting */
return drm_pci_init(driver, pdriver);
}
@@ -563,6 +578,7 @@ static void __exit amdgpu_exit(void)
amdgpu_amdkfd_fini();
drm_pci_exit(driver, pdriver);
amdgpu_unregister_atpx_handler();
+ amdgpu_sync_fini();
}
module_init(amdgpu_init);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
index 97db196dc6f8..4303b447efe8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@@ -47,9 +47,30 @@
* that the the relevant GPU caches have been flushed.
*/
+struct amdgpu_fence {
+ struct fence base;
+
+ /* RB, DMA, etc. */
+ struct amdgpu_ring *ring;
+};
+
static struct kmem_cache *amdgpu_fence_slab;
static atomic_t amdgpu_fence_slab_ref = ATOMIC_INIT(0);
+/*
+ * Cast helper
+ */
+static const struct fence_ops amdgpu_fence_ops;
+static inline struct amdgpu_fence *to_amdgpu_fence(struct fence *f)
+{
+ struct amdgpu_fence *__f = container_of(f, struct amdgpu_fence, base);
+
+ if (__f->base.ops == &amdgpu_fence_ops)
+ return __f;
+
+ return NULL;
+}
+
/**
* amdgpu_fence_write - write a fence value
*
@@ -82,7 +103,7 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
if (drv->cpu_addr)
seq = le32_to_cpu(*drv->cpu_addr);
else
- seq = lower_32_bits(atomic64_read(&drv->last_seq));
+ seq = atomic_read(&drv->last_seq);
return seq;
}
@@ -91,32 +112,41 @@ static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
* amdgpu_fence_emit - emit a fence on the requested ring
*
* @ring: ring the fence is associated with
- * @owner: creator of the fence
- * @fence: amdgpu fence object
+ * @f: resulting fence object
*
* Emits a fence command on the requested ring (all asics).
* Returns 0 on success, -ENOMEM on failure.
*/
-int amdgpu_fence_emit(struct amdgpu_ring *ring, void *owner,
- struct amdgpu_fence **fence)
+int amdgpu_fence_emit(struct amdgpu_ring *ring, struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_fence *fence;
+ struct fence **ptr;
+ uint32_t seq;
- /* we are protected by the ring emission mutex */
- *fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
- if ((*fence) == NULL) {
+ fence = kmem_cache_alloc(amdgpu_fence_slab, GFP_KERNEL);
+ if (fence == NULL)
return -ENOMEM;
- }
- (*fence)->seq = ++ring->fence_drv.sync_seq;
- (*fence)->ring = ring;
- (*fence)->owner = owner;
- fence_init(&(*fence)->base, &amdgpu_fence_ops,
- &ring->fence_drv.fence_queue.lock,
- adev->fence_context + ring->idx,
- (*fence)->seq);
+
+ seq = ++ring->fence_drv.sync_seq;
+ fence->ring = ring;
+ fence_init(&fence->base, &amdgpu_fence_ops,
+ &ring->fence_drv.lock,
+ adev->fence_context + ring->idx,
+ seq);
amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
- (*fence)->seq,
- AMDGPU_FENCE_FLAG_INT);
+ seq, AMDGPU_FENCE_FLAG_INT);
+
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ /* This function can't be called concurrently anyway, otherwise
+ * emitting the fence would mess up the hardware ring buffer.
+ */
+ BUG_ON(rcu_dereference_protected(*ptr, 1));
+
+ rcu_assign_pointer(*ptr, fence_get(&fence->base));
+
+ *f = &fence->base;
+
return 0;
}
@@ -134,89 +164,48 @@ static void amdgpu_fence_schedule_fallback(struct amdgpu_ring *ring)
}
/**
- * amdgpu_fence_activity - check for fence activity
+ * amdgpu_fence_process - check for fence activity
*
* @ring: pointer to struct amdgpu_ring
*
* Checks the current fence value and calculates the last
- * signalled fence value. Returns true if activity occured
- * on the ring, and the fence_queue should be waken up.
+ * signalled fence value. Wakes the fence queue if the
+ * sequence number has increased.
*/
-static bool amdgpu_fence_activity(struct amdgpu_ring *ring)
+void amdgpu_fence_process(struct amdgpu_ring *ring)
{
- uint64_t seq, last_seq, last_emitted;
- unsigned count_loop = 0;
- bool wake = false;
-
- /* Note there is a scenario here for an infinite loop but it's
- * very unlikely to happen. For it to happen, the current polling
- * process need to be interrupted by another process and another
- * process needs to update the last_seq btw the atomic read and
- * xchg of the current process.
- *
- * More over for this to go in infinite loop there need to be
- * continuously new fence signaled ie amdgpu_fence_read needs
- * to return a different value each time for both the currently
- * polling process and the other process that xchg the last_seq
- * btw atomic read and xchg of the current process. And the
- * value the other process set as last seq must be higher than
- * the seq value we just read. Which means that current process
- * need to be interrupted after amdgpu_fence_read and before
- * atomic xchg.
- *
- * To be even more safe we count the number of time we loop and
- * we bail after 10 loop just accepting the fact that we might
- * have temporarly set the last_seq not to the true real last
- * seq but to an older one.
- */
- last_seq = atomic64_read(&ring->fence_drv.last_seq);
+ struct amdgpu_fence_driver *drv = &ring->fence_drv;
+ uint32_t seq, last_seq;
+ int r;
+
do {
- last_emitted = ring->fence_drv.sync_seq;
+ last_seq = atomic_read(&ring->fence_drv.last_seq);
seq = amdgpu_fence_read(ring);
- seq |= last_seq & 0xffffffff00000000LL;
- if (seq < last_seq) {
- seq &= 0xffffffff;
- seq |= last_emitted & 0xffffffff00000000LL;
- }
- if (seq <= last_seq || seq > last_emitted) {
- break;
- }
- /* If we loop over we don't want to return without
- * checking if a fence is signaled as it means that the
- * seq we just read is different from the previous on.
- */
- wake = true;
- last_seq = seq;
- if ((count_loop++) > 10) {
- /* We looped over too many time leave with the
- * fact that we might have set an older fence
- * seq then the current real last seq as signaled
- * by the hw.
- */
- break;
- }
- } while (atomic64_xchg(&ring->fence_drv.last_seq, seq) > seq);
+ } while (atomic_cmpxchg(&drv->last_seq, last_seq, seq) != last_seq);
- if (seq < last_emitted)
+ if (seq != ring->fence_drv.sync_seq)
amdgpu_fence_schedule_fallback(ring);
- return wake;
-}
+ while (last_seq != seq) {
+ struct fence *fence, **ptr;
-/**
- * amdgpu_fence_process - process a fence
- *
- * @adev: amdgpu_device pointer
- * @ring: ring index the fence is associated with
- *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
- */
-void amdgpu_fence_process(struct amdgpu_ring *ring)
-{
- if (amdgpu_fence_activity(ring))
- wake_up_all(&ring->fence_drv.fence_queue);
+ ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+
+ /* There is always exactly one thread signaling this fence slot */
+ fence = rcu_dereference_protected(*ptr, 1);
+ rcu_assign_pointer(*ptr, NULL);
+
+ BUG_ON(!fence);
+
+ r = fence_signal(fence);
+ if (!r)
+ FENCE_TRACE(fence, "signaled from irq context\n");
+ else
+ BUG();
+
+ fence_put(fence);
+ }
}
/**
@@ -234,77 +223,6 @@ static void amdgpu_fence_fallback(unsigned long arg)
}
/**
- * amdgpu_fence_seq_signaled - check if a fence sequence number has signaled
- *
- * @ring: ring the fence is associated with
- * @seq: sequence number
- *
- * Check if the last signaled fence sequnce number is >= the requested
- * sequence number (all asics).
- * Returns true if the fence has signaled (current fence value
- * is >= requested value) or false if it has not (current fence
- * value is < the requested value. Helper function for
- * amdgpu_fence_signaled().
- */
-static bool amdgpu_fence_seq_signaled(struct amdgpu_ring *ring, u64 seq)
-{
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- /* poll new last sequence at least once */
- amdgpu_fence_process(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return true;
-
- return false;
-}
-
-/*
- * amdgpu_ring_wait_seq - wait for seq of the specific ring to signal
- * @ring: ring to wait on for the seq number
- * @seq: seq number wait for
- *
- * return value:
- * 0: seq signaled, and gpu not hang
- * -EINVAL: some paramter is not valid
- */
-static int amdgpu_fence_ring_wait_seq(struct amdgpu_ring *ring, uint64_t seq)
-{
- BUG_ON(!ring);
- if (seq > ring->fence_drv.sync_seq)
- return -EINVAL;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= seq)
- return 0;
-
- amdgpu_fence_schedule_fallback(ring);
- wait_event(ring->fence_drv.fence_queue,
- amdgpu_fence_seq_signaled(ring, seq));
-
- return 0;
-}
-
-/**
- * amdgpu_fence_wait_next - wait for the next fence to signal
- *
- * @adev: amdgpu device pointer
- * @ring: ring index the fence is associated with
- *
- * Wait for the next fence on the requested ring to signal (all asics).
- * Returns 0 if the next fence has passed, error for all other cases.
- * Caller must hold ring lock.
- */
-int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
-{
- uint64_t seq = atomic64_read(&ring->fence_drv.last_seq) + 1ULL;
-
- if (seq >= ring->fence_drv.sync_seq)
- return -ENOENT;
-
- return amdgpu_fence_ring_wait_seq(ring, seq);
-}
-
-/**
* amdgpu_fence_wait_empty - wait for all fences to signal
*
* @adev: amdgpu device pointer
@@ -312,16 +230,28 @@ int amdgpu_fence_wait_next(struct amdgpu_ring *ring)
*
* Wait for all fences on the requested ring to signal (all asics).
* Returns 0 if the fences have passed, error for all other cases.
- * Caller must hold ring lock.
*/
int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
{
- uint64_t seq = ring->fence_drv.sync_seq;
+ uint64_t seq = ACCESS_ONCE(ring->fence_drv.sync_seq);
+ struct fence *fence, **ptr;
+ int r;
if (!seq)
return 0;
- return amdgpu_fence_ring_wait_seq(ring, seq);
+ ptr = &ring->fence_drv.fences[seq & ring->fence_drv.num_fences_mask];
+ rcu_read_lock();
+ fence = rcu_dereference(*ptr);
+ if (!fence || !fence_get_rcu(fence)) {
+ rcu_read_unlock();
+ return 0;
+ }
+ rcu_read_unlock();
+
+ r = fence_wait(fence, false);
+ fence_put(fence);
+ return r;
}
/**
@@ -341,13 +271,10 @@ unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring)
* but it's ok to report slightly wrong fence count here.
*/
amdgpu_fence_process(ring);
- emitted = ring->fence_drv.sync_seq
- - atomic64_read(&ring->fence_drv.last_seq);
- /* to avoid 32bits warp around */
- if (emitted > 0x10000000)
- emitted = 0x10000000;
-
- return (unsigned)emitted;
+ emitted = 0x100000000ull;
+ emitted -= atomic_read(&ring->fence_drv.last_seq);
+ emitted += ACCESS_ONCE(ring->fence_drv.sync_seq);
+ return lower_32_bits(emitted);
}
/**
@@ -379,7 +306,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
ring->fence_drv.cpu_addr = adev->uvd.cpu_addr + index;
ring->fence_drv.gpu_addr = adev->uvd.gpu_addr + index;
}
- amdgpu_fence_write(ring, atomic64_read(&ring->fence_drv.last_seq));
+ amdgpu_fence_write(ring, atomic_read(&ring->fence_drv.last_seq));
amdgpu_irq_get(adev, irq_src, irq_type);
ring->fence_drv.irq_src = irq_src;
@@ -397,25 +324,36 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
* for the requested ring.
*
* @ring: ring to init the fence driver on
+ * @num_hw_submission: number of entries on the hardware queue
*
* Init the fence driver for the requested ring (all asics).
* Helper function for amdgpu_fence_driver_init().
*/
-int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
+int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
+ unsigned num_hw_submission)
{
long timeout;
int r;
+ /* Check that num_hw_submission is a power of two */
+ if ((num_hw_submission & (num_hw_submission - 1)) != 0)
+ return -EINVAL;
+
ring->fence_drv.cpu_addr = NULL;
ring->fence_drv.gpu_addr = 0;
ring->fence_drv.sync_seq = 0;
- atomic64_set(&ring->fence_drv.last_seq, 0);
+ atomic_set(&ring->fence_drv.last_seq, 0);
ring->fence_drv.initialized = false;
setup_timer(&ring->fence_drv.fallback_timer, amdgpu_fence_fallback,
(unsigned long)ring);
- init_waitqueue_head(&ring->fence_drv.fence_queue);
+ ring->fence_drv.num_fences_mask = num_hw_submission - 1;
+ spin_lock_init(&ring->fence_drv.lock);
+ ring->fence_drv.fences = kcalloc(num_hw_submission, sizeof(void *),
+ GFP_KERNEL);
+ if (!ring->fence_drv.fences)
+ return -ENOMEM;
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
if (timeout == 0) {
@@ -429,7 +367,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring)
timeout = MAX_SCHEDULE_TIMEOUT;
}
r = amd_sched_init(&ring->sched, &amdgpu_sched_ops,
- amdgpu_sched_hw_submission,
+ num_hw_submission,
timeout, ring->name);
if (r) {
DRM_ERROR("Failed to create scheduler on ring %s.\n",
@@ -477,10 +415,9 @@ int amdgpu_fence_driver_init(struct amdgpu_device *adev)
*/
void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
{
- int i, r;
+ unsigned i, j;
+ int r;
- if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
- kmem_cache_destroy(amdgpu_fence_slab);
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
struct amdgpu_ring *ring = adev->rings[i];
@@ -491,13 +428,18 @@ void amdgpu_fence_driver_fini(struct amdgpu_device *adev)
/* no need to trigger GPU reset as we are unloading */
amdgpu_fence_driver_force_completion(adev);
}
- wake_up_all(&ring->fence_drv.fence_queue);
amdgpu_irq_put(adev, ring->fence_drv.irq_src,
ring->fence_drv.irq_type);
amd_sched_fini(&ring->sched);
del_timer_sync(&ring->fence_drv.fallback_timer);
+ for (j = 0; j <= ring->fence_drv.num_fences_mask; ++j)
+ fence_put(ring->fence_drv.fences[i]);
+ kfree(ring->fence_drv.fences);
ring->fence_drv.initialized = false;
}
+
+ if (atomic_dec_and_test(&amdgpu_fence_slab_ref))
+ kmem_cache_destroy(amdgpu_fence_slab);
}
/**
@@ -594,103 +536,57 @@ static const char *amdgpu_fence_get_timeline_name(struct fence *f)
}
/**
- * amdgpu_fence_is_signaled - test if fence is signaled
- *
- * @f: fence to test
+ * amdgpu_fence_enable_signaling - enable signalling on fence
+ * @fence: fence
*
- * Test the fence sequence number if it is already signaled. If it isn't
- * signaled start fence processing. Returns True if the fence is signaled.
+ * This function is called with fence_queue lock held, and adds a callback
+ * to fence_queue that checks if this fence is signaled, and if so it
+ * signals the fence and removes itself.
*/
-static bool amdgpu_fence_is_signaled(struct fence *f)
+static bool amdgpu_fence_enable_signaling(struct fence *f)
{
struct amdgpu_fence *fence = to_amdgpu_fence(f);
struct amdgpu_ring *ring = fence->ring;
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
-
- amdgpu_fence_process(ring);
+ if (!timer_pending(&ring->fence_drv.fallback_timer))
+ amdgpu_fence_schedule_fallback(ring);
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return true;
+ FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return false;
+ return true;
}
/**
- * amdgpu_fence_check_signaled - callback from fence_queue
+ * amdgpu_fence_free - free up the fence memory
+ *
+ * @rcu: RCU callback head
*
- * this function is called with fence_queue lock held, which is also used
- * for the fence locking itself, so unlocked variants are used for
- * fence_signal, and remove_wait_queue.
+ * Free up the fence memory after the RCU grace period.
*/
-static int amdgpu_fence_check_signaled(wait_queue_t *wait, unsigned mode, int flags, void *key)
+static void amdgpu_fence_free(struct rcu_head *rcu)
{
- struct amdgpu_fence *fence;
- struct amdgpu_device *adev;
- u64 seq;
- int ret;
-
- fence = container_of(wait, struct amdgpu_fence, fence_wake);
- adev = fence->ring->adev;
-
- /*
- * We cannot use amdgpu_fence_process here because we're already
- * in the waitqueue, in a call from wake_up_all.
- */
- seq = atomic64_read(&fence->ring->fence_drv.last_seq);
- if (seq >= fence->seq) {
- ret = fence_signal_locked(&fence->base);
- if (!ret)
- FENCE_TRACE(&fence->base, "signaled from irq context\n");
- else
- FENCE_TRACE(&fence->base, "was already signaled\n");
-
- __remove_wait_queue(&fence->ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_put(&fence->base);
- } else
- FENCE_TRACE(&fence->base, "pending\n");
- return 0;
+ struct fence *f = container_of(rcu, struct fence, rcu);
+ struct amdgpu_fence *fence = to_amdgpu_fence(f);
+ kmem_cache_free(amdgpu_fence_slab, fence);
}
/**
- * amdgpu_fence_enable_signaling - enable signalling on fence
+ * amdgpu_fence_release - callback that fence can be freed
+ *
* @fence: fence
*
- * This function is called with fence_queue lock held, and adds a callback
- * to fence_queue that checks if this fence is signaled, and if so it
- * signals the fence and removes itself.
+ * This function is called when the reference count becomes zero.
+ * It just RCU schedules freeing up the fence.
*/
-static bool amdgpu_fence_enable_signaling(struct fence *f)
-{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- struct amdgpu_ring *ring = fence->ring;
-
- if (atomic64_read(&ring->fence_drv.last_seq) >= fence->seq)
- return false;
-
- fence->fence_wake.flags = 0;
- fence->fence_wake.private = NULL;
- fence->fence_wake.func = amdgpu_fence_check_signaled;
- __add_wait_queue(&ring->fence_drv.fence_queue, &fence->fence_wake);
- fence_get(f);
- if (!timer_pending(&ring->fence_drv.fallback_timer))
- amdgpu_fence_schedule_fallback(ring);
- FENCE_TRACE(&fence->base, "armed on ring %i!\n", ring->idx);
- return true;
-}
-
static void amdgpu_fence_release(struct fence *f)
{
- struct amdgpu_fence *fence = to_amdgpu_fence(f);
- kmem_cache_free(amdgpu_fence_slab, fence);
+ call_rcu(&f->rcu, amdgpu_fence_free);
}
-const struct fence_ops amdgpu_fence_ops = {
+static const struct fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
- .signaled = amdgpu_fence_is_signaled,
.wait = fence_default_wait,
.release = amdgpu_fence_release,
};
@@ -714,9 +610,9 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
- seq_printf(m, "Last signaled fence 0x%016llx\n",
- (unsigned long long)atomic64_read(&ring->fence_drv.last_seq));
- seq_printf(m, "Last emitted 0x%016llx\n",
+ seq_printf(m, "Last signaled fence 0x%08x\n",
+ atomic_read(&ring->fence_drv.last_seq));
+ seq_printf(m, "Last emitted 0x%08x\n",
ring->fence_drv.sync_seq);
}
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 2e26a517f2d6..fa6a27bff298 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -26,6 +26,7 @@
* Jerome Glisse
*/
#include <linux/ktime.h>
+#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
@@ -140,25 +141,40 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj, struct drm_file *file_pri
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv)
{
- struct amdgpu_bo *rbo = gem_to_amdgpu_bo(obj);
- struct amdgpu_device *adev = rbo->adev;
+ struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
+ struct amdgpu_device *adev = bo->adev;
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
+
+ struct amdgpu_bo_list_entry vm_pd;
+ struct list_head list, duplicates;
+ struct ttm_validate_buffer tv;
+ struct ww_acquire_ctx ticket;
struct amdgpu_bo_va *bo_va;
int r;
- r = amdgpu_bo_reserve(rbo, true);
+
+ INIT_LIST_HEAD(&list);
+ INIT_LIST_HEAD(&duplicates);
+
+ tv.bo = &bo->tbo;
+ tv.shared = true;
+ list_add(&tv.head, &list);
+
+ amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
+
+ r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
return;
}
- bo_va = amdgpu_vm_bo_find(vm, rbo);
+ bo_va = amdgpu_vm_bo_find(vm, bo);
if (bo_va) {
if (--bo_va->ref_count == 0) {
amdgpu_vm_bo_rmv(adev, bo_va);
}
}
- amdgpu_bo_unreserve(rbo);
+ ttm_eu_backoff_reservation(&ticket, &list);
}
static int amdgpu_gem_handle_lockup(struct amdgpu_device *adev, int r)
@@ -243,12 +259,10 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
AMDGPU_GEM_USERPTR_REGISTER))
return -EINVAL;
- if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) && (
- !(args->flags & AMDGPU_GEM_USERPTR_ANONONLY) ||
- !(args->flags & AMDGPU_GEM_USERPTR_REGISTER))) {
+ if (!(args->flags & AMDGPU_GEM_USERPTR_READONLY) &&
+ !(args->flags & AMDGPU_GEM_USERPTR_REGISTER)) {
- /* if we want to write to it we must require anonymous
- memory and install a MMU notifier */
+ /* if we want to write to it we must install a MMU notifier */
return -EACCES;
}
@@ -274,18 +288,23 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
down_read(&current->mm->mmap_sem);
+
+ r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
+ bo->tbo.ttm->pages);
+ if (r)
+ goto unlock_mmap_sem;
+
r = amdgpu_bo_reserve(bo, true);
- if (r) {
- up_read(&current->mm->mmap_sem);
- goto release_object;
- }
+ if (r)
+ goto free_pages;
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
amdgpu_bo_unreserve(bo);
- up_read(&current->mm->mmap_sem);
if (r)
- goto release_object;
+ goto free_pages;
+
+ up_read(&current->mm->mmap_sem);
}
r = drm_gem_handle_create(filp, gobj, &handle);
@@ -297,6 +316,12 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
args->handle = handle;
return 0;
+free_pages:
+ release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages, false);
+
+unlock_mmap_sem:
+ up_read(&current->mm->mmap_sem);
+
release_object:
drm_gem_object_unreference_unlocked(gobj);
@@ -569,11 +594,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
tv.shared = true;
list_add(&tv.head, &list);
- if (args->operation == AMDGPU_VA_OP_MAP) {
- tv_pd.bo = &fpriv->vm.page_directory->tbo;
- tv_pd.shared = true;
- list_add(&tv_pd.head, &list);
- }
+ tv_pd.bo = &fpriv->vm.page_directory->tbo;
+ tv_pd.shared = true;
+ list_add(&tv_pd.head, &list);
+
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
if (r) {
drm_gem_object_unreference_unlocked(gobj);
@@ -606,7 +630,8 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
ttm_eu_backoff_reservation(&ticket, &list);
- if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE))
+ if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
+ !amdgpu_vm_debug)
amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
drm_gem_object_unreference_unlocked(gobj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
index b5bdd5d59b58..8443cea6821a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@@ -75,6 +75,7 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
ib->vm = vm;
+ ib->vm_id = 0;
return 0;
}
@@ -84,14 +85,13 @@ int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
*
* @adev: amdgpu_device pointer
* @ib: IB object to free
+ * @f: the fence SA bo need wait on for the ib alloation
*
* Free an IB (all asics).
*/
-void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
+void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib, struct fence *f)
{
- amdgpu_sa_bo_free(adev, &ib->sa_bo, &ib->fence->base);
- if (ib->fence)
- fence_put(&ib->fence->base);
+ amdgpu_sa_bo_free(adev, &ib->sa_bo, f);
}
/**
@@ -100,7 +100,6 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* @adev: amdgpu_device pointer
* @num_ibs: number of IBs to schedule
* @ibs: IB objects to schedule
- * @owner: owner for creating the fences
* @f: fence created during this submission
*
* Schedule an IB on the associated ring (all asics).
@@ -117,14 +116,14 @@ void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib)
* to SI there was just a DE IB.
*/
int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
- struct amdgpu_ib *ibs, void *owner,
- struct fence *last_vm_update,
+ struct amdgpu_ib *ibs, struct fence *last_vm_update,
struct fence **f)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_ib *ib = &ibs[0];
struct amdgpu_ctx *ctx, *old_ctx;
struct amdgpu_vm *vm;
+ struct fence *hwf;
unsigned i;
int r = 0;
@@ -139,7 +138,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
return -EINVAL;
}
- if (vm && !ibs->grabbed_vmid) {
+ if (vm && !ibs->vm_id) {
dev_err(adev->dev, "VM IB without ID\n");
return -EINVAL;
}
@@ -152,13 +151,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (vm) {
/* do context switch */
- amdgpu_vm_flush(ring, vm, last_vm_update);
-
- if (ring->funcs->emit_gds_switch)
- amdgpu_ring_emit_gds_switch(ring, ib->vm->ids[ring->idx].id,
- ib->gds_base, ib->gds_size,
- ib->gws_base, ib->gws_size,
- ib->oa_base, ib->oa_size);
+ amdgpu_vm_flush(ring, ib->vm_id, ib->vm_pd_addr,
+ ib->gds_base, ib->gds_size,
+ ib->gws_base, ib->gws_size,
+ ib->oa_base, ib->oa_size);
if (ring->funcs->emit_hdp_flush)
amdgpu_ring_emit_hdp_flush(ring);
@@ -170,6 +166,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
if (ib->ctx != ctx || ib->vm != vm) {
ring->current_ctx = old_ctx;
+ if (ib->vm_id)
+ amdgpu_vm_reset_id(adev, ib->vm_id);
amdgpu_ring_undo(ring);
return -EINVAL;
}
@@ -177,10 +175,17 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
ring->current_ctx = ctx;
}
- r = amdgpu_fence_emit(ring, owner, &ib->fence);
+ if (vm) {
+ if (ring->funcs->emit_hdp_invalidate)
+ amdgpu_ring_emit_hdp_invalidate(ring);
+ }
+
+ r = amdgpu_fence_emit(ring, &hwf);
if (r) {
dev_err(adev->dev, "failed to emit fence (%d)\n", r);
ring->current_ctx = old_ctx;
+ if (ib->vm_id)
+ amdgpu_vm_reset_id(adev, ib->vm_id);
amdgpu_ring_undo(ring);
return r;
}
@@ -194,7 +199,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
}
if (f)
- *f = fence_get(&ib->fence->base);
+ *f = fence_get(hwf);
amdgpu_ring_commit(ring);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index f29bbb96a881..9c9b19e2f353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -70,9 +70,13 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
void amdgpu_job_free(struct amdgpu_job *job)
{
unsigned i;
+ struct fence *f;
+ /* use sched fence if available */
+ f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
for (i = 0; i < job->num_ibs; ++i)
- amdgpu_ib_free(job->adev, &job->ibs[i]);
+ amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
+ fence_put(job->fence);
amdgpu_bo_unref(&job->uf.bo);
amdgpu_sync_free(&job->sync);
@@ -105,16 +109,23 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
struct fence *fence = amdgpu_sync_get_fence(&job->sync);
- if (fence == NULL && vm && !job->ibs->grabbed_vmid) {
+ if (fence == NULL && vm && !job->ibs->vm_id) {
struct amdgpu_ring *ring = job->ring;
+ unsigned i, vm_id;
+ uint64_t vm_pd_addr;
int r;
r = amdgpu_vm_grab_id(vm, ring, &job->sync,
- &job->base.s_fence->base);
+ &job->base.s_fence->base,
+ &vm_id, &vm_pd_addr);
if (r)
DRM_ERROR("Error getting VM ID (%d)\n", r);
- else
- job->ibs->grabbed_vmid = true;
+ else {
+ for (i = 0; i < job->num_ibs; ++i) {
+ job->ibs[i].vm_id = vm_id;
+ job->ibs[i].vm_pd_addr = vm_pd_addr;
+ }
+ }
fence = amdgpu_sync_get_fence(&job->sync);
}
@@ -141,7 +152,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
}
trace_amdgpu_sched_run_job(job);
- r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job->owner,
+ r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
job->sync.last_vm_update, &fence);
if (r) {
DRM_ERROR("Error scheduling IBs (%d)\n", r);
@@ -149,6 +160,7 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
}
err:
+ job->fence = fence;
amdgpu_job_free(job);
return fence;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 9a025a77958d..151a2d42c639 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -308,7 +308,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
bool is_iomem;
- int r;
+ long r;
if (bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS)
return -EPERM;
@@ -319,14 +319,20 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
}
return 0;
}
+
+ r = reservation_object_wait_timeout_rcu(bo->tbo.resv, false, false,
+ MAX_SCHEDULE_TIMEOUT);
+ if (r < 0)
+ return r;
+
r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
- if (r) {
+ if (r)
return r;
- }
+
bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
- if (ptr) {
+ if (ptr)
*ptr = bo->kptr;
- }
+
return 0;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index d77b2bdbe800..ff9597ce268c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -113,6 +113,10 @@ static ssize_t amdgpu_get_dpm_forced_performance_level(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return snprintf(buf, PAGE_SIZE, "off\n");
+
if (adev->pp_enabled) {
enum amd_dpm_forced_level level;
@@ -142,6 +146,11 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
enum amdgpu_dpm_forced_level level;
int ret = 0;
+ /* Can't force performance level when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (strncmp("low", buf, strlen("low")) == 0) {
level = AMDGPU_DPM_FORCED_LEVEL_LOW;
} else if (strncmp("high", buf, strlen("high")) == 0) {
@@ -161,6 +170,7 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
mutex_lock(&adev->pm.mutex);
if (adev->pm.dpm.thermal_active) {
count = -EINVAL;
+ mutex_unlock(&adev->pm.mutex);
goto fail;
}
ret = amdgpu_dpm_force_performance_level(adev, level);
@@ -171,8 +181,6 @@ static ssize_t amdgpu_set_dpm_forced_performance_level(struct device *dev,
mutex_unlock(&adev->pm.mutex);
}
fail:
- mutex_unlock(&adev->pm.mutex);
-
return count;
}
@@ -469,8 +477,14 @@ static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
+ struct drm_device *ddev = adev->ddev;
int temp;
+ /* Can't get temperature when the card is off */
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON))
+ return -EINVAL;
+
if (!adev->pp_enabled && !adev->pm.funcs->get_temperature)
temp = 0;
else
@@ -919,11 +933,6 @@ force:
/* update display watermarks based on new power state */
amdgpu_display_bandwidth_update(adev);
- /* update displays */
- amdgpu_dpm_display_configuration_changed(adev);
-
- adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
- adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
/* wait for the rings to drain */
for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
@@ -940,6 +949,12 @@ force:
amdgpu_dpm_post_set_power_state(adev);
+ /* update displays */
+ amdgpu_dpm_display_configuration_changed(adev);
+
+ adev->pm.dpm.current_active_crtcs = adev->pm.dpm.new_active_crtcs;
+ adev->pm.dpm.current_active_crtc_count = adev->pm.dpm.new_active_crtc_count;
+
if (adev->pm.funcs->force_performance_level) {
if (adev->pm.dpm.thermal_active) {
enum amdgpu_dpm_forced_level level = adev->pm.dpm.forced_level;
@@ -1174,12 +1189,16 @@ static int amdgpu_debugfs_pm_info(struct seq_file *m, void *data)
struct drm_info_node *node = (struct drm_info_node *) m->private;
struct drm_device *dev = node->minor->dev;
struct amdgpu_device *adev = dev->dev_private;
+ struct drm_device *ddev = adev->ddev;
if (!adev->pm.dpm_enabled) {
seq_printf(m, "dpm not enabled\n");
return 0;
}
- if (adev->pp_enabled) {
+ if ((adev->flags & AMD_IS_PX) &&
+ (ddev->switch_power_state != DRM_SWITCH_POWER_ON)) {
+ seq_printf(m, "PX asic powered off\n");
+ } else if (adev->pp_enabled) {
amdgpu_dpm_debugfs_print_current_performance_level(adev, m);
} else {
mutex_lock(&adev->pm.mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
index b9d0d55f6b47..3cb6d6c413c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@@ -143,8 +143,10 @@ static int amdgpu_pp_late_init(void *handle)
adev->powerplay.pp_handle);
#ifdef CONFIG_DRM_AMD_POWERPLAY
- if (adev->pp_enabled)
+ if (adev->pp_enabled) {
amdgpu_pm_sysfs_init(adev);
+ amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_COMPLETE_INIT, NULL, NULL);
+ }
#endif
return ret;
}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 56c07e3fdb33..972eed2ef787 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -236,7 +236,8 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
ring->adev = adev;
ring->idx = adev->num_rings++;
adev->rings[ring->idx] = ring;
- r = amdgpu_fence_driver_init_ring(ring);
+ r = amdgpu_fence_driver_init_ring(ring,
+ amdgpu_sched_hw_submission);
if (r)
return r;
}
@@ -352,30 +353,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
}
}
-/**
- * amdgpu_ring_from_fence - get ring from fence
- *
- * @f: fence structure
- *
- * Extract the ring a fence belongs to. Handles both scheduler as
- * well as hardware fences.
- */
-struct amdgpu_ring *amdgpu_ring_from_fence(struct fence *f)
-{
- struct amdgpu_fence *a_fence;
- struct amd_sched_fence *s_fence;
-
- s_fence = to_amd_sched_fence(f);
- if (s_fence)
- return container_of(s_fence->sched, struct amdgpu_ring, sched);
-
- a_fence = to_amdgpu_fence(f);
- if (a_fence)
- return a_fence->ring;
-
- return NULL;
-}
-
/*
* Debugfs info
*/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
index 7d8f8f1e3f7f..8bf84efafb04 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@@ -60,9 +60,8 @@ int amdgpu_sa_bo_manager_init(struct amdgpu_device *adev,
sa_manager->align = align;
sa_manager->hole = &sa_manager->olist;
INIT_LIST_HEAD(&sa_manager->olist);
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
INIT_LIST_HEAD(&sa_manager->flist[i]);
- }
r = amdgpu_bo_create(adev, size, align, true, domain,
0, NULL, NULL, &sa_manager->bo);
@@ -228,11 +227,9 @@ static bool amdgpu_sa_event(struct amdgpu_sa_manager *sa_manager,
unsigned soffset, eoffset, wasted;
int i;
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- if (!list_empty(&sa_manager->flist[i])) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
+ if (!list_empty(&sa_manager->flist[i]))
return true;
- }
- }
soffset = amdgpu_sa_bo_hole_soffset(sa_manager);
eoffset = amdgpu_sa_bo_hole_eoffset(sa_manager);
@@ -265,12 +262,11 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
/* go over all fence list and try to find the closest sa_bo
* of the current last
*/
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
struct amdgpu_sa_bo *sa_bo;
- if (list_empty(&sa_manager->flist[i])) {
+ if (list_empty(&sa_manager->flist[i]))
continue;
- }
sa_bo = list_first_entry(&sa_manager->flist[i],
struct amdgpu_sa_bo, flist);
@@ -299,7 +295,9 @@ static bool amdgpu_sa_bo_next_hole(struct amdgpu_sa_manager *sa_manager,
}
if (best_bo) {
- uint32_t idx = amdgpu_ring_from_fence(best_bo->fence)->idx;
+ uint32_t idx = best_bo->fence->context;
+
+ idx %= AMDGPU_SA_NUM_FENCE_LISTS;
++tries[idx];
sa_manager->hole = best_bo->olist.prev;
@@ -315,8 +313,8 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
struct amdgpu_sa_bo **sa_bo,
unsigned size, unsigned align)
{
- struct fence *fences[AMDGPU_MAX_RINGS];
- unsigned tries[AMDGPU_MAX_RINGS];
+ struct fence *fences[AMDGPU_SA_NUM_FENCE_LISTS];
+ unsigned tries[AMDGPU_SA_NUM_FENCE_LISTS];
unsigned count;
int i, r;
signed long t;
@@ -338,7 +336,7 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
spin_lock(&sa_manager->wq.lock);
do {
- for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
+ for (i = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i) {
fences[i] = NULL;
tries[i] = 0;
}
@@ -355,14 +353,17 @@ int amdgpu_sa_bo_new(struct amdgpu_sa_manager *sa_manager,
/* see if we can skip over some allocations */
} while (amdgpu_sa_bo_next_hole(sa_manager, fences, tries));
- for (i = 0, count = 0; i < AMDGPU_MAX_RINGS; ++i)
+ for (i = 0, count = 0; i < AMDGPU_SA_NUM_FENCE_LISTS; ++i)
if (fences[i])
- fences[count++] = fences[i];
+ fences[count++] = fence_get(fences[i]);
if (count) {
spin_unlock(&sa_manager->wq.lock);
t = fence_wait_any_timeout(fences, count, false,
MAX_SCHEDULE_TIMEOUT);
+ for (i = 0; i < count; ++i)
+ fence_put(fences[i]);
+
r = (t > 0) ? 0 : t;
spin_lock(&sa_manager->wq.lock);
} else {
@@ -394,8 +395,9 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
spin_lock(&sa_manager->wq.lock);
if (fence && !fence_is_signaled(fence)) {
uint32_t idx;
+
(*sa_bo)->fence = fence_get(fence);
- idx = amdgpu_ring_from_fence(fence)->idx;
+ idx = fence->context % AMDGPU_SA_NUM_FENCE_LISTS;
list_add_tail(&(*sa_bo)->flist, &sa_manager->flist[idx]);
} else {
amdgpu_sa_bo_remove_locked(*sa_bo);
@@ -407,25 +409,6 @@ void amdgpu_sa_bo_free(struct amdgpu_device *adev, struct amdgpu_sa_bo **sa_bo,
#if defined(CONFIG_DEBUG_FS)
-static void amdgpu_sa_bo_dump_fence(struct fence *fence, struct seq_file *m)
-{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(fence);
- struct amd_sched_fence *s_fence = to_amd_sched_fence(fence);
-
- if (a_fence)
- seq_printf(m, " protected by 0x%016llx on ring %d",
- a_fence->seq, a_fence->ring->idx);
-
- if (s_fence) {
- struct amdgpu_ring *ring;
-
-
- ring = container_of(s_fence->sched, struct amdgpu_ring, sched);
- seq_printf(m, " protected by 0x%016x on ring %d",
- s_fence->base.seqno, ring->idx);
- }
-}
-
void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
struct seq_file *m)
{
@@ -442,8 +425,11 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
}
seq_printf(m, "[0x%010llx 0x%010llx] size %8lld",
soffset, eoffset, eoffset - soffset);
+
if (i->fence)
- amdgpu_sa_bo_dump_fence(i->fence, m);
+ seq_printf(m, " protected by 0x%08x on context %d",
+ i->fence->seqno, i->fence->context);
+
seq_printf(m, "\n");
}
spin_unlock(&sa_manager->wq.lock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
index c15be00de904..c48b4fce5e57 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@@ -37,6 +37,8 @@ struct amdgpu_sync_entry {
struct fence *fence;
};
+static struct kmem_cache *amdgpu_sync_slab;
+
/**
* amdgpu_sync_create - zero init sync object
*
@@ -50,14 +52,18 @@ void amdgpu_sync_create(struct amdgpu_sync *sync)
sync->last_vm_update = NULL;
}
+/**
+ * amdgpu_sync_same_dev - test if fence belong to us
+ *
+ * @adev: amdgpu device to use for the test
+ * @f: fence to test
+ *
+ * Test if the fence was issued by us.
+ */
static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
- if (a_fence)
- return a_fence->ring->adev == adev;
-
if (s_fence) {
struct amdgpu_ring *ring;
@@ -68,17 +74,31 @@ static bool amdgpu_sync_same_dev(struct amdgpu_device *adev, struct fence *f)
return false;
}
-static bool amdgpu_sync_test_owner(struct fence *f, void *owner)
+/**
+ * amdgpu_sync_get_owner - extract the owner of a fence
+ *
+ * @fence: fence get the owner from
+ *
+ * Extract who originally created the fence.
+ */
+static void *amdgpu_sync_get_owner(struct fence *f)
{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
if (s_fence)
- return s_fence->owner == owner;
- if (a_fence)
- return a_fence->owner == owner;
- return false;
+ return s_fence->owner;
+
+ return AMDGPU_FENCE_OWNER_UNDEFINED;
}
+/**
+ * amdgpu_sync_keep_later - Keep the later fence
+ *
+ * @keep: existing fence to test
+ * @fence: new fence
+ *
+ * Either keep the existing fence or the new one, depending which one is later.
+ */
static void amdgpu_sync_keep_later(struct fence **keep, struct fence *fence)
{
if (*keep && fence_is_later(*keep, fence))
@@ -104,7 +124,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
if (amdgpu_sync_same_dev(adev, f) &&
- amdgpu_sync_test_owner(f, AMDGPU_FENCE_OWNER_VM))
+ amdgpu_sync_get_owner(f) == AMDGPU_FENCE_OWNER_VM)
amdgpu_sync_keep_later(&sync->last_vm_update, f);
hash_for_each_possible(sync->fences, e, node, f->context) {
@@ -115,7 +135,7 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
- e = kmalloc(sizeof(struct amdgpu_sync_entry), GFP_KERNEL);
+ e = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
if (!e)
return -ENOMEM;
@@ -124,18 +144,6 @@ int amdgpu_sync_fence(struct amdgpu_device *adev, struct amdgpu_sync *sync,
return 0;
}
-static void *amdgpu_sync_get_owner(struct fence *f)
-{
- struct amdgpu_fence *a_fence = to_amdgpu_fence(f);
- struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
-
- if (s_fence)
- return s_fence->owner;
- else if (a_fence)
- return a_fence->owner;
- return AMDGPU_FENCE_OWNER_UNDEFINED;
-}
-
/**
* amdgpu_sync_resv - sync to a reservation object
*
@@ -208,7 +216,7 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
f = e->fence;
hash_del(&e->node);
- kfree(e);
+ kmem_cache_free(amdgpu_sync_slab, e);
if (!fence_is_signaled(f))
return f;
@@ -231,7 +239,7 @@ int amdgpu_sync_wait(struct amdgpu_sync *sync)
hash_del(&e->node);
fence_put(e->fence);
- kfree(e);
+ kmem_cache_free(amdgpu_sync_slab, e);
}
return 0;
@@ -253,8 +261,34 @@ void amdgpu_sync_free(struct amdgpu_sync *sync)
hash_for_each_safe(sync->fences, i, tmp, e, node) {
hash_del(&e->node);
fence_put(e->fence);
- kfree(e);
+ kmem_cache_free(amdgpu_sync_slab, e);
}
fence_put(sync->last_vm_update);
}
+
+/**
+ * amdgpu_sync_init - init sync object subsystem
+ *
+ * Allocate the slab allocator.
+ */
+int amdgpu_sync_init(void)
+{
+ amdgpu_sync_slab = kmem_cache_create(
+ "amdgpu_sync", sizeof(struct amdgpu_sync_entry), 0,
+ SLAB_HWCACHE_ALIGN, NULL);
+ if (!amdgpu_sync_slab)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * amdgpu_sync_fini - fini sync object subsystem
+ *
+ * Free the slab allocator.
+ */
+void amdgpu_sync_fini(void)
+{
+ kmem_cache_destroy(amdgpu_sync_slab);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index 9ca3735c563c..26a5f4acf584 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -100,21 +100,24 @@ TRACE_EVENT(amdgpu_sched_run_job,
TRACE_EVENT(amdgpu_vm_grab_id,
- TP_PROTO(struct amdgpu_vm *vm, unsigned vmid, int ring),
- TP_ARGS(vm, vmid, ring),
+ TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
+ uint64_t pd_addr),
+ TP_ARGS(vm, ring, vmid, pd_addr),
TP_STRUCT__entry(
__field(struct amdgpu_vm *, vm)
- __field(u32, vmid)
__field(u32, ring)
+ __field(u32, vmid)
+ __field(u64, pd_addr)
),
TP_fast_assign(
__entry->vm = vm;
- __entry->vmid = vmid;
__entry->ring = ring;
+ __entry->vmid = vmid;
+ __entry->pd_addr = pd_addr;
),
- TP_printk("vm=%p, id=%u, ring=%u", __entry->vm, __entry->vmid,
- __entry->ring)
+ TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
+ __entry->ring, __entry->vmid, __entry->pd_addr)
);
TRACE_EVENT(amdgpu_vm_bo_map,
@@ -231,8 +234,8 @@ TRACE_EVENT(amdgpu_vm_flush,
__entry->ring = ring;
__entry->id = id;
),
- TP_printk("pd_addr=%010Lx, ring=%u, id=%u",
- __entry->pd_addr, __entry->ring, __entry->id)
+ TP_printk("ring=%u, id=%u, pd_addr=%010Lx",
+ __entry->ring, __entry->id, __entry->pd_addr)
);
TRACE_EVENT(amdgpu_bo_list_set,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index e52fc641edfb..0f42b1a24446 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -494,29 +494,32 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
/*
* TTM backend functions.
*/
+struct amdgpu_ttm_gup_task_list {
+ struct list_head list;
+ struct task_struct *task;
+};
+
struct amdgpu_ttm_tt {
- struct ttm_dma_tt ttm;
- struct amdgpu_device *adev;
- u64 offset;
- uint64_t userptr;
- struct mm_struct *usermm;
- uint32_t userflags;
+ struct ttm_dma_tt ttm;
+ struct amdgpu_device *adev;
+ u64 offset;
+ uint64_t userptr;
+ struct mm_struct *usermm;
+ uint32_t userflags;
+ spinlock_t guptasklock;
+ struct list_head guptasks;
+ atomic_t mmu_invalidations;
};
-/* prepare the sg table with the user pages */
-static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
{
- struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
struct amdgpu_ttm_tt *gtt = (void *)ttm;
- unsigned pinned = 0, nents;
- int r;
-
int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
- enum dma_data_direction direction = write ?
- DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+ unsigned pinned = 0;
+ int r;
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
- /* check that we only pin down anonymous memory
+ /* check that we only use anonymous memory
to prevent problems with writeback */
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
@@ -529,10 +532,21 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
do {
unsigned num_pages = ttm->num_pages - pinned;
uint64_t userptr = gtt->userptr + pinned * PAGE_SIZE;
- struct page **pages = ttm->pages + pinned;
+ struct page **p = pages + pinned;
+ struct amdgpu_ttm_gup_task_list guptask;
+
+ guptask.task = current;
+ spin_lock(&gtt->guptasklock);
+ list_add(&guptask.list, &gtt->guptasks);
+ spin_unlock(&gtt->guptasklock);
r = get_user_pages(current, current->mm, userptr, num_pages,
- write, 0, pages, NULL);
+ write, 0, p, NULL);
+
+ spin_lock(&gtt->guptasklock);
+ list_del(&guptask.list);
+ spin_unlock(&gtt->guptasklock);
+
if (r < 0)
goto release_pages;
@@ -540,6 +554,25 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
} while (pinned < ttm->num_pages);
+ return 0;
+
+release_pages:
+ release_pages(pages, pinned, 0);
+ return r;
+}
+
+/* prepare the sg table with the user pages */
+static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
+{
+ struct amdgpu_device *adev = amdgpu_get_adev(ttm->bdev);
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ unsigned nents;
+ int r;
+
+ int write = !(gtt->userflags & AMDGPU_GEM_USERPTR_READONLY);
+ enum dma_data_direction direction = write ?
+ DMA_BIDIRECTIONAL : DMA_TO_DEVICE;
+
r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
ttm->num_pages << PAGE_SHIFT,
GFP_KERNEL);
@@ -558,9 +591,6 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_tt *ttm)
release_sg:
kfree(ttm->sg);
-
-release_pages:
- release_pages(ttm->pages, pinned, 0);
return r;
}
@@ -725,7 +755,7 @@ static int amdgpu_ttm_tt_populate(struct ttm_tt *ttm)
0, PAGE_SIZE,
PCI_DMA_BIDIRECTIONAL);
if (pci_dma_mapping_error(adev->pdev, gtt->ttm.dma_address[i])) {
- while (--i) {
+ while (i--) {
pci_unmap_page(adev->pdev, gtt->ttm.dma_address[i],
PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
gtt->ttm.dma_address[i] = 0;
@@ -783,6 +813,10 @@ int amdgpu_ttm_tt_set_userptr(struct ttm_tt *ttm, uint64_t addr,
gtt->userptr = addr;
gtt->usermm = current->mm;
gtt->userflags = flags;
+ spin_lock_init(&gtt->guptasklock);
+ INIT_LIST_HEAD(&gtt->guptasks);
+ atomic_set(&gtt->mmu_invalidations, 0);
+
return 0;
}
@@ -800,21 +834,40 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
unsigned long end)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ struct amdgpu_ttm_gup_task_list *entry;
unsigned long size;
- if (gtt == NULL)
- return false;
-
- if (gtt->ttm.ttm.state != tt_bound || !gtt->userptr)
+ if (gtt == NULL || !gtt->userptr)
return false;
size = (unsigned long)gtt->ttm.ttm.num_pages * PAGE_SIZE;
if (gtt->userptr > end || gtt->userptr + size <= start)
return false;
+ spin_lock(&gtt->guptasklock);
+ list_for_each_entry(entry, &gtt->guptasks, list) {
+ if (entry->task == current) {
+ spin_unlock(&gtt->guptasklock);
+ return false;
+ }
+ }
+ spin_unlock(&gtt->guptasklock);
+
+ atomic_inc(&gtt->mmu_invalidations);
+
return true;
}
+bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
+ int *last_invalidated)
+{
+ struct amdgpu_ttm_tt *gtt = (void *)ttm;
+ int prev_invalidated = *last_invalidated;
+
+ *last_invalidated = atomic_read(&gtt->mmu_invalidations);
+ return prev_invalidated != *last_invalidated;
+}
+
bool amdgpu_ttm_tt_is_readonly(struct ttm_tt *ttm)
{
struct amdgpu_ttm_tt *gtt = (void *)ttm;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 1de82bf4fc79..c1a581044417 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -539,13 +539,6 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
return -EINVAL;
}
- r = reservation_object_wait_timeout_rcu(bo->tbo.resv, true, false,
- MAX_SCHEDULE_TIMEOUT);
- if (r < 0) {
- DRM_ERROR("Failed waiting for UVD message (%ld)!\n", r);
- return r;
- }
-
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD message (%ld)!\n", r);
@@ -886,8 +879,8 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
ib->length_dw = 16;
if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib,
- AMDGPU_FENCE_OWNER_UNDEFINED, NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
if (r)
goto err_free;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 39c3aa60381a..4bec0c108cea 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -425,8 +425,8 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
- r = amdgpu_ib_schedule(ring, 1, ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
if (r)
goto err;
@@ -487,9 +487,8 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
ib->ptr[i] = 0x0;
if (direct) {
- r = amdgpu_ib_schedule(ring, 1, ib,
- AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
+ job->fence = f;
if (r)
goto err;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 264c5968a1d3..b6c011b83641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -50,6 +50,9 @@
* SI supports 16.
*/
+/* Special value that no flush is necessary */
+#define AMDGPU_VM_NO_FLUSH (~0ll)
+
/**
* amdgpu_vm_num_pde - return the number of page directory entries
*
@@ -92,6 +95,7 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
entry->priority = 0;
entry->tv.bo = &vm->page_directory->tbo;
entry->tv.shared = true;
+ entry->user_pages = NULL;
list_add(&entry->tv.head, validated);
}
@@ -157,50 +161,77 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
* Allocate an id for the vm, adding fences to the sync obj as necessary.
*/
int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
- struct amdgpu_sync *sync, struct fence *fence)
+ struct amdgpu_sync *sync, struct fence *fence,
+ unsigned *vm_id, uint64_t *vm_pd_addr)
{
- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
+ uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
struct amdgpu_device *adev = ring->adev;
- struct amdgpu_vm_manager_id *id;
+ struct amdgpu_vm_id *id = &vm->ids[ring->idx];
+ struct fence *updates = sync->last_vm_update;
int r;
mutex_lock(&adev->vm_manager.lock);
/* check if the id is still valid */
- if (vm_id->id) {
+ if (id->mgr_id) {
+ struct fence *flushed = id->flushed_updates;
+ bool is_later;
long owner;
- id = &adev->vm_manager.ids[vm_id->id];
- owner = atomic_long_read(&id->owner);
- if (owner == (long)vm) {
- list_move_tail(&id->list, &adev->vm_manager.ids_lru);
- trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
+ if (!flushed)
+ is_later = true;
+ else if (!updates)
+ is_later = false;
+ else
+ is_later = fence_is_later(updates, flushed);
+
+ owner = atomic_long_read(&id->mgr_id->owner);
+ if (!is_later && owner == (long)id &&
+ pd_addr == id->pd_gpu_addr) {
+
+ r = amdgpu_sync_fence(ring->adev, sync,
+ id->mgr_id->active);
+ if (r) {
+ mutex_unlock(&adev->vm_manager.lock);
+ return r;
+ }
+
+ fence_put(id->mgr_id->active);
+ id->mgr_id->active = fence_get(fence);
+
+ list_move_tail(&id->mgr_id->list,
+ &adev->vm_manager.ids_lru);
- fence_put(id->active);
- id->active = fence_get(fence);
+ *vm_id = id->mgr_id - adev->vm_manager.ids;
+ *vm_pd_addr = AMDGPU_VM_NO_FLUSH;
+ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id,
+ *vm_pd_addr);
mutex_unlock(&adev->vm_manager.lock);
return 0;
}
}
- /* we definately need to flush */
- vm_id->pd_gpu_addr = ~0ll;
+ id->mgr_id = list_first_entry(&adev->vm_manager.ids_lru,
+ struct amdgpu_vm_manager_id,
+ list);
- id = list_first_entry(&adev->vm_manager.ids_lru,
- struct amdgpu_vm_manager_id,
- list);
- list_move_tail(&id->list, &adev->vm_manager.ids_lru);
- atomic_long_set(&id->owner, (long)vm);
+ r = amdgpu_sync_fence(ring->adev, sync, id->mgr_id->active);
+ if (!r) {
+ fence_put(id->mgr_id->active);
+ id->mgr_id->active = fence_get(fence);
- vm_id->id = id - adev->vm_manager.ids;
- trace_amdgpu_vm_grab_id(vm, vm_id->id, ring->idx);
+ fence_put(id->flushed_updates);
+ id->flushed_updates = fence_get(updates);
- r = amdgpu_sync_fence(ring->adev, sync, id->active);
+ id->pd_gpu_addr = pd_addr;
- if (!r) {
- fence_put(id->active);
- id->active = fence_get(fence);
+ list_move_tail(&id->mgr_id->list, &adev->vm_manager.ids_lru);
+ atomic_long_set(&id->mgr_id->owner, (long)id);
+
+ *vm_id = id->mgr_id - adev->vm_manager.ids;
+ *vm_pd_addr = pd_addr;
+ trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
}
mutex_unlock(&adev->vm_manager.lock);
@@ -211,39 +242,71 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
- * @vm: vm we want to flush
- * @updates: last vm update that we waited for
+ * @vm_id: vmid number to use
+ * @pd_addr: address of the page directory
*
- * Flush the vm.
+ * Emit a VM flush when it is necessary.
*/
void amdgpu_vm_flush(struct amdgpu_ring *ring,
- struct amdgpu_vm *vm,
- struct fence *updates)
+ unsigned vm_id, uint64_t pd_addr,
+ uint32_t gds_base, uint32_t gds_size,
+ uint32_t gws_base, uint32_t gws_size,
+ uint32_t oa_base, uint32_t oa_size)
{
- uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
- struct amdgpu_vm_id *vm_id = &vm->ids[ring->idx];
- struct fence *flushed_updates = vm_id->flushed_updates;
- bool is_later;
-
- if (!flushed_updates)
- is_later = true;
- else if (!updates)
- is_later = false;
- else
- is_later = fence_is_later(updates, flushed_updates);
+ struct amdgpu_device *adev = ring->adev;
+ struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+ bool gds_switch_needed = ring->funcs->emit_gds_switch && (
+ mgr_id->gds_base != gds_base ||
+ mgr_id->gds_size != gds_size ||
+ mgr_id->gws_base != gws_base ||
+ mgr_id->gws_size != gws_size ||
+ mgr_id->oa_base != oa_base ||
+ mgr_id->oa_size != oa_size);
+
+ if (ring->funcs->emit_pipeline_sync && (
+ pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed))
+ amdgpu_ring_emit_pipeline_sync(ring);
+
+ if (pd_addr != AMDGPU_VM_NO_FLUSH) {
+ trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
+ amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+ }
- if (pd_addr != vm_id->pd_gpu_addr || is_later) {
- trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id->id);
- if (is_later) {
- vm_id->flushed_updates = fence_get(updates);
- fence_put(flushed_updates);
- }
- vm_id->pd_gpu_addr = pd_addr;
- amdgpu_ring_emit_vm_flush(ring, vm_id->id, vm_id->pd_gpu_addr);
+ if (gds_switch_needed) {
+ mgr_id->gds_base = gds_base;
+ mgr_id->gds_size = gds_size;
+ mgr_id->gws_base = gws_base;
+ mgr_id->gws_size = gws_size;
+ mgr_id->oa_base = oa_base;
+ mgr_id->oa_size = oa_size;
+ amdgpu_ring_emit_gds_switch(ring, vm_id,
+ gds_base, gds_size,
+ gws_base, gws_size,
+ oa_base, oa_size);
}
}
/**
+ * amdgpu_vm_reset_id - reset VMID to zero
+ *
+ * @adev: amdgpu device structure
+ * @vm_id: vmid number to use
+ *
+ * Reset saved GDW, GWS and OA to force switch on next flush.
+ */
+void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id)
+{
+ struct amdgpu_vm_manager_id *mgr_id = &adev->vm_manager.ids[vm_id];
+
+ mgr_id->gds_base = 0;
+ mgr_id->gds_size = 0;
+ mgr_id->gws_base = 0;
+ mgr_id->gws_size = 0;
+ mgr_id->oa_base = 0;
+ mgr_id->oa_size = 0;
+}
+
+/**
* amdgpu_vm_bo_find - find the bo_va for a specific vm & bo
*
* @vm: requested vm
@@ -804,7 +867,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
while (start != mapping->it.last + 1) {
uint64_t last;
- last = min((uint64_t)mapping->it.last, start + max_size);
+ last = min((uint64_t)mapping->it.last, start + max_size - 1);
r = amdgpu_vm_bo_update_mapping(adev, gtt, gtt_flags, vm,
start, last, flags, addr,
fence);
@@ -812,7 +875,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
return r;
start = last + 1;
- addr += max_size;
+ addr += max_size * AMDGPU_GPU_PAGE_SIZE;
}
return 0;
@@ -908,22 +971,18 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_bo_va_mapping *mapping;
int r;
- spin_lock(&vm->freed_lock);
while (!list_empty(&vm->freed)) {
mapping = list_first_entry(&vm->freed,
struct amdgpu_bo_va_mapping, list);
list_del(&mapping->list);
- spin_unlock(&vm->freed_lock);
+
r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, vm, mapping,
0, NULL);
kfree(mapping);
if (r)
return r;
- spin_lock(&vm->freed_lock);
}
- spin_unlock(&vm->freed_lock);
-
return 0;
}
@@ -950,9 +1009,8 @@ int amdgpu_vm_clear_invalids(struct amdgpu_device *adev,
bo_va = list_first_entry(&vm->invalidated,
struct amdgpu_bo_va, vm_status);
spin_unlock(&vm->status_lock);
- mutex_lock(&bo_va->mutex);
+
r = amdgpu_vm_bo_update(adev, bo_va, NULL);
- mutex_unlock(&bo_va->mutex);
if (r)
return r;
@@ -996,7 +1054,7 @@ struct amdgpu_bo_va *amdgpu_vm_bo_add(struct amdgpu_device *adev,
INIT_LIST_HEAD(&bo_va->valids);
INIT_LIST_HEAD(&bo_va->invalids);
INIT_LIST_HEAD(&bo_va->vm_status);
- mutex_init(&bo_va->mutex);
+
list_add_tail(&bo_va->bo_list, &bo->va);
return bo_va;
@@ -1048,9 +1106,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
saddr /= AMDGPU_GPU_PAGE_SIZE;
eaddr /= AMDGPU_GPU_PAGE_SIZE;
- spin_lock(&vm->it_lock);
it = interval_tree_iter_first(&vm->va, saddr, eaddr);
- spin_unlock(&vm->it_lock);
if (it) {
struct amdgpu_bo_va_mapping *tmp;
tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
@@ -1074,13 +1130,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
mapping->offset = offset;
mapping->flags = flags;
- mutex_lock(&bo_va->mutex);
list_add(&mapping->list, &bo_va->invalids);
- mutex_unlock(&bo_va->mutex);
- spin_lock(&vm->it_lock);
interval_tree_insert(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
- trace_amdgpu_vm_bo_map(bo_va, mapping);
/* Make sure the page tables are allocated */
saddr >>= amdgpu_vm_block_size;
@@ -1124,6 +1175,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
entry->priority = 0;
entry->tv.bo = &entry->robj->tbo;
entry->tv.shared = true;
+ entry->user_pages = NULL;
vm->page_tables[pt_idx].addr = 0;
}
@@ -1131,9 +1183,7 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
error_free:
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
kfree(mapping);
@@ -1162,7 +1212,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
bool valid = true;
saddr /= AMDGPU_GPU_PAGE_SIZE;
- mutex_lock(&bo_va->mutex);
+
list_for_each_entry(mapping, &bo_va->valids, list) {
if (mapping->it.start == saddr)
break;
@@ -1176,25 +1226,18 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
break;
}
- if (&mapping->list == &bo_va->invalids) {
- mutex_unlock(&bo_va->mutex);
+ if (&mapping->list == &bo_va->invalids)
return -ENOENT;
- }
}
- mutex_unlock(&bo_va->mutex);
+
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- if (valid) {
- spin_lock(&vm->freed_lock);
+ if (valid)
list_add(&mapping->list, &vm->freed);
- spin_unlock(&vm->freed_lock);
- } else {
+ else
kfree(mapping);
- }
return 0;
}
@@ -1223,23 +1266,17 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
trace_amdgpu_vm_bo_unmap(bo_va, mapping);
- spin_lock(&vm->freed_lock);
list_add(&mapping->list, &vm->freed);
- spin_unlock(&vm->freed_lock);
}
list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
list_del(&mapping->list);
- spin_lock(&vm->it_lock);
interval_tree_remove(&mapping->it, &vm->va);
- spin_unlock(&vm->it_lock);
kfree(mapping);
}
+
fence_put(bo_va->last_pt_update);
- mutex_destroy(&bo_va->mutex);
kfree(bo_va);
}
@@ -1284,7 +1321,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
int i, r;
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- vm->ids[i].id = 0;
+ vm->ids[i].mgr_id = NULL;
vm->ids[i].flushed_updates = NULL;
}
vm->va = RB_ROOT;
@@ -1292,8 +1329,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
INIT_LIST_HEAD(&vm->invalidated);
INIT_LIST_HEAD(&vm->cleared);
INIT_LIST_HEAD(&vm->freed);
- spin_lock_init(&vm->it_lock);
- spin_lock_init(&vm->freed_lock);
+
pd_size = amdgpu_vm_directory_size(adev);
pd_entries = amdgpu_vm_num_pdes(adev);
@@ -1380,14 +1416,15 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
amdgpu_bo_unref(&vm->page_directory);
fence_put(vm->page_directory_fence);
+
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
- unsigned id = vm->ids[i].id;
+ struct amdgpu_vm_id *id = &vm->ids[i];
- atomic_long_cmpxchg(&adev->vm_manager.ids[id].owner,
- (long)vm, 0);
- fence_put(vm->ids[i].flushed_updates);
+ if (id->mgr_id)
+ atomic_long_cmpxchg(&id->mgr_id->owner,
+ (long)id, 0);
+ fence_put(id->flushed_updates);
}
-
}
/**
@@ -1404,9 +1441,11 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
INIT_LIST_HEAD(&adev->vm_manager.ids_lru);
/* skip over VMID 0, since it is the system VM */
- for (i = 1; i < adev->vm_manager.num_ids; ++i)
+ for (i = 1; i < adev->vm_manager.num_ids; ++i) {
+ amdgpu_vm_reset_id(adev, i);
list_add_tail(&adev->vm_manager.ids[i].list,
&adev->vm_manager.ids_lru);
+ }
atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
}
diff --git a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
index 21aacc1f45c1..bf731e9f643e 100644
--- a/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_dp.c
@@ -265,15 +265,27 @@ static int amdgpu_atombios_dp_get_dp_link_config(struct drm_connector *connector
unsigned max_lane_num = drm_dp_max_lane_count(dpcd);
unsigned lane_num, i, max_pix_clock;
- for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
- for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
- max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+ if (amdgpu_connector_encoder_get_dp_bridge_encoder_id(connector) ==
+ ENCODER_OBJECT_ID_NUTMEG) {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ max_pix_clock = (lane_num * 270000 * 8) / bpp;
if (max_pix_clock >= pix_clock) {
*dp_lanes = lane_num;
- *dp_rate = link_rates[i];
+ *dp_rate = 270000;
return 0;
}
}
+ } else {
+ for (lane_num = 1; lane_num <= max_lane_num; lane_num <<= 1) {
+ for (i = 0; i < ARRAY_SIZE(link_rates) && link_rates[i] <= max_link_rate; i++) {
+ max_pix_clock = (lane_num * link_rates[i] * 8) / bpp;
+ if (max_pix_clock >= pix_clock) {
+ *dp_lanes = lane_num;
+ *dp_rate = link_rates[i];
+ return 0;
+ }
+ }
+ }
}
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 8b4731d4e10e..1f9109d3348b 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -31,6 +31,7 @@
#include "ci_dpm.h"
#include "gfx_v7_0.h"
#include "atom.h"
+#include "amd_pcie.h"
#include <linux/seq_file.h>
#include "smu/smu_7_0_1_d.h"
@@ -3016,7 +3017,6 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,
&memory_level->MinVddcPhases);
memory_level->EnabledForThrottle = 1;
- memory_level->EnabledForActivity = 1;
memory_level->UpH = 0;
memory_level->DownH = 100;
memory_level->VoltageDownH = 0;
@@ -3375,7 +3375,6 @@ static int ci_populate_single_graphic_level(struct amdgpu_device *adev,
graphic_level->SpllSpreadSpectrum2 = cpu_to_be32(graphic_level->SpllSpreadSpectrum2);
graphic_level->CcPwrDynRm = cpu_to_be32(graphic_level->CcPwrDynRm);
graphic_level->CcPwrDynRm1 = cpu_to_be32(graphic_level->CcPwrDynRm1);
- graphic_level->EnabledForActivity = 1;
return 0;
}
@@ -3406,6 +3405,7 @@ static int ci_populate_all_graphic_levels(struct amdgpu_device *adev)
pi->smc_state_table.GraphicsLevel[i].DisplayWatermark =
PPSMC_DISPLAY_WATERMARK_HIGH;
}
+ pi->smc_state_table.GraphicsLevel[0].EnabledForActivity = 1;
pi->smc_state_table.GraphicsDpmLevelCount = (u8)dpm_table->sclk_table.count;
pi->dpm_level_enable_mask.sclk_dpm_enable_mask =
@@ -3449,6 +3449,8 @@ static int ci_populate_all_memory_levels(struct amdgpu_device *adev)
return ret;
}
+ pi->smc_state_table.MemoryLevel[0].EnabledForActivity = 1;
+
if ((dpm_table->mclk_table.count >= 2) &&
((adev->pdev->device == 0x67B0) || (adev->pdev->device == 0x67B1))) {
pi->smc_state_table.MemoryLevel[1].MinVddc =
@@ -4380,26 +4382,6 @@ static int ci_dpm_force_performance_level(struct amdgpu_device *adev,
}
}
}
- if ((!pi->pcie_dpm_key_disabled) &&
- pi->dpm_level_enable_mask.pcie_dpm_enable_mask) {
- levels = 0;
- tmp = pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
- while (tmp >>= 1)
- levels++;
- if (levels) {
- ret = ci_dpm_force_state_pcie(adev, level);
- if (ret)
- return ret;
- for (i = 0; i < adev->usec_timeout; i++) {
- tmp = (RREG32_SMC(ixTARGET_AND_CURRENT_PROFILE_INDEX_1) &
- TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX_MASK) >>
- TARGET_AND_CURRENT_PROFILE_INDEX_1__CURR_PCIE_INDEX__SHIFT;
- if (tmp == levels)
- break;
- udelay(1);
- }
- }
- }
} else if (level == AMDGPU_DPM_FORCED_LEVEL_LOW) {
if ((!pi->sclk_dpm_key_disabled) &&
pi->dpm_level_enable_mask.sclk_dpm_enable_mask) {
@@ -5394,30 +5376,6 @@ static int ci_dpm_enable(struct amdgpu_device *adev)
ci_update_current_ps(adev, boot_ps);
- if (adev->irq.installed &&
- amdgpu_is_internal_thermal_sensor(adev->pm.int_thermal_type)) {
-#if 0
- PPSMC_Result result;
-#endif
- ret = ci_thermal_set_temperature_range(adev, CISLANDS_TEMP_RANGE_MIN,
- CISLANDS_TEMP_RANGE_MAX);
- if (ret) {
- DRM_ERROR("ci_thermal_set_temperature_range failed\n");
- return ret;
- }
- amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
- AMDGPU_THERMAL_IRQ_LOW_TO_HIGH);
- amdgpu_irq_get(adev, &adev->pm.dpm.thermal.irq,
- AMDGPU_THERMAL_IRQ_HIGH_TO_LOW);
-
-#if 0
- result = amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_EnableThermalInterrupt);
-
- if (result != PPSMC_Result_OK)
- DRM_DEBUG_KMS("Could not enable thermal interrupts.\n");
-#endif
- }
-
return 0;
}
@@ -5835,18 +5793,16 @@ static int ci_dpm_init(struct amdgpu_device *adev)
u8 frev, crev;
struct ci_power_info *pi;
int ret;
- u32 mask;
pi = kzalloc(sizeof(struct ci_power_info), GFP_KERNEL);
if (pi == NULL)
return -ENOMEM;
adev->pm.dpm.priv = pi;
- ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
- if (ret)
- pi->sys_pcie_mask = 0;
- else
- pi->sys_pcie_mask = mask;
+ pi->sys_pcie_mask =
+ (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_MASK) >>
+ CAIL_PCIE_LINK_SPEED_SUPPORT_SHIFT;
+
pi->force_pcie_gen = AMDGPU_PCIE_GEN_INVALID;
pi->pcie_gen_performance.max = AMDGPU_PCIE_GEN1;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 6b1f0539ce9d..bddc9ba11495 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1462,6 +1462,9 @@ static void cik_program_aspm(struct amdgpu_device *adev)
if (amdgpu_aspm == 0)
return;
+ if (pci_is_root_bus(adev->pdev->bus))
+ return;
+
/* XXX double check APUs */
if (adev->flags & AMD_IS_APU)
return;
@@ -2025,79 +2028,77 @@ static int cik_common_early_init(void *handle)
adev->asic_funcs = &cik_asic_funcs;
- adev->has_uvd = true;
-
adev->rev_id = cik_get_rev_id(adev);
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
case CHIP_BONAIRE:
adev->cg_flags =
- AMDGPU_CG_SUPPORT_GFX_MGCG |
- AMDGPU_CG_SUPPORT_GFX_MGLS |
- /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
- AMDGPU_CG_SUPPORT_GFX_CGLS |
- AMDGPU_CG_SUPPORT_GFX_CGTS |
- AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
- AMDGPU_CG_SUPPORT_GFX_CP_LS |
- AMDGPU_CG_SUPPORT_MC_LS |
- AMDGPU_CG_SUPPORT_MC_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_LS |
- AMDGPU_CG_SUPPORT_BIF_LS |
- AMDGPU_CG_SUPPORT_VCE_MGCG |
- AMDGPU_CG_SUPPORT_UVD_MGCG |
- AMDGPU_CG_SUPPORT_HDP_LS |
- AMDGPU_CG_SUPPORT_HDP_MGCG;
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
case CHIP_HAWAII:
adev->cg_flags =
- AMDGPU_CG_SUPPORT_GFX_MGCG |
- AMDGPU_CG_SUPPORT_GFX_MGLS |
- /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
- AMDGPU_CG_SUPPORT_GFX_CGLS |
- AMDGPU_CG_SUPPORT_GFX_CGTS |
- AMDGPU_CG_SUPPORT_GFX_CP_LS |
- AMDGPU_CG_SUPPORT_MC_LS |
- AMDGPU_CG_SUPPORT_MC_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_LS |
- AMDGPU_CG_SUPPORT_BIF_LS |
- AMDGPU_CG_SUPPORT_VCE_MGCG |
- AMDGPU_CG_SUPPORT_UVD_MGCG |
- AMDGPU_CG_SUPPORT_HDP_LS |
- AMDGPU_CG_SUPPORT_HDP_MGCG;
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_MC_LS |
+ AMD_CG_SUPPORT_MC_MGCG |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
adev->pg_flags = 0;
adev->external_rev_id = 0x28;
break;
case CHIP_KAVERI:
adev->cg_flags =
- AMDGPU_CG_SUPPORT_GFX_MGCG |
- AMDGPU_CG_SUPPORT_GFX_MGLS |
- /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
- AMDGPU_CG_SUPPORT_GFX_CGLS |
- AMDGPU_CG_SUPPORT_GFX_CGTS |
- AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
- AMDGPU_CG_SUPPORT_GFX_CP_LS |
- AMDGPU_CG_SUPPORT_SDMA_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_LS |
- AMDGPU_CG_SUPPORT_BIF_LS |
- AMDGPU_CG_SUPPORT_VCE_MGCG |
- AMDGPU_CG_SUPPORT_UVD_MGCG |
- AMDGPU_CG_SUPPORT_HDP_LS |
- AMDGPU_CG_SUPPORT_HDP_MGCG;
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
adev->pg_flags =
- /*AMDGPU_PG_SUPPORT_GFX_PG |
- AMDGPU_PG_SUPPORT_GFX_SMG |
- AMDGPU_PG_SUPPORT_GFX_DMG |*/
- AMDGPU_PG_SUPPORT_UVD |
- /*AMDGPU_PG_SUPPORT_VCE |
- AMDGPU_PG_SUPPORT_CP |
- AMDGPU_PG_SUPPORT_GDS |
- AMDGPU_PG_SUPPORT_RLC_SMU_HS |
- AMDGPU_PG_SUPPORT_ACP |
- AMDGPU_PG_SUPPORT_SAMU |*/
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |*/
+ AMD_PG_SUPPORT_UVD |
+ /*AMD_PG_SUPPORT_VCE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_ACP |
+ AMD_PG_SUPPORT_SAMU |*/
0;
if (adev->pdev->device == 0x1312 ||
adev->pdev->device == 0x1316 ||
@@ -2109,29 +2110,29 @@ static int cik_common_early_init(void *handle)
case CHIP_KABINI:
case CHIP_MULLINS:
adev->cg_flags =
- AMDGPU_CG_SUPPORT_GFX_MGCG |
- AMDGPU_CG_SUPPORT_GFX_MGLS |
- /*AMDGPU_CG_SUPPORT_GFX_CGCG |*/
- AMDGPU_CG_SUPPORT_GFX_CGLS |
- AMDGPU_CG_SUPPORT_GFX_CGTS |
- AMDGPU_CG_SUPPORT_GFX_CGTS_LS |
- AMDGPU_CG_SUPPORT_GFX_CP_LS |
- AMDGPU_CG_SUPPORT_SDMA_MGCG |
- AMDGPU_CG_SUPPORT_SDMA_LS |
- AMDGPU_CG_SUPPORT_BIF_LS |
- AMDGPU_CG_SUPPORT_VCE_MGCG |
- AMDGPU_CG_SUPPORT_UVD_MGCG |
- AMDGPU_CG_SUPPORT_HDP_LS |
- AMDGPU_CG_SUPPORT_HDP_MGCG;
+ AMD_CG_SUPPORT_GFX_MGCG |
+ AMD_CG_SUPPORT_GFX_MGLS |
+ /*AMD_CG_SUPPORT_GFX_CGCG |*/
+ AMD_CG_SUPPORT_GFX_CGLS |
+ AMD_CG_SUPPORT_GFX_CGTS |
+ AMD_CG_SUPPORT_GFX_CGTS_LS |
+ AMD_CG_SUPPORT_GFX_CP_LS |
+ AMD_CG_SUPPORT_SDMA_MGCG |
+ AMD_CG_SUPPORT_SDMA_LS |
+ AMD_CG_SUPPORT_BIF_LS |
+ AMD_CG_SUPPORT_VCE_MGCG |
+ AMD_CG_SUPPORT_UVD_MGCG |
+ AMD_CG_SUPPORT_HDP_LS |
+ AMD_CG_SUPPORT_HDP_MGCG;
adev->pg_flags =
- /*AMDGPU_PG_SUPPORT_GFX_PG |
- AMDGPU_PG_SUPPORT_GFX_SMG | */
- AMDGPU_PG_SUPPORT_UVD |
- /*AMDGPU_PG_SUPPORT_VCE |
- AMDGPU_PG_SUPPORT_CP |
- AMDGPU_PG_SUPPORT_GDS |
- AMDGPU_PG_SUPPORT_RLC_SMU_HS |
- AMDGPU_PG_SUPPORT_SAMU |*/
+ /*AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG | */
+ AMD_PG_SUPPORT_UVD |
+ /*AMD_PG_SUPPORT_VCE |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS |
+ AMD_PG_SUPPORT_SAMU |*/
0;
if (adev->asic_type == CHIP_KABINI) {
if (adev->rev_id == 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
index 675f34916aab..d3ac3298fba8 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@@ -212,7 +212,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 extra_bits = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 extra_bits = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 4)
@@ -261,6 +261,13 @@ static void cik_sdma_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, (0xfff << 16) | 10); /* retry count, poll interval */
}
+static void cik_sdma_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_SRBM_WRITE, 0, 0xf000));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
+
/**
* cik_sdma_ring_emit_fence - emit a fence on the DMA ring
*
@@ -636,8 +643,7 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[3] = 1;
ib.ptr[4] = 0xDEADBEEF;
ib.length_dw = 5;
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -663,7 +669,8 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -816,6 +823,30 @@ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib)
}
/**
+ * cik_sdma_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void cik_sdma_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_POLL_REG_MEM, 0,
+ SDMA_POLL_REG_MEM_EXTRA_OP(0) |
+ SDMA_POLL_REG_MEM_EXTRA_FUNC(3) | /* equal */
+ SDMA_POLL_REG_MEM_EXTRA_M));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, (0xfff << 16) | 4); /* retry count, poll interval */
+}
+
+/**
* cik_sdma_ring_emit_vm_flush - cik vm flush using sDMA
*
* @ring: amdgpu_ring pointer
@@ -856,7 +887,7 @@ static void cik_enable_sdma_mgcg(struct amdgpu_device *adev,
{
u32 orig, data;
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_MGCG)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_MGCG)) {
WREG32(mmSDMA0_CLK_CTRL + SDMA0_REGISTER_OFFSET, 0x00000100);
WREG32(mmSDMA0_CLK_CTRL + SDMA1_REGISTER_OFFSET, 0x00000100);
} else {
@@ -877,7 +908,7 @@ static void cik_enable_sdma_mgls(struct amdgpu_device *adev,
{
u32 orig, data;
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_SDMA_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_SDMA_LS)) {
orig = data = RREG32(mmSDMA0_POWER_CNTL + SDMA0_REGISTER_OFFSET);
data |= 0x100;
if (orig != data)
@@ -1270,8 +1301,10 @@ static const struct amdgpu_ring_funcs cik_sdma_ring_funcs = {
.parse_cs = NULL,
.emit_ib = cik_sdma_ring_emit_ib,
.emit_fence = cik_sdma_ring_emit_fence,
+ .emit_pipeline_sync = cik_sdma_ring_emit_pipeline_sync,
.emit_vm_flush = cik_sdma_ring_emit_vm_flush,
.emit_hdp_flush = cik_sdma_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = cik_sdma_ring_emit_hdp_invalidate,
.test_ring = cik_sdma_ring_test_ring,
.test_ib = cik_sdma_ring_test_ib,
.insert_nop = cik_sdma_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/cikd.h b/drivers/gpu/drm/amd/amdgpu/cikd.h
index 7f6d457f250a..60d4493206dd 100644
--- a/drivers/gpu/drm/amd/amdgpu/cikd.h
+++ b/drivers/gpu/drm/amd/amdgpu/cikd.h
@@ -46,9 +46,6 @@
#define BONAIRE_GB_ADDR_CONFIG_GOLDEN 0x12010001
#define HAWAII_GB_ADDR_CONFIG_GOLDEN 0x12011003
-#define CIK_RB_BITMAP_WIDTH_PER_SH 2
-#define HAWAII_RB_BITMAP_WIDTH_PER_SH 4
-
#define AMDGPU_NUM_OF_VMIDS 8
#define PIPEID(x) ((x) << 0)
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
index 4dd17f2dd905..e7ef2261ff4a 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@@ -445,13 +445,13 @@ static int cz_dpm_init(struct amdgpu_device *adev)
pi->gfx_pg_threshold = 500;
pi->caps_fps = true;
/* uvd */
- pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+ pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
pi->caps_uvd_dpm = true;
/* vce */
- pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
+ pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
pi->caps_vce_dpm = true;
/* acp */
- pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+ pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
pi->caps_acp_dpm = true;
pi->caps_stable_power_state = false;
@@ -2202,8 +2202,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
AMD_PG_STATE_GATE);
cz_enable_vce_dpm(adev, false);
- /* TODO: to figure out why vce can't be poweroff. */
- /* cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF); */
+ cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerOFF);
pi->vce_power_gated = true;
} else {
cz_send_msg_to_smc(adev, PPSMC_MSG_VCEPowerON);
@@ -2226,10 +2225,8 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
}
} else { /*pi->caps_vce_pg*/
cz_update_vce_dpm(adev);
- cz_enable_vce_dpm(adev, true);
+ cz_enable_vce_dpm(adev, !gate);
}
-
- return;
}
const struct amd_ip_funcs cz_dpm_ip_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index e3ff809a0cae..6de2ce535e37 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1668,6 +1668,9 @@ static void dce_v10_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1973,7 +1976,7 @@ static void dce_v10_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v10_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1986,8 +1989,16 @@ static void dce_v10_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v10_0_afmt_fini(struct amdgpu_device *adev)
@@ -2064,8 +2075,7 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2079,9 +2089,9 @@ static int dce_v10_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2700,13 +2710,13 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v10_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v10_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2980,8 +2990,6 @@ static int dce_v10_0_sw_init(void *handle)
if (r)
return r;
- adev->mode_info.mode_config_initialized = true;
-
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
adev->ddev->mode_config.max_width = 16384;
@@ -3012,7 +3020,9 @@ static int dce_v10_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v10_0_afmt_init(adev);
+ r = dce_v10_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v10_0_audio_init(adev);
if (r)
@@ -3020,7 +3030,8 @@ static int dce_v10_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v10_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 6b6c9b6879ae..e9ccc6b787f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1658,6 +1658,9 @@ static void dce_v11_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1963,7 +1966,7 @@ static void dce_v11_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v11_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1976,8 +1979,16 @@ static void dce_v11_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v11_0_afmt_fini(struct amdgpu_device *adev)
@@ -2054,8 +2065,7 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2069,9 +2079,9 @@ static int dce_v11_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2691,13 +2701,13 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v11_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v11_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2961,7 +2971,7 @@ static int dce_v11_0_sw_init(void *handle)
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = amdgpu_irq_add_id(adev, i + 1, &adev->crtc_irq);
if (r)
- return r;
+ return r;
}
for (i = 8; i < 20; i += 2) {
@@ -2973,9 +2983,7 @@ static int dce_v11_0_sw_init(void *handle)
/* HPD hotplug */
r = amdgpu_irq_add_id(adev, 42, &adev->hpd_irq);
if (r)
- return r;
-
- adev->mode_info.mode_config_initialized = true;
+ return r;
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
@@ -2994,6 +3002,7 @@ static int dce_v11_0_sw_init(void *handle)
adev->ddev->mode_config.max_width = 16384;
adev->ddev->mode_config.max_height = 16384;
+
/* allocate crtcs */
for (i = 0; i < adev->mode_info.num_crtc; i++) {
r = dce_v11_0_crtc_init(adev, i);
@@ -3007,7 +3016,9 @@ static int dce_v11_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v11_0_afmt_init(adev);
+ r = dce_v11_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v11_0_audio_init(adev);
if (r)
@@ -3015,7 +3026,8 @@ static int dce_v11_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v11_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 56bea36a6b18..e56b55d8c280 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -1639,6 +1639,9 @@ static void dce_v8_0_audio_fini(struct amdgpu_device *adev)
{
int i;
+ if (!amdgpu_audio)
+ return;
+
if (!adev->mode_info.audio.enabled)
return;
@@ -1910,7 +1913,7 @@ static void dce_v8_0_afmt_enable(struct drm_encoder *encoder, bool enable)
enable ? "En" : "Dis", dig->afmt->offset, amdgpu_encoder->encoder_id);
}
-static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
+static int dce_v8_0_afmt_init(struct amdgpu_device *adev)
{
int i;
@@ -1923,8 +1926,16 @@ static void dce_v8_0_afmt_init(struct amdgpu_device *adev)
if (adev->mode_info.afmt[i]) {
adev->mode_info.afmt[i]->offset = dig_offsets[i];
adev->mode_info.afmt[i]->id = i;
+ } else {
+ int j;
+ for (j = 0; j < i; j++) {
+ kfree(adev->mode_info.afmt[j]);
+ adev->mode_info.afmt[j] = NULL;
+ }
+ return -ENOMEM;
}
}
+ return 0;
}
static void dce_v8_0_afmt_fini(struct amdgpu_device *adev)
@@ -2001,8 +2012,7 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
if (atomic) {
amdgpu_fb = to_amdgpu_framebuffer(fb);
target_fb = fb;
- }
- else {
+ } else {
amdgpu_fb = to_amdgpu_framebuffer(crtc->primary->fb);
target_fb = crtc->primary->fb;
}
@@ -2016,9 +2026,9 @@ static int dce_v8_0_crtc_do_set_base(struct drm_crtc *crtc,
if (unlikely(r != 0))
return r;
- if (atomic)
+ if (atomic) {
fb_location = amdgpu_bo_gpu_offset(rbo);
- else {
+ } else {
r = amdgpu_bo_pin(rbo, AMDGPU_GEM_DOMAIN_VRAM, &fb_location);
if (unlikely(r != 0)) {
amdgpu_bo_unreserve(rbo);
@@ -2612,13 +2622,13 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
amdgpu_irq_update(adev, &adev->crtc_irq, type);
amdgpu_irq_update(adev, &adev->pageflip_irq, type);
- drm_vblank_post_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_on(dev, amdgpu_crtc->crtc_id);
dce_v8_0_crtc_load_lut(crtc);
break;
case DRM_MODE_DPMS_STANDBY:
case DRM_MODE_DPMS_SUSPEND:
case DRM_MODE_DPMS_OFF:
- drm_vblank_pre_modeset(dev, amdgpu_crtc->crtc_id);
+ drm_vblank_off(dev, amdgpu_crtc->crtc_id);
if (amdgpu_crtc->enabled) {
dce_v8_0_vga_enable(crtc, true);
amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@@ -2890,8 +2900,6 @@ static int dce_v8_0_sw_init(void *handle)
if (r)
return r;
- adev->mode_info.mode_config_initialized = true;
-
adev->ddev->mode_config.funcs = &amdgpu_mode_funcs;
adev->ddev->mode_config.max_width = 16384;
@@ -2922,7 +2930,9 @@ static int dce_v8_0_sw_init(void *handle)
return -EINVAL;
/* setup afmt */
- dce_v8_0_afmt_init(adev);
+ r = dce_v8_0_afmt_init(adev);
+ if (r)
+ return r;
r = dce_v8_0_audio_init(adev);
if (r)
@@ -2930,7 +2940,8 @@ static int dce_v8_0_sw_init(void *handle)
drm_kms_helper_poll_init(adev->ddev);
- return r;
+ adev->mode_info.mode_config_initialized = true;
+ return 0;
}
static int dce_v8_0_sw_fini(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index 250bcbce7fdc..bb8709066fd8 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1635,30 +1635,25 @@ static u32 gfx_v7_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v7_0_setup_rb(struct amdgpu_device *adev)
{
int i, j;
- u32 data, tmp, num_rbs = 0;
+ u32 data;
u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
gfx_v7_0_select_se_sh(adev, i, j);
data = gfx_v7_0_get_rb_active_bitmap(adev);
- if (adev->asic_type == CHIP_HAWAII)
- active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
- HAWAII_RB_BITMAP_WIDTH_PER_SH);
- else
- active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
- CIK_RB_BITMAP_WIDTH_PER_SH);
+ active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+ rb_bitmap_width_per_sh);
}
}
gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
- tmp = active_rbs;
- while (tmp >>= 1)
- num_rbs++;
- adev->gfx.config.num_rbs = num_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
}
/**
@@ -1930,6 +1925,25 @@ static void gfx_v7_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
}
/**
+ * gfx_v7_0_ring_emit_hdp_invalidate - emit an hdp invalidate on the cp
+ *
+ * @adev: amdgpu_device pointer
+ * @ridx: amdgpu ring index
+ *
+ * Emits an hdp invalidate on the cp.
+ */
+static void gfx_v7_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) |
+ WR_CONFIRM));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 1);
+}
+
+/**
* gfx_v7_0_ring_emit_fence_gfx - emit a fence on the gfx ring
*
* @adev: amdgpu_device pointer
@@ -2046,8 +2060,7 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
else
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -2075,8 +2088,7 @@ static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -2124,8 +2136,7 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
@@ -2152,7 +2163,8 @@ static int gfx_v7_0_ring_test_ib(struct amdgpu_ring *ring)
err2:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -3030,6 +3042,26 @@ static int gfx_v7_0_cp_resume(struct amdgpu_device *adev)
return 0;
}
+/**
+ * gfx_v7_0_ring_emit_vm_flush - cik vm flush using the CP
+ *
+ * @ring: the ring to emmit the commands to
+ *
+ * Sync the command pipeline with the PFP. E.g. wait for everything
+ * to be completed.
+ */
+static void gfx_v7_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
+ if (usepfp) {
+ /* synce CE with ME to prevent CE fetch CEIB before context switch done */
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
+ amdgpu_ring_write(ring, 0);
+ }
+}
+
/*
* vm
* VMID 0 is the physical GPU addresses as used by the kernel.
@@ -3048,13 +3080,18 @@ static void gfx_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
unsigned vm_id, uint64_t pd_addr)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
- if (usepfp) {
- /* synce CE with ME to prevent CE fetch CEIB before context switch done */
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
- amdgpu_ring_write(ring, 0);
- }
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
+ amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq);
+ amdgpu_ring_write(ring, 0xffffffff);
+ amdgpu_ring_write(ring, 4); /* poll interval */
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -3529,7 +3566,7 @@ static void gfx_v7_0_enable_cgcg(struct amdgpu_device *adev, bool enable)
orig = data = RREG32(mmRLC_CGCG_CGLS_CTRL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGCG)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGCG)) {
gfx_v7_0_enable_gui_idle_interrupt(adev, true);
tmp = gfx_v7_0_halt_rlc(adev);
@@ -3567,9 +3604,9 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
u32 data, orig, tmp = 0;
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGCG)) {
- if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) {
- if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CP_LS) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CP_LS) {
orig = data = RREG32(mmCP_MEM_SLP_CNTL);
data |= CP_MEM_SLP_CNTL__CP_MEM_LS_EN_MASK;
if (orig != data)
@@ -3596,14 +3633,14 @@ static void gfx_v7_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
gfx_v7_0_update_rlc(adev, tmp);
- if (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS) {
orig = data = RREG32(mmCGTS_SM_CTRL_REG);
data &= ~CGTS_SM_CTRL_REG__SM_MODE_MASK;
data |= (0x2 << CGTS_SM_CTRL_REG__SM_MODE__SHIFT);
data |= CGTS_SM_CTRL_REG__SM_MODE_ENABLE_MASK;
data &= ~CGTS_SM_CTRL_REG__OVERRIDE_MASK;
- if ((adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_MGLS) &&
- (adev->cg_flags & AMDGPU_CG_SUPPORT_GFX_CGTS_LS))
+ if ((adev->cg_flags & AMD_CG_SUPPORT_GFX_MGLS) &&
+ (adev->cg_flags & AMD_CG_SUPPORT_GFX_CGTS_LS))
data &= ~CGTS_SM_CTRL_REG__LS_OVERRIDE_MASK;
data &= ~CGTS_SM_CTRL_REG__ON_MONITOR_ADD_MASK;
data |= CGTS_SM_CTRL_REG__ON_MONITOR_ADD_EN_MASK;
@@ -3669,7 +3706,7 @@ static void gfx_v7_0_enable_sclk_slowdown_on_pu(struct amdgpu_device *adev,
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PU_ENABLE_MASK;
@@ -3683,7 +3720,7 @@ static void gfx_v7_0_enable_sclk_slowdown_on_pd(struct amdgpu_device *adev,
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_RLC_SMU_HS))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_RLC_SMU_HS))
data |= RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__SMU_CLK_SLOWDOWN_ON_PD_ENABLE_MASK;
@@ -3696,7 +3733,7 @@ static void gfx_v7_0_enable_cp_pg(struct amdgpu_device *adev, bool enable)
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_CP))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_CP))
data &= ~0x8000;
else
data |= 0x8000;
@@ -3709,7 +3746,7 @@ static void gfx_v7_0_enable_gds_pg(struct amdgpu_device *adev, bool enable)
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GDS))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GDS))
data &= ~0x2000;
else
data |= 0x2000;
@@ -3790,7 +3827,7 @@ static void gfx_v7_0_enable_gfx_cgpg(struct amdgpu_device *adev,
{
u32 data, orig;
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG)) {
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG)) {
orig = data = RREG32(mmRLC_PG_CNTL);
data |= RLC_PG_CNTL__GFX_POWER_GATING_ENABLE_MASK;
if (orig != data)
@@ -3825,8 +3862,7 @@ static u32 gfx_v7_0_get_cu_active_bitmap(struct amdgpu_device *adev)
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
- mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_backends_per_se /
- adev->gfx.config.max_sh_per_se);
+ mask = gfx_v7_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
return (~data) & mask;
}
@@ -3854,7 +3890,7 @@ static void gfx_v7_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_SMG))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_SMG))
data |= RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__STATIC_PER_CU_PG_ENABLE_MASK;
@@ -3868,7 +3904,7 @@ static void gfx_v7_0_enable_gfx_dynamic_mgpg(struct amdgpu_device *adev,
u32 data, orig;
orig = data = RREG32(mmRLC_PG_CNTL);
- if (enable && (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_DMG))
+ if (enable && (adev->pg_flags & AMD_PG_SUPPORT_GFX_DMG))
data |= RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
else
data &= ~RLC_PG_CNTL__DYN_PER_CU_PG_ENABLE_MASK;
@@ -4035,15 +4071,15 @@ static void gfx_v7_0_get_csb_buffer(struct amdgpu_device *adev,
static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
{
- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
- AMDGPU_PG_SUPPORT_GFX_SMG |
- AMDGPU_PG_SUPPORT_GFX_DMG |
- AMDGPU_PG_SUPPORT_CP |
- AMDGPU_PG_SUPPORT_GDS |
- AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
gfx_v7_0_enable_sclk_slowdown_on_pu(adev, true);
gfx_v7_0_enable_sclk_slowdown_on_pd(adev, true);
- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
gfx_v7_0_init_gfx_cgpg(adev);
gfx_v7_0_enable_cp_pg(adev, true);
gfx_v7_0_enable_gds_pg(adev, true);
@@ -4055,14 +4091,14 @@ static void gfx_v7_0_init_pg(struct amdgpu_device *adev)
static void gfx_v7_0_fini_pg(struct amdgpu_device *adev)
{
- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
- AMDGPU_PG_SUPPORT_GFX_SMG |
- AMDGPU_PG_SUPPORT_GFX_DMG |
- AMDGPU_PG_SUPPORT_CP |
- AMDGPU_PG_SUPPORT_GDS |
- AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
gfx_v7_0_update_gfx_pg(adev, false);
- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
gfx_v7_0_enable_cp_pg(adev, false);
gfx_v7_0_enable_gds_pg(adev, false);
}
@@ -5097,14 +5133,14 @@ static int gfx_v7_0_set_powergating_state(void *handle,
if (state == AMD_PG_STATE_GATE)
gate = true;
- if (adev->pg_flags & (AMDGPU_PG_SUPPORT_GFX_PG |
- AMDGPU_PG_SUPPORT_GFX_SMG |
- AMDGPU_PG_SUPPORT_GFX_DMG |
- AMDGPU_PG_SUPPORT_CP |
- AMDGPU_PG_SUPPORT_GDS |
- AMDGPU_PG_SUPPORT_RLC_SMU_HS)) {
+ if (adev->pg_flags & (AMD_PG_SUPPORT_GFX_PG |
+ AMD_PG_SUPPORT_GFX_SMG |
+ AMD_PG_SUPPORT_GFX_DMG |
+ AMD_PG_SUPPORT_CP |
+ AMD_PG_SUPPORT_GDS |
+ AMD_PG_SUPPORT_RLC_SMU_HS)) {
gfx_v7_0_update_gfx_pg(adev, gate);
- if (adev->pg_flags & AMDGPU_PG_SUPPORT_GFX_PG) {
+ if (adev->pg_flags & AMD_PG_SUPPORT_GFX_PG) {
gfx_v7_0_enable_cp_pg(adev, gate);
gfx_v7_0_enable_gds_pg(adev, gate);
}
@@ -5137,9 +5173,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_gfx = {
.parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib_gfx,
.emit_fence = gfx_v7_0_ring_emit_fence_gfx,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -5153,9 +5191,11 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
.parse_cs = NULL,
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v7_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v7_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v7_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v7_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v7_0_ring_test_ring,
.test_ib = gfx_v7_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -5237,6 +5277,8 @@ int gfx_v7_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
+ memset(cu_info, 0, sizeof(*cu_info));
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 10c865087d0a..f0c7b3596480 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -706,8 +706,7 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[2] = 0xDEADBEEF;
ib.length_dw = 3;
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err2;
@@ -733,7 +732,8 @@ static int gfx_v8_0_ring_test_ib(struct amdgpu_ring *ring)
}
err2:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err1:
amdgpu_gfx_scratch_free(adev, scratch);
return r;
@@ -1262,8 +1262,7 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
ib.ptr[ib.length_dw++] = EVENT_TYPE(7) | EVENT_INDEX(4);
/* shedule the ib on the ring */
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r) {
DRM_ERROR("amdgpu: ib submit failed (%d).\n", r);
goto fail;
@@ -1291,7 +1290,8 @@ static int gfx_v8_0_do_edc_gpr_workarounds(struct amdgpu_device *adev)
fail:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
return r;
}
@@ -2613,8 +2613,10 @@ static u32 gfx_v8_0_get_rb_active_bitmap(struct amdgpu_device *adev)
static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
{
int i, j;
- u32 data, tmp, num_rbs = 0;
+ u32 data;
u32 active_rbs = 0;
+ u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+ adev->gfx.config.max_sh_per_se;
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
@@ -2622,17 +2624,14 @@ static void gfx_v8_0_setup_rb(struct amdgpu_device *adev)
gfx_v8_0_select_se_sh(adev, i, j);
data = gfx_v8_0_get_rb_active_bitmap(adev);
active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
- RB_BITMAP_WIDTH_PER_SH);
+ rb_bitmap_width_per_sh);
}
}
gfx_v8_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
mutex_unlock(&adev->grbm_idx_mutex);
adev->gfx.config.backend_enable_mask = active_rbs;
- tmp = active_rbs;
- while (tmp >>= 1)
- num_rbs++;
- adev->gfx.config.num_rbs = num_rbs;
+ adev->gfx.config.num_rbs = hweight32(active_rbs);
}
/**
@@ -4590,6 +4589,18 @@ static void gfx_v8_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
amdgpu_ring_write(ring, 0x20); /* poll interval */
}
+static void gfx_v8_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
+ amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(0) |
+ WRITE_DATA_DST_SEL(0) |
+ WR_CONFIRM));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 0);
+ amdgpu_ring_write(ring, 1);
+
+}
+
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
@@ -4622,8 +4633,7 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
else
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -4652,8 +4662,7 @@ static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
header = PACKET3(PACKET3_INDIRECT_BUFFER, 2);
- control |= ib->length_dw |
- (ib->vm ? (ib->vm->ids[ring->idx].id << 24) : 0);
+ control |= ib->length_dw | (ib->vm_id << 24);
amdgpu_ring_write(ring, header);
amdgpu_ring_write(ring,
@@ -4685,8 +4694,7 @@ static void gfx_v8_0_ring_emit_fence_gfx(struct amdgpu_ring *ring, u64 addr,
}
-static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
- unsigned vm_id, uint64_t pd_addr)
+static void gfx_v8_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
{
int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
uint32_t seq = ring->fence_drv.sync_seq;
@@ -4694,7 +4702,8 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_WAIT_REG_MEM, 5));
amdgpu_ring_write(ring, (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
- WAIT_REG_MEM_FUNCTION(3))); /* equal */
+ WAIT_REG_MEM_FUNCTION(3) | /* equal */
+ WAIT_REG_MEM_ENGINE(usepfp))); /* pfp or me */
amdgpu_ring_write(ring, addr & 0xfffffffc);
amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
amdgpu_ring_write(ring, seq);
@@ -4708,6 +4717,12 @@ static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
+}
+
+static void gfx_v8_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
+ unsigned vm_id, uint64_t pd_addr)
+{
+ int usepfp = (ring->type == AMDGPU_RING_TYPE_GFX);
amdgpu_ring_write(ring, PACKET3(PACKET3_WRITE_DATA, 3));
amdgpu_ring_write(ring, (WRITE_DATA_ENGINE_SEL(usepfp) |
@@ -4880,7 +4895,7 @@ static int gfx_v8_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
case AMDGPU_IRQ_STATE_ENABLE:
cp_int_cntl = RREG32(mmCP_INT_CNTL_RING0);
cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
- PRIV_REG_INT_ENABLE, 0);
+ PRIV_REG_INT_ENABLE, 1);
WREG32(mmCP_INT_CNTL_RING0, cp_int_cntl);
break;
default:
@@ -5030,9 +5045,11 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_gfx = {
.parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib_gfx,
.emit_fence = gfx_v8_0_ring_emit_fence_gfx,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -5046,9 +5063,11 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
.parse_cs = NULL,
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
+ .emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
.emit_vm_flush = gfx_v8_0_ring_emit_vm_flush,
.emit_gds_switch = gfx_v8_0_ring_emit_gds_switch,
.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
.test_ring = gfx_v8_0_ring_test_ring,
.test_ib = gfx_v8_0_ring_test_ib,
.insert_nop = amdgpu_ring_insert_nop,
@@ -5131,8 +5150,7 @@ static u32 gfx_v8_0_get_cu_active_bitmap(struct amdgpu_device *adev)
data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
- mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_backends_per_se /
- adev->gfx.config.max_sh_per_se);
+ mask = gfx_v8_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
return (~data) & mask;
}
@@ -5146,6 +5164,8 @@ int gfx_v8_0_get_cu_info(struct amdgpu_device *adev,
if (!adev || !cu_info)
return -EINVAL;
+ memset(cu_info, 0, sizeof(*cu_info));
+
mutex_lock(&adev->grbm_idx_mutex);
for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 68ee66b38e5c..82ce7d943884 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -339,7 +339,7 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
tmp = RREG32(mmHDP_MISC_CNTL);
- tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
WREG32(mmHDP_MISC_CNTL, tmp);
tmp = RREG32(mmHDP_HOST_PATH_CNTL);
@@ -793,7 +793,7 @@ static void gmc_v7_0_enable_mc_ls(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_LS))
data |= mc_cg_ls_en[i];
else
data &= ~mc_cg_ls_en[i];
@@ -810,7 +810,7 @@ static void gmc_v7_0_enable_mc_mgcg(struct amdgpu_device *adev,
for (i = 0; i < ARRAY_SIZE(mc_cg_registers); i++) {
orig = data = RREG32(mc_cg_registers[i]);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_MC_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_MC_MGCG))
data |= mc_cg_en[i];
else
data &= ~mc_cg_en[i];
@@ -826,7 +826,7 @@ static void gmc_v7_0_enable_bif_mgls(struct amdgpu_device *adev,
orig = data = RREG32_PCIE(ixPCIE_CNTL2);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_BIF_LS)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_BIF_LS)) {
data = REG_SET_FIELD(data, PCIE_CNTL2, SLV_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, MST_MEM_LS_EN, 1);
data = REG_SET_FIELD(data, PCIE_CNTL2, REPLAY_MEM_LS_EN, 1);
@@ -849,7 +849,7 @@ static void gmc_v7_0_enable_hdp_mgcg(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_HOST_PATH_CNTL);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_MGCG))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_MGCG))
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 0);
else
data = REG_SET_FIELD(data, HDP_HOST_PATH_CNTL, CLOCK_GATING_DIS, 1);
@@ -865,7 +865,7 @@ static void gmc_v7_0_enable_hdp_ls(struct amdgpu_device *adev,
orig = data = RREG32(mmHDP_MEM_POWER_LS);
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_HDP_LS))
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_HDP_LS))
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 1);
else
data = REG_SET_FIELD(data, HDP_MEM_POWER_LS, LS_ENABLE, 0);
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 757803ae7c4a..29bd7b57dc91 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -386,7 +386,7 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
WREG32(mmBIF_FB_EN, BIF_FB_EN__FB_READ_EN_MASK | BIF_FB_EN__FB_WRITE_EN_MASK);
tmp = RREG32(mmHDP_MISC_CNTL);
- tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 1);
+ tmp = REG_SET_FIELD(tmp, HDP_MISC_CNTL, FLUSH_INVALIDATE_CACHE, 0);
WREG32(mmHDP_MISC_CNTL, tmp);
tmp = RREG32(mmHDP_HOST_PATH_CNTL);
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 7e9154c7f1db..654d76723bc3 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -2859,11 +2859,11 @@ static int kv_dpm_init(struct amdgpu_device *adev)
pi->voltage_drop_t = 0;
pi->caps_sclk_throttle_low_notification = false;
pi->caps_fps = false; /* true? */
- pi->caps_uvd_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_UVD) ? true : false;
+ pi->caps_uvd_pg = (adev->pg_flags & AMD_PG_SUPPORT_UVD) ? true : false;
pi->caps_uvd_dpm = true;
- pi->caps_vce_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_VCE) ? true : false;
- pi->caps_samu_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_SAMU) ? true : false;
- pi->caps_acp_pg = (adev->pg_flags & AMDGPU_PG_SUPPORT_ACP) ? true : false;
+ pi->caps_vce_pg = (adev->pg_flags & AMD_PG_SUPPORT_VCE) ? true : false;
+ pi->caps_samu_pg = (adev->pg_flags & AMD_PG_SUPPORT_SAMU) ? true : false;
+ pi->caps_acp_pg = (adev->pg_flags & AMD_PG_SUPPORT_ACP) ? true : false;
pi->caps_stable_p_state = false;
ret = kv_parse_sys_info_table(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
index 29ec986dd6fc..6e0a86a563f3 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
@@ -32,8 +32,8 @@
#include "oss/oss_2_4_d.h"
#include "oss/oss_2_4_sh_mask.h"
-#include "gmc/gmc_8_1_d.h"
-#include "gmc/gmc_8_1_sh_mask.h"
+#include "gmc/gmc_7_1_d.h"
+#include "gmc/gmc_7_1_sh_mask.h"
#include "gca/gfx_8_0_d.h"
#include "gca/gfx_8_0_enum.h"
@@ -244,7 +244,7 @@ static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
@@ -300,6 +300,13 @@ static void sdma_v2_4_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v2_4_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
/**
* sdma_v2_4_ring_emit_fence - emit a fence on the DMA ring
*
@@ -694,8 +701,7 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[7] = SDMA_PKT_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -721,7 +727,8 @@ static int sdma_v2_4_ring_test_ib(struct amdgpu_ring *ring)
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -874,6 +881,31 @@ static void sdma_v2_4_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
}
/**
+ * sdma_v2_4_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v2_4_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
* sdma_v2_4_ring_emit_vm_flush - cik vm flush using sDMA
*
* @ring: amdgpu_ring pointer
@@ -1274,8 +1306,10 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
.parse_cs = NULL,
.emit_ib = sdma_v2_4_ring_emit_ib,
.emit_fence = sdma_v2_4_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v2_4_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v2_4_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = sdma_v2_4_ring_emit_hdp_invalidate,
.test_ring = sdma_v2_4_ring_test_ring,
.test_ib = sdma_v2_4_ring_test_ib,
.insert_nop = sdma_v2_4_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
index 6f064d7076e6..8c8ca98dd129 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
@@ -355,7 +355,7 @@ static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_ib *ib)
{
- u32 vmid = (ib->vm ? ib->vm->ids[ring->idx].id : 0) & 0xf;
+ u32 vmid = ib->vm_id & 0xf;
u32 next_rptr = ring->wptr + 5;
while ((next_rptr & 7) != 2)
@@ -410,6 +410,14 @@ static void sdma_v3_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(10)); /* retry count, poll interval */
}
+static void sdma_v3_0_ring_emit_hdp_invalidate(struct amdgpu_ring *ring)
+{
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
+ SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
+ amdgpu_ring_write(ring, mmHDP_DEBUG0);
+ amdgpu_ring_write(ring, 1);
+}
+
/**
* sdma_v3_0_ring_emit_fence - emit a fence on the DMA ring
*
@@ -845,8 +853,7 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
ib.ptr[7] = SDMA_PKT_NOP_HEADER_OP(SDMA_OP_NOP);
ib.length_dw = 8;
- r = amdgpu_ib_schedule(ring, 1, &ib, AMDGPU_FENCE_OWNER_UNDEFINED,
- NULL, &f);
+ r = amdgpu_ib_schedule(ring, 1, &ib, NULL, &f);
if (r)
goto err1;
@@ -871,7 +878,8 @@ static int sdma_v3_0_ring_test_ib(struct amdgpu_ring *ring)
}
err1:
fence_put(f);
- amdgpu_ib_free(adev, &ib);
+ amdgpu_ib_free(adev, &ib, NULL);
+ fence_put(f);
err0:
amdgpu_wb_free(adev, index);
return r;
@@ -1024,6 +1032,31 @@ static void sdma_v3_0_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib
}
/**
+ * sdma_v3_0_ring_emit_pipeline_sync - sync the pipeline
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Make sure all previous operations are completed (CIK).
+ */
+static void sdma_v3_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
+{
+ uint32_t seq = ring->fence_drv.sync_seq;
+ uint64_t addr = ring->fence_drv.gpu_addr;
+
+ /* wait for idle */
+ amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_POLL_REGMEM) |
+ SDMA_PKT_POLL_REGMEM_HEADER_HDP_FLUSH(0) |
+ SDMA_PKT_POLL_REGMEM_HEADER_FUNC(3) | /* equal */
+ SDMA_PKT_POLL_REGMEM_HEADER_MEM_POLL(1));
+ amdgpu_ring_write(ring, addr & 0xfffffffc);
+ amdgpu_ring_write(ring, upper_32_bits(addr) & 0xffffffff);
+ amdgpu_ring_write(ring, seq); /* reference */
+ amdgpu_ring_write(ring, 0xfffffff); /* mask */
+ amdgpu_ring_write(ring, SDMA_PKT_POLL_REGMEM_DW5_RETRY_COUNT(0xfff) |
+ SDMA_PKT_POLL_REGMEM_DW5_INTERVAL(4)); /* retry count, poll interval */
+}
+
+/**
* sdma_v3_0_ring_emit_vm_flush - cik vm flush using sDMA
*
* @ring: amdgpu_ring pointer
@@ -1541,8 +1574,10 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
.parse_cs = NULL,
.emit_ib = sdma_v3_0_ring_emit_ib,
.emit_fence = sdma_v3_0_ring_emit_fence,
+ .emit_pipeline_sync = sdma_v3_0_ring_emit_pipeline_sync,
.emit_vm_flush = sdma_v3_0_ring_emit_vm_flush,
.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
+ .emit_hdp_invalidate = sdma_v3_0_ring_emit_hdp_invalidate,
.test_ring = sdma_v3_0_ring_test_ring,
.test_ib = sdma_v3_0_ring_test_ib,
.insert_nop = sdma_v3_0_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 70ed73fa5156..c606ccb38d8b 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -588,7 +588,7 @@ static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
{
u32 orig, data;
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG)) {
data = RREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL);
data = 0xfff;
WREG32_UVD_CTX(ixUVD_CGC_MEM_CTRL, data);
@@ -814,6 +814,9 @@ static int uvd_v4_2_set_clockgating_state(void *handle,
bool gate = false;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+ return 0;
+
if (state == AMD_CG_STATE_GATE)
gate = true;
@@ -832,7 +835,10 @@ static int uvd_v4_2_set_powergating_state(void *handle,
* revisit this when there is a cleaner line between
* the smc and the hw blocks
*/
- struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+ return 0;
if (state == AMD_PG_STATE_GATE) {
uvd_v4_2_stop(adev);
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 578ffb62fdb2..e3c852d9d79a 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -757,6 +757,11 @@ static int uvd_v5_0_process_interrupt(struct amdgpu_device *adev,
static int uvd_v5_0_set_clockgating_state(void *handle,
enum amd_clockgating_state state)
{
+ struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
+ return 0;
+
return 0;
}
@@ -772,6 +777,9 @@ static int uvd_v5_0_set_powergating_state(void *handle,
*/
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+ return 0;
+
if (state == AMD_PG_STATE_GATE) {
uvd_v5_0_stop(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index d4da1f04378c..3375e614ac67 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -536,7 +536,7 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
uvd_v6_0_mc_resume(adev);
/* Set dynamic clock gating in S/W control mode */
- if (adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG) {
+ if (adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG) {
if (adev->flags & AMD_IS_APU)
cz_set_uvd_clock_gating_branches(adev, false);
else
@@ -983,7 +983,7 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
- if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_UVD_MGCG))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
return 0;
if (enable) {
@@ -1013,6 +1013,9 @@ static int uvd_v6_0_set_powergating_state(void *handle,
*/
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
+ return 0;
+
if (state == AMD_PG_STATE_GATE) {
uvd_v6_0_stop(adev);
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 9c804f436974..c7e885bcfd41 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -373,7 +373,7 @@ static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
{
bool sw_cg = false;
- if (enable && (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)) {
+ if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
if (sw_cg)
vce_v2_0_set_sw_cg(adev, true);
else
@@ -608,6 +608,9 @@ static int vce_v2_0_set_powergating_state(void *handle,
*/
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+ return 0;
+
if (state == AMD_PG_STATE_GATE)
/* XXX do we need a vce_v2_0_stop()? */
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 8f8d479061f8..ce468ee5da2a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -277,7 +277,7 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
WREG32_P(mmVCE_STATUS, 0, ~1);
/* Set Clock-Gating off */
- if (adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG)
+ if (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)
vce_v3_0_set_vce_sw_clock_gating(adev, false);
if (r) {
@@ -676,7 +676,7 @@ static int vce_v3_0_set_clockgating_state(void *handle,
bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
int i;
- if (!(adev->cg_flags & AMDGPU_CG_SUPPORT_VCE_MGCG))
+ if (!(adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG))
return 0;
mutex_lock(&adev->grbm_idx_mutex);
@@ -728,6 +728,9 @@ static int vce_v3_0_set_powergating_state(void *handle,
*/
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+ if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
+ return 0;
+
if (state == AMD_PG_STATE_GATE)
/* XXX do we need a vce_v3_0_stop()? */
return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 125003517544..1c120efa292c 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -1071,29 +1071,24 @@ static int vi_common_early_init(void *handle)
adev->external_rev_id = 0xFF;
switch (adev->asic_type) {
case CHIP_TOPAZ:
- adev->has_uvd = false;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = 0x1;
break;
case CHIP_FIJI:
- adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x3c;
break;
case CHIP_TONGA:
- adev->has_uvd = true;
adev->cg_flags = 0;
adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x14;
break;
case CHIP_CARRIZO:
case CHIP_STONEY:
- adev->has_uvd = true;
adev->cg_flags = 0;
- /* Disable UVD pg */
- adev->pg_flags = /* AMDGPU_PG_SUPPORT_UVD | */AMDGPU_PG_SUPPORT_VCE;
+ adev->pg_flags = 0;
adev->external_rev_id = adev->rev_id + 0x1;
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index d98aa9d82fa1..ace49976f7be 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -71,8 +71,6 @@
#define VMID(x) ((x) << 4)
#define QUEUEID(x) ((x) << 8)
-#define RB_BITMAP_WIDTH_PER_SH 2
-
#define MC_SEQ_MISC0__MT__MASK 0xf0000000
#define MC_SEQ_MISC0__MT__GDDR1 0x10000000
#define MC_SEQ_MISC0__MT__DDR2 0x20000000
OpenPOWER on IntegriCloud