22 files changed, 227 insertions, 126 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 8a707d8bbb1c..f92597c292fe 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -206,11 +206,9 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 					struct amdgpu_amdkfd_fence ***ef_list,
 					unsigned int *ef_count)
 {
-	struct reservation_object_list *fobj;
-	struct reservation_object *resv;
-	unsigned int i = 0, j = 0, k = 0, shared_count;
-	unsigned int count = 0;
-	struct amdgpu_amdkfd_fence **fence_list;
+	struct reservation_object *resv = bo->tbo.resv;
+	struct reservation_object_list *old, *new;
+	unsigned int i, j, k;
 
 	if (!ef && !ef_list)
 		return -EINVAL;
@@ -220,76 +218,67 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
 		*ef_count = 0;
 	}
 
-	resv = bo->tbo.resv;
-	fobj = reservation_object_get_list(resv);
-
-	if (!fobj)
+	old = reservation_object_get_list(resv);
+	if (!old)
 		return 0;
 
-	preempt_disable();
-	write_seqcount_begin(&resv->seq);
+	new = kmalloc(offsetof(typeof(*new), shared[old->shared_max]),
+		      GFP_KERNEL);
+	if (!new)
+		return -ENOMEM;
 
-	/* Go through all the shared fences in the resevation object. If
-	 * ef is specified and it exists in the list, remove it and reduce the
-	 * count. If ef is not specified, then get the count of eviction fences
-	 * present.
+	/* Go through all the shared fences in the resevation object and sort
+	 * the interesting ones to the end of the list.
 	 */
-	shared_count = fobj->shared_count;
-	for (i = 0; i < shared_count; ++i) {
+	for (i = 0, j = old->shared_count, k = 0; i < old->shared_count; ++i) {
 		struct dma_fence *f;
 
-		f = rcu_dereference_protected(fobj->shared[i],
+		f = rcu_dereference_protected(old->shared[i],
 					      reservation_object_held(resv));
 
-		if (ef) {
-			if (f->context == ef->base.context) {
-				dma_fence_put(f);
-				fobj->shared_count--;
-			} else {
-				RCU_INIT_POINTER(fobj->shared[j++], f);
-			}
-		} else if (to_amdgpu_amdkfd_fence(f))
-			count++;
+		if ((ef && f->context == ef->base.context) ||
+		    (!ef && to_amdgpu_amdkfd_fence(f)))
+			RCU_INIT_POINTER(new->shared[--j], f);
+		else
+			RCU_INIT_POINTER(new->shared[k++], f);
 	}
-	write_seqcount_end(&resv->seq);
-	preempt_enable();
-
-	if (ef || !count)
-		return 0;
-
-	/* Alloc memory for count number of eviction fence pointers. Fill the
-	 * ef_list array and ef_count
-	 */
-	fence_list = kcalloc(count, sizeof(struct amdgpu_amdkfd_fence *),
-			     GFP_KERNEL);
-	if (!fence_list)
-		return -ENOMEM;
+	new->shared_max = old->shared_max;
+	new->shared_count = k;
 
-	preempt_disable();
-	write_seqcount_begin(&resv->seq);
+	if (!ef) {
+		unsigned int count = old->shared_count - j;
 
-	j = 0;
-	for (i = 0; i < shared_count; ++i) {
-		struct dma_fence *f;
-		struct amdgpu_amdkfd_fence *efence;
-
-		f = rcu_dereference_protected(fobj->shared[i],
-			reservation_object_held(resv));
+		/* Alloc memory for count number of eviction fence pointers.
+		 * Fill the ef_list array and ef_count
+		 */
+		*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
+		*ef_count = count;
 
-		efence = to_amdgpu_amdkfd_fence(f);
-		if (efence) {
-			fence_list[k++] = efence;
-			fobj->shared_count--;
-		} else {
-			RCU_INIT_POINTER(fobj->shared[j++], f);
+		if (!*ef_list) {
+			kfree(new);
+			return -ENOMEM;
 		}
 	}
 
+	/* Install the new fence list, seqcount provides the barriers */
+	preempt_disable();
+	write_seqcount_begin(&resv->seq);
+	RCU_INIT_POINTER(resv->fence, new);
 	write_seqcount_end(&resv->seq);
 	preempt_enable();
 
-	*ef_list = fence_list;
-	*ef_count = k;
+	/* Drop the references to the removed fences or move them to ef_list */
+	for (i = j, k = 0; i < old->shared_count; ++i) {
+		struct dma_fence *f;
+
+		f = rcu_dereference_protected(new->shared[i],
+					      reservation_object_held(resv));
+		if (!ef)
+			(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
+		else
+			dma_fence_put(f);
+	}
+	kfree_rcu(old, rcu);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 1e66dfd0e39c..8ab5ccbc14ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -2274,7 +2274,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
 	case CHIP_VEGA10:
 	case CHIP_VEGA12:
 	case CHIP_VEGA20:
-#ifdef CONFIG_X86
+#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case CHIP_RAVEN:
 #endif
 		return amdgpu_dc != 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index a365ea2383d1..e55508b39496 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -178,12 +178,18 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
  *
  * @amn: our notifier
  */
-static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
+static int amdgpu_mn_read_lock(struct amdgpu_mn *amn, bool blockable)
 {
-	mutex_lock(&amn->read_lock);
+	if (blockable)
+		mutex_lock(&amn->read_lock);
+	else if (!mutex_trylock(&amn->read_lock))
+		return -EAGAIN;
+
 	if (atomic_inc_return(&amn->recursion) == 1)
 		down_read_non_owner(&amn->lock);
 	mutex_unlock(&amn->read_lock);
+
+	return 0;
 }
 
 /**
@@ -239,10 +245,11 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
-static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -250,17 +257,28 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	/* TODO we should be able to split locking for interval tree and
+	 * amdgpu_mn_invalidate_node
+	 */
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
 		amdgpu_mn_invalidate_node(node, start, end);
 	}
+
+	return 0;
 }
 
 /**
@@ -275,10 +293,11 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
  * necessitates evicting all user-mode queues of the process. The BOs
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
-static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
+static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 						 struct mm_struct *mm,
 						 unsigned long start,
-						 unsigned long end)
+						 unsigned long end,
+						 bool blockable)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
@@ -286,13 +305,19 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 	/* notification is exclusive, but interval is inclusive */
 	end -= 1;
 
-	amdgpu_mn_read_lock(amn);
+	if (amdgpu_mn_read_lock(amn, blockable))
+		return -EAGAIN;
 
 	it = interval_tree_iter_first(&amn->objects, start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
+		if (!blockable) {
+			amdgpu_mn_read_unlock(amn);
+			return -EAGAIN;
+		}
+
 		node = container_of(it, struct amdgpu_mn_node, it);
 		it = interval_tree_iter_next(it, start, end);
 
@@ -304,6 +329,8 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
 				amdgpu_amdkfd_evict_userptr(mem, mm);
 		}
 	}
+
+	return 0;
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 9f1a5bd39ae8..5b39d1399630 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -131,6 +131,11 @@ psp_cmd_submit_buf(struct psp_context *psp,
 		msleep(1);
 	}
 
+	if (ucode) {
+		ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
+		ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
+	}
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
index 86a0715d9431..1cafe8d83a4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sched.c
@@ -53,9 +53,8 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 						  int fd,
 						  enum drm_sched_priority priority)
 {
-	struct file *filp = fcheck(fd);
+	struct file *filp = fget(fd);
 	struct drm_file *file;
-	struct pid *pid;
 	struct amdgpu_fpriv *fpriv;
 	struct amdgpu_ctx *ctx;
 	uint32_t id;
@@ -63,20 +62,12 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
 	if (!filp)
 		return -EINVAL;
 
-	pid = get_pid(((struct drm_file *)filp->private_data)->pid);
+	file = filp->private_data;
+	fpriv = file->driver_priv;
+	idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
+		amdgpu_ctx_priority_override(ctx, priority);
 
-	mutex_lock(&adev->ddev->filelist_mutex);
-	list_for_each_entry(file, &adev->ddev->filelist, lhead) {
-		if (file->pid != pid)
-			continue;
-
-		fpriv = file->driver_priv;
-		idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
-				amdgpu_ctx_priority_override(ctx, priority);
-	}
-	mutex_unlock(&adev->ddev->filelist_mutex);
-
-	put_pid(pid);
+	fput(filp);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
index 08e38579af24..bdc472b6e641 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ucode.h
@@ -194,6 +194,7 @@ enum AMDGPU_UCODE_ID {
 	AMDGPU_UCODE_ID_SMC,
 	AMDGPU_UCODE_ID_UVD,
 	AMDGPU_UCODE_ID_VCE,
+	AMDGPU_UCODE_ID_VCN,
 	AMDGPU_UCODE_ID_MAXIMUM,
 };
 
@@ -226,6 +227,9 @@ struct amdgpu_firmware_info {
 	void *kaddr;
 	/* ucode_size_bytes */
 	uint32_t ucode_size;
+	/* starting tmr mc address */
+	uint32_t tmr_mc_addr_lo;
+	uint32_t tmr_mc_addr_hi;
 };
 
 void amdgpu_ucode_print_mc_hdr(const struct common_firmware_header *hdr);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 632fa5980ff4..e5a6db6beab7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -122,8 +122,6 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work);
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 {
-	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
 	unsigned long bo_size;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
@@ -266,13 +264,6 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 		}
 	}
 
-	ring = &adev->uvd.inst[0].ring;
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
-	if (r) {
-		DRM_ERROR("Failed setting up UVD kernel entity.\n");
-		return r;
-	}
 	for (i = 0; i < adev->uvd.max_handles; ++i) {
 		atomic_set(&adev->uvd.handles[i], 0);
 		adev->uvd.filp[i] = NULL;
@@ -311,7 +302,7 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 	for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
 		if (adev->uvd.harvest_config & (1 << j))
 			continue;
-		kfree(adev->uvd.inst[j].saved_bo);
+		kvfree(adev->uvd.inst[j].saved_bo);
 
 		amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
 				      &adev->uvd.inst[j].gpu_addr,
@@ -327,6 +318,29 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
 	return 0;
 }
 
+/**
+ * amdgpu_uvd_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	struct drm_sched_rq *rq;
+	int r;
+
+	ring = &adev->uvd.inst[0].ring;
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
+	if (r) {
+		DRM_ERROR("Failed setting up UVD kernel entity.\n");
+		return r;
+	}
+
+	return 0;
+}
+
 int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 {
 	unsigned size;
@@ -354,7 +368,7 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
 		size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
 		ptr = adev->uvd.inst[j].cpu_addr;
 
-		adev->uvd.inst[j].saved_bo = kmalloc(size, GFP_KERNEL);
+		adev->uvd.inst[j].saved_bo = kvmalloc(size, GFP_KERNEL);
 		if (!adev->uvd.inst[j].saved_bo)
 			return -ENOMEM;
 
@@ -380,7 +394,7 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
 
 		if (adev->uvd.inst[i].saved_bo != NULL) {
 			memcpy_toio(ptr, adev->uvd.inst[i].saved_bo, size);
-			kfree(adev->uvd.inst[i].saved_bo);
+			kvfree(adev->uvd.inst[i].saved_bo);
 			adev->uvd.inst[i].saved_bo = NULL;
 		} else {
 			const struct common_firmware_header *hdr;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
index 33c5f806f925..a3ab1a41060f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@@ -69,6 +69,7 @@ struct amdgpu_uvd {
 
 int amdgpu_uvd_sw_init(struct amdgpu_device *adev);
 int amdgpu_uvd_sw_fini(struct amdgpu_device *adev);
+int amdgpu_uvd_entity_init(struct amdgpu_device *adev);
 int amdgpu_uvd_suspend(struct amdgpu_device *adev);
 int amdgpu_uvd_resume(struct amdgpu_device *adev);
 int amdgpu_uvd_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index b6ab4f5350c8..0cc5190f4f36 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -90,8 +90,6 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work);
  */
 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 {
-	struct amdgpu_ring *ring;
-	struct drm_sched_rq *rq;
 	const char *fw_name;
 	const struct common_firmware_header *hdr;
 	unsigned ucode_version, version_major, version_minor, binary_id;
@@ -188,14 +186,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 		return r;
 	}
 
-	ring = &adev->vce.ring[0];
-	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
-	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
-	if (r != 0) {
-		DRM_ERROR("Failed setting up VCE run queue.\n");
-		return r;
-	}
-
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		atomic_set(&adev->vce.handles[i], 0);
 		adev->vce.filp[i] = NULL;
@@ -236,6 +226,29 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 }
 
 /**
+ * amdgpu_vce_entity_init - init entity
+ *
+ * @adev: amdgpu_device pointer
+ *
+ */
+int amdgpu_vce_entity_init(struct amdgpu_device *adev)
+{
+	struct amdgpu_ring *ring;
+	struct drm_sched_rq *rq;
+	int r;
+
+	ring = &adev->vce.ring[0];
+	rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
+	r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
+	if (r != 0) {
+		DRM_ERROR("Failed setting up VCE run queue.\n");
+		return r;
+	}
+
+	return 0;
+}
+
+/**
  * amdgpu_vce_suspend - unpin VCE fw memory
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
index 71781267ee4c..a1f209eed4c4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@@ -55,6 +55,7 @@ struct amdgpu_vce {
 
 int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size);
 int amdgpu_vce_sw_fini(struct amdgpu_device *adev);
+int amdgpu_vce_entity_init(struct amdgpu_device *adev);
 int amdgpu_vce_suspend(struct amdgpu_device *adev);
 int amdgpu_vce_resume(struct amdgpu_device *adev);
 int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index 798648a19710..fd654a4406db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -111,9 +111,10 @@ int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
 			version_major, version_minor, family_id);
 	}
 
-	bo_size = AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8)
-		  +  AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
+	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_HEAP_SIZE
 		  +  AMDGPU_VCN_SESSION_SIZE * 40;
+	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
+		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
 	r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
 				    AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.vcpu_bo,
 				    &adev->vcn.gpu_addr, &adev->vcn.cpu_addr);
@@ -129,7 +130,7 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
 {
 	int i;
 
-	kfree(adev->vcn.saved_bo);
+	kvfree(adev->vcn.saved_bo);
 
 	amdgpu_bo_free_kernel(&adev->vcn.vcpu_bo,
 			      &adev->vcn.gpu_addr,
@@ -160,7 +161,7 @@ int amdgpu_vcn_suspend(struct amdgpu_device *adev)
 	size = amdgpu_bo_size(adev->vcn.vcpu_bo);
 	ptr = adev->vcn.cpu_addr;
 
-	adev->vcn.saved_bo = kmalloc(size, GFP_KERNEL);
+	adev->vcn.saved_bo = kvmalloc(size, GFP_KERNEL);
 	if (!adev->vcn.saved_bo)
 		return -ENOMEM;
 
@@ -182,18 +183,20 @@ int amdgpu_vcn_resume(struct amdgpu_device *adev)
 
 	if (adev->vcn.saved_bo != NULL) {
 		memcpy_toio(ptr, adev->vcn.saved_bo, size);
-		kfree(adev->vcn.saved_bo);
+		kvfree(adev->vcn.saved_bo);
 		adev->vcn.saved_bo = NULL;
 	} else {
 		const struct common_firmware_header *hdr;
 		unsigned offset;
 
 		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
-		offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
-		memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
-			    le32_to_cpu(hdr->ucode_size_bytes));
-		size -= le32_to_cpu(hdr->ucode_size_bytes);
-		ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
+			offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
+			memcpy_toio(adev->vcn.cpu_addr, adev->vcn.fw->data + offset,
+				    le32_to_cpu(hdr->ucode_size_bytes));
+			size -= le32_to_cpu(hdr->ucode_size_bytes);
+			ptr += le32_to_cpu(hdr->ucode_size_bytes);
+		}
 		memset_io(ptr, 0, size);
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
index 0ff136d02d9b..02be34e72ed9 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v10_0.c
@@ -88,6 +88,9 @@ psp_v10_0_get_fw_type(struct amdgpu_firmware_info *ucode, enum psp_gfx_fw_type *
 	case AMDGPU_UCODE_ID_VCE:
 		*type = GFX_FW_TYPE_VCE;
 		break;
+	case AMDGPU_UCODE_ID_VCN:
+		*type = GFX_FW_TYPE_VCN;
+		break;
 	case AMDGPU_UCODE_ID_MAXIMUM:
 	default:
 		return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 6fed3d7797a8..8a926d1df939 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -123,6 +123,10 @@ static int uvd_v4_2_sw_init(void *handle)
 	ring = &adev->uvd.inst->ring;
 	sprintf(ring->name, "uvd");
 	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+	if (r)
+		return r;
+
+	r = amdgpu_uvd_entity_init(adev);
 
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index aeaa1ca46a99..50248059412e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -120,6 +120,10 @@ static int uvd_v5_0_sw_init(void *handle)
 	ring = &adev->uvd.inst->ring;
 	sprintf(ring->name, "uvd");
 	r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst->irq, 0);
+	if (r)
+		return r;
+
+	r = amdgpu_uvd_entity_init(adev);
 
 	return r;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 598dbeaba636..6ae82cc2e55e 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -440,6 +440,8 @@ static int uvd_v6_0_sw_init(void *handle)
 		}
 	}
 
+	r = amdgpu_uvd_entity_init(adev);
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 5fab3560a71d..9b7f8469bc5c 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -410,6 +410,7 @@ static int uvd_v7_0_early_init(void *handle)
 static int uvd_v7_0_sw_init(void *handle)
 {
 	struct amdgpu_ring *ring;
+
 	int i, j, r;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
@@ -478,6 +479,10 @@ static int uvd_v7_0_sw_init(void *handle)
 		}
 	}
 
+	r = amdgpu_uvd_entity_init(adev);
+	if (r)
+		return r;
+
 	r = amdgpu_virt_alloc_mm_table(adev);
 	if (r)
 		return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index d48e877b682e..7eaa54ba016b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -439,6 +439,8 @@ static int vce_v2_0_sw_init(void *handle)
 			return r;
 	}
 
+	r = amdgpu_vce_entity_init(adev);
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index cc6ce6cc03f4..c8390f9adfd6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -448,6 +448,8 @@ static int vce_v3_0_sw_init(void *handle)
 			return r;
 	}
 
+	r = amdgpu_vce_entity_init(adev);
+
 	return r;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index 65f8860169e9..2e4d1b5f6243 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -419,6 +419,7 @@ static int vce_v4_0_sw_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring;
+
 	unsigned size;
 	int r, i;
 
@@ -438,7 +439,7 @@ static int vce_v4_0_sw_init(void *handle)
 		const struct common_firmware_header *hdr;
 		unsigned size = amdgpu_bo_size(adev->vce.vcpu_bo);
 
-		adev->vce.saved_bo = kmalloc(size, GFP_KERNEL);
+		adev->vce.saved_bo = kvmalloc(size, GFP_KERNEL);
 		if (!adev->vce.saved_bo)
 			return -ENOMEM;
 
@@ -474,6 +475,11 @@ static int vce_v4_0_sw_init(void *handle)
 			return r;
 	}
 
+
+	r = amdgpu_vce_entity_init(adev);
+	if (r)
+		return r;
+
 	r = amdgpu_virt_alloc_mm_table(adev);
 	if (r)
 		return r;
@@ -490,7 +496,7 @@ static int vce_v4_0_sw_fini(void *handle)
 	amdgpu_virt_free_mm_table(adev);
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
-		kfree(adev->vce.saved_bo);
+		kvfree(adev->vce.saved_bo);
 		adev->vce.saved_bo = NULL;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index 2ce91a748c40..072371ef5975 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -100,6 +100,16 @@ static int vcn_v1_0_sw_init(void *handle)
 	if (r)
 		return r;
 
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		const struct common_firmware_header *hdr;
+		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
+		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
+		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
+		adev->firmware.fw_size +=
+			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
+		DRM_INFO("PSP loading VCN firmware\n");
+	}
+
 	r = amdgpu_vcn_resume(adev);
 	if (r)
 		return r;
@@ -265,26 +275,38 @@ static int vcn_v1_0_resume(void *handle)
 static void vcn_v1_0_mc_resume(struct amdgpu_device *adev)
 {
 	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
-
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+	uint32_t offset;
+
+	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
+			     (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_lo));
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+			     (adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].tmr_mc_addr_hi));
+		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0, 0);
+		offset = 0;
+	} else {
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
 			lower_32_bits(adev->vcn.gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
+		WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
 			upper_32_bits(adev->vcn.gpu_addr));
-	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
-				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+		offset = size;
+		WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET0,
+			     AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
+	}
+
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE0, size);
 
 	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
-			lower_32_bits(adev->vcn.gpu_addr + size));
+		     lower_32_bits(adev->vcn.gpu_addr + offset));
 	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
-			upper_32_bits(adev->vcn.gpu_addr + size));
+		     upper_32_bits(adev->vcn.gpu_addr + offset));
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET1, 0);
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_HEAP_SIZE);
 
 	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
-			lower_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+		     lower_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));
 	WREG32_SOC15(UVD, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
-			upper_32_bits(adev->vcn.gpu_addr + size + AMDGPU_VCN_HEAP_SIZE));
+		     upper_32_bits(adev->vcn.gpu_addr + offset + AMDGPU_VCN_HEAP_SIZE));
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_OFFSET2, 0);
 	WREG32_SOC15(UVD, 0, mmUVD_VCPU_CACHE_SIZE2,
 			AMDGPU_VCN_STACK_SIZE + (AMDGPU_VCN_SESSION_SIZE * 40));
diff --git a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
index 52778de93ab0..2d4473557b0d 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega20_reg_init.c
@@ -38,6 +38,7 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[ATHUB_HWIP][i] = (uint32_t *)(&(ATHUB_BASE.instance[i]));
 		adev->reg_offset[NBIO_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
 		adev->reg_offset[MP0_HWIP][i] = (uint32_t *)(&(MP0_BASE.instance[i]));
+		adev->reg_offset[MP1_HWIP][i] = (uint32_t *)(&(MP1_BASE.instance[i]));
 		adev->reg_offset[UVD_HWIP][i] = (uint32_t *)(&(UVD_BASE.instance[i]));
 		adev->reg_offset[VCE_HWIP][i] = (uint32_t *)(&(VCE_BASE.instance[i]));
 		adev->reg_offset[DF_HWIP][i] = (uint32_t *)(&(DF_BASE.instance[i]));
@@ -46,6 +47,8 @@ int vega20_reg_base_init(struct amdgpu_device *adev)
 		adev->reg_offset[SDMA0_HWIP][i] = (uint32_t *)(&(SDMA0_BASE.instance[i]));
 		adev->reg_offset[SDMA1_HWIP][i] = (uint32_t *)(&(SDMA1_BASE.instance[i]));
 		adev->reg_offset[SMUIO_HWIP][i] = (uint32_t *)(&(SMUIO_BASE.instance[i]));
+		adev->reg_offset[NBIF_HWIP][i] = (uint32_t *)(&(NBIO_BASE.instance[i]));
+		adev->reg_offset[THM_HWIP][i] = (uint32_t *)(&(THM_BASE.instance[i]));
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 42c8ad105b05..88b57a5e9489 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -112,8 +112,8 @@ static u32 vi_smc_rreg(struct amdgpu_device *adev, u32 reg)
 	u32 r;
 
 	spin_lock_irqsave(&adev->smc_idx_lock, flags);
-	WREG32(mmSMC_IND_INDEX_11, (reg));
-	r = RREG32(mmSMC_IND_DATA_11);
+	WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+	r = RREG32_NO_KIQ(mmSMC_IND_DATA_11);
 	spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
 	return r;
 }