471 files changed, 19701 insertions, 5973 deletions
diff --git a/drivers/gpu/drm/Kconfig b/drivers/gpu/drm/Kconfig
index 90bc65d07a35..88e01e08e279 100644
--- a/drivers/gpu/drm/Kconfig
+++ b/drivers/gpu/drm/Kconfig
@@ -263,6 +263,8 @@ source "drivers/gpu/drm/mxsfb/Kconfig"
 
 source "drivers/gpu/drm/meson/Kconfig"
 
+source "drivers/gpu/drm/tinydrm/Kconfig"
+
 # Keep legacy drivers last
 
 menuconfig DRM_LEGACY
diff --git a/drivers/gpu/drm/Makefile b/drivers/gpu/drm/Makefile
index 92de3991fa56..3ee95793d122 100644
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@@ -94,3 +94,4 @@ obj-$(CONFIG_DRM_ARCPGU)+= arc/
 obj-y			+= hisilicon/
 obj-$(CONFIG_DRM_ZTE)	+= zte/
 obj-$(CONFIG_DRM_MXSFB)	+= mxsfb/
+obj-$(CONFIG_DRM_TINYDRM) += tinydrm/
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 94a64e3bc682..c1b913541739 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1037,7 +1037,6 @@ struct amdgpu_uvd {
 	bool			use_ctx_buf;
 	struct amd_sched_entity entity;
 	uint32_t                srbm_soft_reset;
-	bool			is_powergated;
 };
 
 /*
@@ -1066,7 +1065,6 @@ struct amdgpu_vce {
 	struct amd_sched_entity	entity;
 	uint32_t                srbm_soft_reset;
 	unsigned		num_rings;
-	bool			is_powergated;
 };
 
 /*
@@ -1484,6 +1482,9 @@ struct amdgpu_device {
 	spinlock_t			gtt_list_lock;
 	struct list_head                gtt_list;
 
+	/* record hw reset is performed */
+	bool has_hw_reset;
+
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@@ -1702,13 +1703,14 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 int amdgpu_gpu_reset(struct amdgpu_device *adev);
 bool amdgpu_need_backup(struct amdgpu_device *adev);
 void amdgpu_pci_config_reset(struct amdgpu_device *adev);
-bool amdgpu_card_posted(struct amdgpu_device *adev);
+bool amdgpu_need_post(struct amdgpu_device *adev);
 void amdgpu_update_display_priority(struct amdgpu_device *adev);
 
 int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data);
 int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		       u32 ip_instance, u32 ring,
 		       struct amdgpu_ring **out_ring);
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes);
 void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
 bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
 int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
index d9def01f276e..821f7cc2051f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@@ -100,7 +100,7 @@ static bool igp_read_bios_from_vram(struct amdgpu_device *adev)
 	resource_size_t size = 256 * 1024; /* ??? */
 
 	if (!(adev->flags & AMD_IS_APU))
-		if (!amdgpu_card_posted(adev))
+		if (amdgpu_need_post(adev))
 			return false;
 
 	adev->bios = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index c02db01f6583..0218cea6be4d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -70,10 +70,10 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
 	struct amdgpu_bo_list *list;
 
 	mutex_lock(&fpriv->bo_list_lock);
-	list = idr_find(&fpriv->bo_list_handles, id);
+	list = idr_remove(&fpriv->bo_list_handles, id);
 	if (list) {
+		/* Another user may have a reference to this list still */
 		mutex_lock(&list->lock);
-		idr_remove(&fpriv->bo_list_handles, id);
 		mutex_unlock(&list->lock);
 		amdgpu_bo_list_free(list);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index a5df1ef306d9..d9e5aa4a79ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -834,32 +834,57 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 			case CHIP_TOPAZ:
 				if (((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x81)) ||
 				    ((adev->pdev->device == 0x6900) && (adev->pdev->revision == 0x83)) ||
-				    ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87)))
+				    ((adev->pdev->device == 0x6907) && (adev->pdev->revision == 0x87))) {
+					info->is_kicker = true;
 					strcpy(fw_name, "amdgpu/topaz_k_smc.bin");
-				else
+				} else
 					strcpy(fw_name, "amdgpu/topaz_smc.bin");
 				break;
 			case CHIP_TONGA:
 				if (((adev->pdev->device == 0x6939) && (adev->pdev->revision == 0xf1)) ||
-				    ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1)))
+				    ((adev->pdev->device == 0x6938) && (adev->pdev->revision == 0xf1))) {
+					info->is_kicker = true;
 					strcpy(fw_name, "amdgpu/tonga_k_smc.bin");
-				else
+				} else
 					strcpy(fw_name, "amdgpu/tonga_smc.bin");
 				break;
 			case CHIP_FIJI:
 				strcpy(fw_name, "amdgpu/fiji_smc.bin");
 				break;
 			case CHIP_POLARIS11:
-				if (type == CGS_UCODE_ID_SMU)
-					strcpy(fw_name, "amdgpu/polaris11_smc.bin");
-				else if (type == CGS_UCODE_ID_SMU_SK)
+				if (type == CGS_UCODE_ID_SMU) {
+					if (((adev->pdev->device == 0x67ef) &&
+					     ((adev->pdev->revision == 0xe0) ||
+					      (adev->pdev->revision == 0xe2) ||
+					      (adev->pdev->revision == 0xe5))) ||
+					    ((adev->pdev->device == 0x67ff) &&
+					     ((adev->pdev->revision == 0xcf) ||
+					      (adev->pdev->revision == 0xef) ||
+					      (adev->pdev->revision == 0xff)))) {
+						info->is_kicker = true;
+						strcpy(fw_name, "amdgpu/polaris11_k_smc.bin");
+					} else
+						strcpy(fw_name, "amdgpu/polaris11_smc.bin");
+				} else if (type == CGS_UCODE_ID_SMU_SK) {
 					strcpy(fw_name, "amdgpu/polaris11_smc_sk.bin");
+				}
 				break;
 			case CHIP_POLARIS10:
-				if (type == CGS_UCODE_ID_SMU)
-					strcpy(fw_name, "amdgpu/polaris10_smc.bin");
-				else if (type == CGS_UCODE_ID_SMU_SK)
+				if (type == CGS_UCODE_ID_SMU) {
+					if ((adev->pdev->device == 0x67df) &&
+					    ((adev->pdev->revision == 0xe0) ||
+					     (adev->pdev->revision == 0xe3) ||
+					     (adev->pdev->revision == 0xe4) ||
+					     (adev->pdev->revision == 0xe5) ||
+					     (adev->pdev->revision == 0xe7) ||
+					     (adev->pdev->revision == 0xef))) {
+						info->is_kicker = true;
+						strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
+					} else
+						strcpy(fw_name, "amdgpu/polaris10_smc.bin");
+				} else if (type == CGS_UCODE_ID_SMU_SK) {
 					strcpy(fw_name, "amdgpu/polaris10_smc_sk.bin");
+				}
 				break;
 			case CHIP_POLARIS12:
 				strcpy(fw_name, "amdgpu/polaris12_smc.bin");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index cf2e8c4e9b8b..d2d0f60ff36d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -83,6 +83,13 @@ int amdgpu_cs_get_ring(struct amdgpu_device *adev, u32 ip_type,
 		}
 		break;
 	}
+
+	if (!(*out_ring && (*out_ring)->adev)) {
+		DRM_ERROR("Ring %d is not initialized on IP %d\n",
+			  ring, ip_type);
+		return -EINVAL;
+	}
+
 	return 0;
 }
 
@@ -344,8 +351,7 @@ static u64 amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev)
  * submission. This can result in a debt that can stop buffer migrations
  * temporarily.
  */
-static void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev,
-					 u64 num_bytes)
+void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes)
 {
 	spin_lock(&adev->mm_stats.lock);
 	adev->mm_stats.accum_us -= bytes_to_us(adev, num_bytes);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 400c66ba4c6b..cf0500671353 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -135,15 +135,11 @@ static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
 	struct amdgpu_ctx *ctx;
 
 	mutex_lock(&mgr->lock);
-	ctx = idr_find(&mgr->ctx_handles, id);
-	if (ctx) {
-		idr_remove(&mgr->ctx_handles, id);
+	ctx = idr_remove(&mgr->ctx_handles, id);
+	if (ctx)
 		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
-		mutex_unlock(&mgr->lock);
-		return 0;
-	}
 	mutex_unlock(&mgr->lock);
-	return -EINVAL;
+	return ctx ? 0 : -EINVAL;
 }
 
 static int amdgpu_ctx_query(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 944ba0d3874a..6abb238b25c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -619,25 +619,29 @@ void amdgpu_gtt_location(struct amdgpu_device *adev, struct amdgpu_mc *mc)
  * GPU helpers function.
  */
 /**
- * amdgpu_card_posted - check if the hw has already been initialized
+ * amdgpu_need_post - check if the hw need post or not
  *
  * @adev: amdgpu_device pointer
  *
- * Check if the asic has been initialized (all asics).
- * Used at driver startup.
- * Returns true if initialized or false if not.
+ * Check if the asic has been initialized (all asics) at driver startup
+ * or post is needed if  hw reset is performed.
+ * Returns true if need or false if not.
  */
-bool amdgpu_card_posted(struct amdgpu_device *adev)
+bool amdgpu_need_post(struct amdgpu_device *adev)
 {
 	uint32_t reg;
 
+	if (adev->has_hw_reset) {
+		adev->has_hw_reset = false;
+		return true;
+	}
 	/* then check MEM_SIZE, in case the crtcs are off */
 	reg = RREG32(mmCONFIG_MEMSIZE);
 
 	if (reg)
-		return true;
+		return false;
 
-	return false;
+	return true;
 
 }
 
@@ -665,7 +669,7 @@ static bool amdgpu_vpost_needed(struct amdgpu_device *adev)
 				return true;
 		}
 	}
-	return !amdgpu_card_posted(adev);
+	return amdgpu_need_post(adev);
 }
 
 /**
@@ -2071,7 +2075,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	amdgpu_atombios_scratch_regs_restore(adev);
 
 	/* post card */
-	if (!amdgpu_card_posted(adev) || !resume) {
+	if (amdgpu_need_post(adev)) {
 		r = amdgpu_atom_asic_init(adev->mode_info.atom_context);
 		if (r)
 			DRM_ERROR("amdgpu asic init failed\n");
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
index 838943d0962e..36ce3cac81ba 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
@@ -374,7 +374,6 @@ int amdgpu_fbdev_init(struct amdgpu_device *adev)
 			&amdgpu_fb_helper_funcs);
 
 	ret = drm_fb_helper_init(adev->ddev, &rfbdev->helper,
-				 adev->mode_info.num_crtc,
 				 AMDGPUFB_CONN_LIMIT);
 	if (ret) {
 		kfree(rfbdev);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 9bd1b4eae32e..51d759463384 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -487,67 +487,44 @@ static int amdgpu_gem_va_check(void *param, struct amdgpu_bo *bo)
  *
  * @adev: amdgpu_device pointer
  * @bo_va: bo_va to update
+ * @list: validation list
+ * @operation: map or unmap
  *
- * Update the bo_va directly after setting it's address. Errors are not
+ * Update the bo_va directly after setting its address. Errors are not
  * vital here, so they are not reported back to userspace.
  */
 static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 				    struct amdgpu_bo_va *bo_va,
+				    struct list_head *list,
 				    uint32_t operation)
 {
-	struct ttm_validate_buffer tv, *entry;
-	struct amdgpu_bo_list_entry vm_pd;
-	struct ww_acquire_ctx ticket;
-	struct list_head list, duplicates;
-	int r;
-
-	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&duplicates);
-
-	tv.bo = &bo_va->bo->tbo;
-	tv.shared = true;
-	list_add(&tv.head, &list);
-
-	amdgpu_vm_get_pd_bo(bo_va->vm, &list, &vm_pd);
+	struct ttm_validate_buffer *entry;
+	int r = -ERESTARTSYS;
 
-	/* Provide duplicates to avoid -EALREADY */
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
-	if (r)
-		goto error_print;
-
-	list_for_each_entry(entry, &list, head) {
+	list_for_each_entry(entry, list, head) {
 		struct amdgpu_bo *bo =
 			container_of(entry->bo, struct amdgpu_bo, tbo);
-
-		/* if anything is swapped out don't swap it in here,
-		   just abort and wait for the next CS */
-		if (!amdgpu_bo_gpu_accessible(bo))
-			goto error_unreserve;
-
-		if (bo->shadow && !amdgpu_bo_gpu_accessible(bo->shadow))
-			goto error_unreserve;
+		if (amdgpu_gem_va_check(NULL, bo))
+			goto error;
 	}
 
 	r = amdgpu_vm_validate_pt_bos(adev, bo_va->vm, amdgpu_gem_va_check,
 				      NULL);
 	if (r)
-		goto error_unreserve;
+		goto error;
 
 	r = amdgpu_vm_update_page_directory(adev, bo_va->vm);
 	if (r)
-		goto error_unreserve;
+		goto error;
 
 	r = amdgpu_vm_clear_freed(adev, bo_va->vm);
 	if (r)
-		goto error_unreserve;
+		goto error;
 
 	if (operation == AMDGPU_VA_OP_MAP)
 		r = amdgpu_vm_bo_update(adev, bo_va, false);
 
-error_unreserve:
-	ttm_eu_backoff_reservation(&ticket, &list);
-
-error_print:
+error:
 	if (r && r != -ERESTARTSYS)
 		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
 }
@@ -564,7 +541,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_bo_list_entry vm_pd;
 	struct ttm_validate_buffer tv;
 	struct ww_acquire_ctx ticket;
-	struct list_head list, duplicates;
+	struct list_head list;
 	uint32_t invalid_flags, va_flags = 0;
 	int r = 0;
 
@@ -602,14 +579,13 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 		return -ENOENT;
 	abo = gem_to_amdgpu_bo(gobj);
 	INIT_LIST_HEAD(&list);
-	INIT_LIST_HEAD(&duplicates);
 	tv.bo = &abo->tbo;
-	tv.shared = true;
+	tv.shared = false;
 	list_add(&tv.head, &list);
 
 	amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
 
-	r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
+	r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
 	if (r) {
 		drm_gem_object_unreference_unlocked(gobj);
 		return r;
@@ -640,10 +616,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 	default:
 		break;
 	}
-	ttm_eu_backoff_reservation(&ticket, &list);
 	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) &&
 	    !amdgpu_vm_debug)
-		amdgpu_gem_va_update_vm(adev, bo_va, args->operation);
+		amdgpu_gem_va_update_vm(adev, bo_va, &list, args->operation);
+	ttm_eu_backoff_reservation(&ticket, &list);
 
 	drm_gem_object_unreference_unlocked(gobj);
 	return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
index e4eb6dd3798a..0335c2f331e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gtt_mgr.c
@@ -97,8 +97,7 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 {
 	struct amdgpu_gtt_mgr *mgr = man->priv;
 	struct drm_mm_node *node = mem->mm_node;
-	enum drm_mm_search_flags sflags = DRM_MM_SEARCH_BEST;
-	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
+	enum drm_mm_insert_mode mode;
 	unsigned long fpfn, lpfn;
 	int r;
 
@@ -115,15 +114,14 @@ int amdgpu_gtt_mgr_alloc(struct ttm_mem_type_manager *man,
 	else
 		lpfn = man->size;
 
-	if (place && place->flags & TTM_PL_FLAG_TOPDOWN) {
-		sflags = DRM_MM_SEARCH_BELOW;
-		aflags = DRM_MM_CREATE_TOP;
-	}
+	mode = DRM_MM_INSERT_BEST;
+	if (place && place->flags & TTM_PL_FLAG_TOPDOWN)
+		mode = DRM_MM_INSERT_HIGH;
 
 	spin_lock(&mgr->lock);
-	r = drm_mm_insert_node_in_range_generic(&mgr->mm, node, mem->num_pages,
-						mem->page_alignment, 0,
-						fpfn, lpfn, sflags, aflags);
+	r = drm_mm_insert_node_in_range(&mgr->mm, node,
+					mem->num_pages, mem->page_alignment, 0,
+					fpfn, lpfn, mode);
 	spin_unlock(&mgr->lock);
 
 	if (!r) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index d1aa291b2638..be80a4a68d7b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -323,6 +323,7 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	struct amdgpu_bo *bo;
 	enum ttm_bo_type type;
 	unsigned long page_align;
+	u64 initial_bytes_moved;
 	size_t acc_size;
 	int r;
 
@@ -374,8 +375,10 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 	 * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
 	 */
 
+#ifndef CONFIG_COMPILE_TEST
 #warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
 	 thanks to write-combining
+#endif
 
 	if (bo->flags & AMDGPU_GEM_CREATE_CPU_GTT_USWC)
 		DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
@@ -399,12 +402,20 @@ int amdgpu_bo_create_restricted(struct amdgpu_device *adev,
 		locked = ww_mutex_trylock(&bo->tbo.ttm_resv.lock);
 		WARN_ON(!locked);
 	}
+
+	initial_bytes_moved = atomic64_read(&adev->num_bytes_moved);
 	r = ttm_bo_init(&adev->mman.bdev, &bo->tbo, size, type,
 			&bo->placement, page_align, !kernel, NULL,
 			acc_size, sg, resv ? resv : &bo->tbo.ttm_resv,
 			&amdgpu_ttm_bo_destroy);
-	if (unlikely(r != 0))
+	amdgpu_cs_report_moved_bytes(adev,
+		atomic64_read(&adev->num_bytes_moved) - initial_bytes_moved);
+
+	if (unlikely(r != 0)) {
+		if (!resv)
+			ww_mutex_unlock(&bo->tbo.resv->lock);
 		return r;
+	}
 
 	bo->tbo.priority = ilog2(bo->tbo.num_pages);
 	if (kernel)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index a61882ddc804..346e80a7119b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -1142,12 +1142,22 @@ void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
 			/* XXX select vce level based on ring/task */
 			adev->pm.dpm.vce_level = AMD_VCE_LEVEL_AC_ALL;
 			mutex_unlock(&adev->pm.mutex);
+			amdgpu_pm_compute_clocks(adev);
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							AMD_PG_STATE_UNGATE);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							AMD_CG_STATE_UNGATE);
 		} else {
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							AMD_PG_STATE_GATE);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							AMD_CG_STATE_GATE);
 			mutex_lock(&adev->pm.mutex);
 			adev->pm.dpm.vce_active = false;
 			mutex_unlock(&adev->pm.mutex);
+			amdgpu_pm_compute_clocks(adev);
 		}
-		amdgpu_pm_compute_clocks(adev);
+
 	}
 }
 
@@ -1286,7 +1296,8 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
 	if (!adev->pm.dpm_enabled)
 		return;
 
-	amdgpu_display_bandwidth_update(adev);
+	if (adev->mode_info.num_crtc)
+		amdgpu_display_bandwidth_update(adev);
 
 	for (i = 0; i < AMDGPU_MAX_RINGS; i++) {
 		struct amdgpu_ring *ring = adev->rings[i];
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 1154b0a8881d..4c6094eefc51 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -529,6 +529,9 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_TT:
 		break;
 	case TTM_PL_VRAM:
+		if (mem->start == AMDGPU_BO_INVALID_OFFSET)
+			return -EINVAL;
+
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		/* check if it's visible */
 		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 6f62ac473064..6d6ab7f11b4c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -1113,6 +1113,11 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 			amdgpu_dpm_enable_uvd(adev, false);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
+			/* shutdown the UVD block */
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							    AMD_PG_STATE_GATE);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							    AMD_CG_STATE_GATE);
 		}
 	} else {
 		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
@@ -1129,6 +1134,10 @@ void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 			amdgpu_dpm_enable_uvd(adev, true);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 53300, 40000);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							    AMD_CG_STATE_UNGATE);
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							    AMD_PG_STATE_UNGATE);
 		}
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 79bc9c7aad45..e2c06780ce49 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -321,6 +321,10 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 			amdgpu_dpm_enable_vce(adev, false);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							    AMD_PG_STATE_GATE);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							    AMD_CG_STATE_GATE);
 		}
 	} else {
 		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
@@ -346,6 +350,11 @@ void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
 			amdgpu_dpm_enable_vce(adev, true);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
+			amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							    AMD_CG_STATE_UNGATE);
+			amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
+							    AMD_PG_STATE_UNGATE);
+
 		}
 	}
 	mutex_unlock(&adev->vce.idle_mutex);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index 3fd951c71d1b..dcfb7df3caf4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -83,7 +83,6 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 		DRM_ERROR("failed to do bo_map on static CSA, err=%d\n", r);
 		amdgpu_vm_bo_rmv(adev, bo_va);
 		ttm_eu_backoff_reservation(&ticket, &list);
-		kfree(bo_va);
 		return r;
 	}
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index ac9007986c11..9e577e3d3147 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -97,8 +97,7 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
-	enum drm_mm_search_flags sflags = DRM_MM_SEARCH_DEFAULT;
-	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
+	enum drm_mm_insert_mode mode;
 	unsigned long lpfn, num_nodes, pages_per_node, pages_left;
 	unsigned i;
 	int r;
@@ -121,10 +120,9 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!nodes)
 		return -ENOMEM;
 
-	if (place->flags & TTM_PL_FLAG_TOPDOWN) {
-		sflags = DRM_MM_SEARCH_BELOW;
-		aflags = DRM_MM_CREATE_TOP;
-	}
+	mode = DRM_MM_INSERT_BEST;
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		mode = DRM_MM_INSERT_HIGH;
 
 	pages_left = mem->num_pages;
 
@@ -135,13 +133,11 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
-		else
-			sflags |= DRM_MM_SEARCH_BEST;
 
-		r = drm_mm_insert_node_in_range_generic(mm, &nodes[i], pages,
-							alignment, 0,
-							place->fpfn, lpfn,
-							sflags, aflags);
+		r = drm_mm_insert_node_in_range(mm, &nodes[i],
+						pages, alignment, 0,
+						place->fpfn, lpfn,
+						mode);
 		if (unlikely(r))
 			goto error;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
index 9498e78b90d7..f97ecb49972e 100644
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@@ -2210,7 +2210,6 @@ static void ci_clear_vc(struct amdgpu_device *adev)
 
 static int ci_upload_firmware(struct amdgpu_device *adev)
 {
-	struct ci_power_info *pi = ci_get_pi(adev);
 	int i, ret;
 
 	if (amdgpu_ci_is_smc_running(adev)) {
@@ -2227,7 +2226,7 @@ static int ci_upload_firmware(struct amdgpu_device *adev)
 	amdgpu_ci_stop_smc_clock(adev);
 	amdgpu_ci_reset_smc(adev);
 
-	ret = amdgpu_ci_load_smc_ucode(adev, pi->sram_end);
+	ret = amdgpu_ci_load_smc_ucode(adev, SMC_RAM_END);
 
 	return ret;
 
@@ -4257,12 +4256,6 @@ static int ci_update_vce_dpm(struct amdgpu_device *adev,
 
 	if (amdgpu_current_state->evclk != amdgpu_new_state->evclk) {
 		if (amdgpu_new_state->evclk) {
-			/* turn the clocks on when encoding */
-			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_CG_STATE_UNGATE);
-			if (ret)
-				return ret;
-
 			pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(adev);
 			tmp = RREG32_SMC(ixDPM_TABLE_475);
 			tmp &= ~DPM_TABLE_475__VceBootLevel_MASK;
@@ -4274,9 +4267,6 @@ static int ci_update_vce_dpm(struct amdgpu_device *adev,
 			ret = ci_enable_vce_dpm(adev, false);
 			if (ret)
 				return ret;
-			/* turn the clocks off when not encoding */
-			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_CG_STATE_GATE);
 		}
 	}
 	return ret;
@@ -6278,13 +6268,13 @@ static int ci_dpm_sw_init(void *handle)
 	adev->pm.current_mclk = adev->clock.default_mclk;
 	adev->pm.int_thermal_type = THERMAL_TYPE_NONE;
 
-	if (amdgpu_dpm == 0)
-		return 0;
-
 	ret = ci_dpm_init_microcode(adev);
 	if (ret)
 		return ret;
 
+	if (amdgpu_dpm == 0)
+		return 0;
+
 	INIT_WORK(&adev->pm.dpm.thermal.work, amdgpu_dpm_thermal_work_handler);
 	mutex_lock(&adev->pm.mutex);
 	ret = ci_dpm_init(adev);
@@ -6328,8 +6318,15 @@ static int ci_dpm_hw_init(void *handle)
 
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!amdgpu_dpm)
+	if (!amdgpu_dpm) {
+		ret = ci_upload_firmware(adev);
+		if (ret) {
+			DRM_ERROR("ci_upload_firmware failed\n");
+			return ret;
+		}
+		ci_dpm_start_smc(adev);
 		return 0;
+	}
 
 	mutex_lock(&adev->pm.mutex);
 	ci_dpm_setup_asic(adev);
@@ -6351,6 +6348,8 @@ static int ci_dpm_hw_fini(void *handle)
 		mutex_lock(&adev->pm.mutex);
 		ci_dpm_disable(adev);
 		mutex_unlock(&adev->pm.mutex);
+	} else {
+		ci_dpm_stop_smc(adev);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/cik.c b/drivers/gpu/drm/amd/amdgpu/cik.c
index 7da688b0d27d..c4d4b35e54ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@@ -1176,6 +1176,7 @@ static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
 			/* enable BM */
 			pci_set_master(adev->pdev);
+			adev->has_hw_reset = true;
 			r = 0;
 			break;
 		}
@@ -1722,8 +1723,8 @@ static int cik_common_early_init(void *handle)
 			  AMD_PG_SUPPORT_GFX_SMG |
 			  AMD_PG_SUPPORT_GFX_DMG |*/
 			AMD_PG_SUPPORT_UVD |
-			/*AMD_PG_SUPPORT_VCE |
-			  AMD_PG_SUPPORT_CP |
+			AMD_PG_SUPPORT_VCE |
+			/*  AMD_PG_SUPPORT_CP |
 			  AMD_PG_SUPPORT_GDS |
 			  AMD_PG_SUPPORT_RLC_SMU_HS |
 			  AMD_PG_SUPPORT_ACP |
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 1cf1d9d1aec1..5b24e89552ec 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -3737,9 +3737,15 @@ static void dce_v11_0_encoder_add(struct amdgpu_device *adev,
 	default:
 		encoder->possible_crtcs = 0x3;
 		break;
+	case 3:
+		encoder->possible_crtcs = 0x7;
+		break;
 	case 4:
 		encoder->possible_crtcs = 0xf;
 		break;
+	case 5:
+		encoder->possible_crtcs = 0x1f;
+		break;
 	case 6:
 		encoder->possible_crtcs = 0x3f;
 		break;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 762f8e82ceb7..e9a176891e13 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -627,11 +627,8 @@ static const struct drm_encoder_helper_funcs dce_virtual_encoder_helper_funcs =
 
 static void dce_virtual_encoder_destroy(struct drm_encoder *encoder)
 {
-	struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
-
-	kfree(amdgpu_encoder->enc_priv);
 	drm_encoder_cleanup(encoder);
-	kfree(amdgpu_encoder);
+	kfree(encoder);
 }
 
 static const struct drm_encoder_funcs dce_virtual_encoder_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
index c998f6aaaf36..2086e7e68de4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v6_0.c
@@ -1325,21 +1325,19 @@ static u32 gfx_v6_0_create_bitmask(u32 bit_width)
 	return (u32)(((u64)1 << bit_width) - 1);
 }
 
-static u32 gfx_v6_0_get_rb_disabled(struct amdgpu_device *adev,
-				    u32 max_rb_num_per_se,
-				    u32 sh_per_se)
+static u32 gfx_v6_0_get_rb_active_bitmap(struct amdgpu_device *adev)
 {
 	u32 data, mask;
 
-	data = RREG32(mmCC_RB_BACKEND_DISABLE);
-	data &= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE_MASK;
-	data |= RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+	data = RREG32(mmCC_RB_BACKEND_DISABLE) |
+		RREG32(mmGC_USER_RB_BACKEND_DISABLE);
 
-	data >>= CC_RB_BACKEND_DISABLE__BACKEND_DISABLE__SHIFT;
+	data = REG_GET_FIELD(data, GC_USER_RB_BACKEND_DISABLE, BACKEND_DISABLE);
 
-	mask = gfx_v6_0_create_bitmask(max_rb_num_per_se / sh_per_se);
+	mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_backends_per_se/
+					adev->gfx.config.max_sh_per_se);
 
-	return data & mask;
+	return ~data & mask;
 }
 
 static void gfx_v6_0_raster_config(struct amdgpu_device *adev, u32 *rconf)
@@ -1468,68 +1466,55 @@ static void gfx_v6_0_write_harvested_raster_configs(struct amdgpu_device *adev,
 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 }
 
-static void gfx_v6_0_setup_rb(struct amdgpu_device *adev,
-			      u32 se_num, u32 sh_per_se,
-			      u32 max_rb_num_per_se)
+static void gfx_v6_0_setup_rb(struct amdgpu_device *adev)
 {
 	int i, j;
-	u32 data, mask;
-	u32 disabled_rbs = 0;
-	u32 enabled_rbs = 0;
+	u32 data;
+	u32 raster_config = 0;
+	u32 active_rbs = 0;
+	u32 rb_bitmap_width_per_sh = adev->gfx.config.max_backends_per_se /
+					adev->gfx.config.max_sh_per_se;
 	unsigned num_rb_pipes;
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		for (j = 0; j < sh_per_se; j++) {
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
-			data = gfx_v6_0_get_rb_disabled(adev, max_rb_num_per_se, sh_per_se);
-			disabled_rbs |= data << ((i * sh_per_se + j) * 2);
+			data = gfx_v6_0_get_rb_active_bitmap(adev);
+			active_rbs |= data << ((i * adev->gfx.config.max_sh_per_se + j) *
+					rb_bitmap_width_per_sh);
 		}
 	}
 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
-
-	mask = 1;
-	for (i = 0; i < max_rb_num_per_se * se_num; i++) {
-		if (!(disabled_rbs & mask))
-			enabled_rbs |= mask;
-		mask <<= 1;
-	}
 
-	adev->gfx.config.backend_enable_mask = enabled_rbs;
-	adev->gfx.config.num_rbs = hweight32(enabled_rbs);
+	adev->gfx.config.backend_enable_mask = active_rbs;
+	adev->gfx.config.num_rbs = hweight32(active_rbs);
 
 	num_rb_pipes = min_t(unsigned, adev->gfx.config.max_backends_per_se *
 			     adev->gfx.config.max_shader_engines, 16);
 
-	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		gfx_v6_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
-		data = 0;
-		for (j = 0; j < sh_per_se; j++) {
-			switch (enabled_rbs & 3) {
-			case 1:
-				data |= (RASTER_CONFIG_RB_MAP_0 << (i * sh_per_se + j) * 2);
-				break;
-			case 2:
-				data |= (RASTER_CONFIG_RB_MAP_3 << (i * sh_per_se + j) * 2);
-				break;
-			case 3:
-			default:
-				data |= (RASTER_CONFIG_RB_MAP_2 << (i * sh_per_se + j) * 2);
-				break;
-			}
-			enabled_rbs >>= 2;
-		}
-		gfx_v6_0_raster_config(adev, &data);
+	gfx_v6_0_raster_config(adev, &raster_config);
 
-		if (!adev->gfx.config.backend_enable_mask ||
-				adev->gfx.config.num_rbs >= num_rb_pipes)
-			WREG32(mmPA_SC_RASTER_CONFIG, data);
-		else
-			gfx_v6_0_write_harvested_raster_configs(adev, data,
-								adev->gfx.config.backend_enable_mask,
-								num_rb_pipes);
+	if (!adev->gfx.config.backend_enable_mask ||
+			adev->gfx.config.num_rbs >= num_rb_pipes) {
+		WREG32(mmPA_SC_RASTER_CONFIG, raster_config);
+	} else {
+		gfx_v6_0_write_harvested_raster_configs(adev, raster_config,
+							adev->gfx.config.backend_enable_mask,
+							num_rb_pipes);
+	}
+
+	/* cache the values for userspace */
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
+			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+			adev->gfx.config.rb_config[i][j].rb_backend_disable =
+				RREG32(mmCC_RB_BACKEND_DISABLE);
+			adev->gfx.config.rb_config[i][j].user_rb_backend_disable =
+				RREG32(mmGC_USER_RB_BACKEND_DISABLE);
+			adev->gfx.config.rb_config[i][j].raster_config =
+				RREG32(mmPA_SC_RASTER_CONFIG);
+		}
 	}
 	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
@@ -1540,36 +1525,44 @@ static void gmc_v6_0_init_compute_vmid(struct amdgpu_device *adev)
 }
 */
 
-static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev, u32 cu_per_sh)
+static void gfx_v6_0_set_user_cu_inactive_bitmap(struct amdgpu_device *adev,
+						 u32 bitmap)
 {
-	u32 data, mask;
+	u32 data;
 
-	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
-	data &= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
-	data |= RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+	if (!bitmap)
+		return;
 
-	data >>= CC_GC_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data = bitmap << GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS__SHIFT;
+	data &= GC_USER_SHADER_ARRAY_CONFIG__INACTIVE_CUS_MASK;
 
-	mask = gfx_v6_0_create_bitmask(cu_per_sh);
+	WREG32(mmGC_USER_SHADER_ARRAY_CONFIG, data);
+}
 
-	return ~data & mask;
+static u32 gfx_v6_0_get_cu_enabled(struct amdgpu_device *adev)
+{
+	u32 data, mask;
+
+	data = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG) |
+		RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
+
+	mask = gfx_v6_0_create_bitmask(adev->gfx.config.max_cu_per_sh);
+	return ~REG_GET_FIELD(data, CC_GC_SHADER_ARRAY_CONFIG, INACTIVE_CUS) & mask;
 }
 
 
-static void gfx_v6_0_setup_spi(struct amdgpu_device *adev,
-			 u32 se_num, u32 sh_per_se,
-			 u32 cu_per_sh)
+static void gfx_v6_0_setup_spi(struct amdgpu_device *adev)
 {
 	int i, j, k;
 	u32 data, mask;
 	u32 active_cu = 0;
 
 	mutex_lock(&adev->grbm_idx_mutex);
-	for (i = 0; i < se_num; i++) {
-		for (j = 0; j < sh_per_se; j++) {
+	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
+		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
 			data = RREG32(mmSPI_STATIC_THREAD_MGMT_3);
-			active_cu = gfx_v6_0_get_cu_enabled(adev, cu_per_sh);
+			active_cu = gfx_v6_0_get_cu_enabled(adev);
 
 			mask = 1;
 			for (k = 0; k < 16; k++) {
@@ -1717,6 +1710,9 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
 		gb_addr_config |= 2 << GB_ADDR_CONFIG__ROW_SIZE__SHIFT;
 		break;
 	}
+	gb_addr_config &= ~GB_ADDR_CONFIG__NUM_SHADER_ENGINES_MASK;
+	if (adev->gfx.config.max_shader_engines == 2)
+		gb_addr_config |= 1 << GB_ADDR_CONFIG__NUM_SHADER_ENGINES__SHIFT;
 	adev->gfx.config.gb_addr_config = gb_addr_config;
 
 	WREG32(mmGB_ADDR_CONFIG, gb_addr_config);
@@ -1735,13 +1731,9 @@ static void gfx_v6_0_gpu_init(struct amdgpu_device *adev)
 #endif
 	gfx_v6_0_tiling_mode_table_init(adev);
 
-	gfx_v6_0_setup_rb(adev, adev->gfx.config.max_shader_engines,
-		    adev->gfx.config.max_sh_per_se,
-		    adev->gfx.config.max_backends_per_se);
+	gfx_v6_0_setup_rb(adev);
 
-	gfx_v6_0_setup_spi(adev, adev->gfx.config.max_shader_engines,
-		     adev->gfx.config.max_sh_per_se,
-		     adev->gfx.config.max_cu_per_sh);
+	gfx_v6_0_setup_spi(adev);
 
 	gfx_v6_0_get_cu_info(adev);
 
@@ -2941,61 +2933,16 @@ static void gfx_v6_0_enable_gfx_cgpg(struct amdgpu_device *adev,
 	}
 }
 
-static u32 gfx_v6_0_get_cu_active_bitmap(struct amdgpu_device *adev,
-					 u32 se, u32 sh)
-{
-
-	u32 mask = 0, tmp, tmp1;
-	int i;
-
-	mutex_lock(&adev->grbm_idx_mutex);
-	gfx_v6_0_select_se_sh(adev, se, sh, 0xffffffff);
-	tmp = RREG32(mmCC_GC_SHADER_ARRAY_CONFIG);
-	tmp1 = RREG32(mmGC_USER_SHADER_ARRAY_CONFIG);
-	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
-
-	tmp &= 0xffff0000;
-
-	tmp |= tmp1;
-	tmp >>= 16;
-
-	for (i = 0; i < adev->gfx.config.max_cu_per_sh; i ++) {
-		mask <<= 1;
-		mask |= 1;
-	}
-
-	return (~tmp) & mask;
-}
-
 static void gfx_v6_0_init_ao_cu_mask(struct amdgpu_device *adev)
 {
-	u32 i, j, k, active_cu_number = 0;
-
-	u32 mask, counter, cu_bitmap;
-	u32 tmp = 0;
-
-	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
-			mask = 1;
-			cu_bitmap = 0;
-			counter  = 0;
-			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k++) {
-				if (gfx_v6_0_get_cu_active_bitmap(adev, i, j) & mask) {
-					if (counter < 2)
-						cu_bitmap |= mask;
-					counter++;
-				}
-				mask <<= 1;
-			}
+	u32 tmp;
 
-			active_cu_number += counter;
-			tmp |= (cu_bitmap << (i * 16 + j * 8));
-		}
-	}
+	WREG32(mmRLC_PG_ALWAYS_ON_CU_MASK, adev->gfx.cu_info.ao_cu_mask);
 
-	WREG32(mmRLC_PG_AO_CU_MASK, tmp);
-	WREG32_FIELD(RLC_MAX_PG_CU, MAX_POWERED_UP_CU, active_cu_number);
+	tmp = RREG32(mmRLC_MAX_PG_CU);
+	tmp &= ~RLC_MAX_PG_CU__MAX_POWERED_UP_CU_MASK;
+	tmp |= (adev->gfx.cu_info.number << RLC_MAX_PG_CU__MAX_POWERED_UP_CU__SHIFT);
+	WREG32(mmRLC_MAX_PG_CU, tmp);
 }
 
 static void gfx_v6_0_enable_gfx_static_mgpg(struct amdgpu_device *adev,
@@ -3770,18 +3717,26 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 	int i, j, k, counter, active_cu_number = 0;
 	u32 mask, bitmap, ao_bitmap, ao_cu_mask = 0;
 	struct amdgpu_cu_info *cu_info = &adev->gfx.cu_info;
+	unsigned disable_masks[4 * 2];
 
 	memset(cu_info, 0, sizeof(*cu_info));
 
+	amdgpu_gfx_parse_disable_cu(disable_masks, 4, 2);
+
+	mutex_lock(&adev->grbm_idx_mutex);
 	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
 		for (j = 0; j < adev->gfx.config.max_sh_per_se; j++) {
 			mask = 1;
 			ao_bitmap = 0;
 			counter = 0;
-			bitmap = gfx_v6_0_get_cu_active_bitmap(adev, i, j);
+			gfx_v6_0_select_se_sh(adev, i, j, 0xffffffff);
+			if (i < 4 && j < 2)
+				gfx_v6_0_set_user_cu_inactive_bitmap(
+					adev, disable_masks[i * 2 + j]);
+			bitmap = gfx_v6_0_get_cu_enabled(adev);
 			cu_info->bitmap[i][j] = bitmap;
 
-			for (k = 0; k < adev->gfx.config.max_cu_per_sh; k ++) {
+			for (k = 0; k < 16; k++) {
 				if (bitmap & mask) {
 					if (counter < 2)
 						ao_bitmap |= mask;
@@ -3794,6 +3749,9 @@ static void gfx_v6_0_get_cu_info(struct amdgpu_device *adev)
 		}
 	}
 
+	gfx_v6_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+	mutex_unlock(&adev->grbm_idx_mutex);
+
 	cu_info->number = active_cu_number;
 	cu_info->ao_cu_mask = ao_cu_mask;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
index e3589b55a1e1..1f9354541f29 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v7_0.c
@@ -1983,6 +1983,14 @@ static void gfx_v7_0_gpu_init(struct amdgpu_device *adev)
 	WREG32(mmPA_CL_ENHANCE, PA_CL_ENHANCE__CLIP_VTX_REORDER_ENA_MASK |
 			(3 << PA_CL_ENHANCE__NUM_CLIP_SEQ__SHIFT));
 	WREG32(mmPA_SC_ENHANCE, PA_SC_ENHANCE__ENABLE_PA_SC_OUT_OF_ORDER_MASK);
+
+	tmp = RREG32(mmSPI_ARB_PRIORITY);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
+	WREG32(mmSPI_ARB_PRIORITY, tmp);
+
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 	udelay(50);
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 35f9cd83b821..67afc901905c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -3898,6 +3898,14 @@ static void gfx_v8_0_gpu_init(struct amdgpu_device *adev)
 			PA_SC_FIFO_SIZE__SC_HIZ_TILE_FIFO_SIZE__SHIFT) |
 		   (adev->gfx.config.sc_earlyz_tile_fifo_size <<
 			PA_SC_FIFO_SIZE__SC_EARLYZ_TILE_FIFO_SIZE__SHIFT));
+
+	tmp = RREG32(mmSPI_ARB_PRIORITY);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS0, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS1, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS2, 2);
+	tmp = REG_SET_FIELD(tmp, SPI_ARB_PRIORITY, PIPE_ORDER_TS3, 2);
+	WREG32(mmSPI_ARB_PRIORITY, tmp);
+
 	mutex_unlock(&adev->grbm_idx_mutex);
 
 }
@@ -7260,7 +7268,7 @@ static void gfx_v8_0_ring_emit_ce_meta_init(struct amdgpu_ring *ring, uint64_t c
 	static union {
 		struct amdgpu_ce_ib_state regular;
 		struct amdgpu_ce_ib_state_chained_ib chained;
-	} ce_payload = {0};
+	} ce_payload = {};
 
 	if (ring->adev->virt.chained_ib_support) {
 		ce_payload_addr = csa_addr + offsetof(struct amdgpu_gfx_meta_data_chained_ib, ce_payload);
@@ -7287,7 +7295,7 @@ static void gfx_v8_0_ring_emit_de_meta_init(struct amdgpu_ring *ring, uint64_t c
 	static union {
 		struct amdgpu_de_ib_state regular;
 		struct amdgpu_de_ib_state_chained_ib chained;
-	} de_payload = {0};
+	} de_payload = {};
 
 	gds_addr = csa_addr + 4096;
 	if (ring->adev->virt.chained_ib_support) {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index e2b0b1646f99..0635829b18cf 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -254,6 +254,9 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 	}
 	WREG32(mmHDP_REG_COHERENCY_FLUSH_CNTL, 0);
 
+	if (adev->mode_info.num_crtc)
+		amdgpu_display_set_vga_render_state(adev, false);
+
 	gmc_v6_0_mc_stop(adev, &save);
 
 	if (gmc_v6_0_wait_for_idle((void *)adev)) {
@@ -283,7 +286,6 @@ static void gmc_v6_0_mc_program(struct amdgpu_device *adev)
 		dev_warn(adev->dev, "Wait for MC idle timedout !\n");
 	}
 	gmc_v6_0_mc_resume(adev, &save);
-	amdgpu_display_set_vga_render_state(adev, false);
 }
 
 static int gmc_v6_0_mc_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
index 8785ca570729..f5a343cb0010 100644
--- a/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/kv_dpm.c
@@ -1550,11 +1550,6 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
 
 	if (amdgpu_new_state->evclk > 0 && amdgpu_current_state->evclk == 0) {
 		kv_dpm_powergate_vce(adev, false);
-		/* turn the clocks on when encoding */
-		ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						    AMD_CG_STATE_UNGATE);
-		if (ret)
-			return ret;
 		if (pi->caps_stable_p_state)
 			pi->vce_boot_level = table->count - 1;
 		else
@@ -1573,15 +1568,9 @@ static int kv_update_vce_dpm(struct amdgpu_device *adev,
 			amdgpu_kv_send_msg_to_smc_with_parameter(adev,
 							  PPSMC_MSG_VCEDPM_SetEnabledMask,
 							  (1 << pi->vce_boot_level));
-
 		kv_enable_vce_dpm(adev, true);
 	} else if (amdgpu_new_state->evclk == 0 && amdgpu_current_state->evclk > 0) {
 		kv_enable_vce_dpm(adev, false);
-		/* turn the clocks off when not encoding */
-		ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-						    AMD_CG_STATE_GATE);
-		if (ret)
-			return ret;
 		kv_dpm_powergate_vce(adev, true);
 	}
 
@@ -1688,70 +1677,44 @@ static void kv_dpm_powergate_uvd(struct amdgpu_device *adev, bool gate)
 	struct kv_power_info *pi = kv_get_pi(adev);
 	int ret;
 
-	if (pi->uvd_power_gated == gate)
-		return;
-
 	pi->uvd_power_gated = gate;
 
 	if (gate) {
-		if (pi->caps_uvd_pg) {
-			/* disable clockgating so we can properly shut down the block */
-			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_UNGATE);
-			/* shutdown the UVD block */
-			ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_PG_STATE_GATE);
-			/* XXX: check for errors */
-		}
+		/* stop the UVD block */
+		ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							AMD_PG_STATE_GATE);
 		kv_update_uvd_dpm(adev, gate);
 		if (pi->caps_uvd_pg)
 			/* power off the UVD block */
 			amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerOFF);
 	} else {
-		if (pi->caps_uvd_pg) {
+		if (pi->caps_uvd_pg)
 			/* power on the UVD block */
 			amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_UVDPowerON);
 			/* re-init the UVD block */
-			ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_PG_STATE_UNGATE);
-			/* enable clockgating. hw will dynamically gate/ungate clocks on the fly */
-			ret = amdgpu_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
-							    AMD_CG_STATE_GATE);
-			/* XXX: check for errors */
-		}
 		kv_update_uvd_dpm(adev, gate);
+
+		ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_UVD,
+							AMD_PG_STATE_UNGATE);
 	}
 }
 
 static void kv_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 {
 	struct kv_power_info *pi = kv_get_pi(adev);
-	int ret;
 
 	if (pi->vce_power_gated == gate)
 		return;
 
 	pi->vce_power_gated = gate;
 
-	if (gate) {
-		if (pi->caps_vce_pg) {
-			/* shutdown the VCE block */
-			ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_PG_STATE_GATE);
-			/* XXX: check for errors */
-			/* power off the VCE block */
-			amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
-		}
-	} else {
-		if (pi->caps_vce_pg) {
-			/* power on the VCE block */
-			amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
-			/* re-init the VCE block */
-			ret = amdgpu_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCE,
-							    AMD_PG_STATE_UNGATE);
-			/* XXX: check for errors */
-		}
-	}
+	if (!pi->caps_vce_pg)
+		return;
+
+	if (gate)
+		amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerOFF);
+	else
+		amdgpu_kv_notify_message_to_smu(adev, PPSMC_MSG_VCEPowerON);
 }
 
 static void kv_dpm_powergate_samu(struct amdgpu_device *adev, bool gate)
@@ -3009,8 +2972,7 @@ static int kv_dpm_late_init(void *handle)
 
 	kv_dpm_powergate_acp(adev, true);
 	kv_dpm_powergate_samu(adev, true);
-	kv_dpm_powergate_vce(adev, true);
-	kv_dpm_powergate_uvd(adev, true);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/si.c b/drivers/gpu/drm/amd/amdgpu/si.c
index da46992f7b18..b71e3faa40db 100644
--- a/drivers/gpu/drm/amd/amdgpu/si.c
+++ b/drivers/gpu/drm/amd/amdgpu/si.c
@@ -1010,24 +1010,81 @@ static struct amdgpu_allowed_register_entry si_allowed_read_registers[] = {
 	{PA_SC_RASTER_CONFIG, false, true},
 };
 
-static uint32_t si_read_indexed_register(struct amdgpu_device *adev,
-					  u32 se_num, u32 sh_num,
-					  u32 reg_offset)
+static uint32_t si_get_register_value(struct amdgpu_device *adev,
+				      bool indexed, u32 se_num,
+				      u32 sh_num, u32 reg_offset)
 {
-	uint32_t val;
+	if (indexed) {
+		uint32_t val;
+		unsigned se_idx = (se_num == 0xffffffff) ? 0 : se_num;
+		unsigned sh_idx = (sh_num == 0xffffffff) ? 0 : sh_num;
+
+		switch (reg_offset) {
+		case mmCC_RB_BACKEND_DISABLE:
+			return adev->gfx.config.rb_config[se_idx][sh_idx].rb_backend_disable;
+		case mmGC_USER_RB_BACKEND_DISABLE:
+			return adev->gfx.config.rb_config[se_idx][sh_idx].user_rb_backend_disable;
+		case mmPA_SC_RASTER_CONFIG:
+			return adev->gfx.config.rb_config[se_idx][sh_idx].raster_config;
+		}
 
-	mutex_lock(&adev->grbm_idx_mutex);
-	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
+		mutex_lock(&adev->grbm_idx_mutex);
+		if (se_num != 0xffffffff || sh_num != 0xffffffff)
+			amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);
 
-	val = RREG32(reg_offset);
+		val = RREG32(reg_offset);
 
-	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-	mutex_unlock(&adev->grbm_idx_mutex);
-	return val;
+		if (se_num != 0xffffffff || sh_num != 0xffffffff)
+			amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+		return val;
+	} else {
+		unsigned idx;
+
+		switch (reg_offset) {
+		case mmGB_ADDR_CONFIG:
+			return adev->gfx.config.gb_addr_config;
+		case mmMC_ARB_RAMCFG:
+			return adev->gfx.config.mc_arb_ramcfg;
+		case mmGB_TILE_MODE0:
+		case mmGB_TILE_MODE1:
+		case mmGB_TILE_MODE2:
+		case mmGB_TILE_MODE3:
+		case mmGB_TILE_MODE4:
+		case mmGB_TILE_MODE5:
+		case mmGB_TILE_MODE6:
+		case mmGB_TILE_MODE7:
+		case mmGB_TILE_MODE8:
+		case mmGB_TILE_MODE9:
+		case mmGB_TILE_MODE10:
+		case mmGB_TILE_MODE11:
+		case mmGB_TILE_MODE12:
+		case mmGB_TILE_MODE13:
+		case mmGB_TILE_MODE14:
+		case mmGB_TILE_MODE15:
+		case mmGB_TILE_MODE16:
+		case mmGB_TILE_MODE17:
+		case mmGB_TILE_MODE18:
+		case mmGB_TILE_MODE19:
+		case mmGB_TILE_MODE20:
+		case mmGB_TILE_MODE21:
+		case mmGB_TILE_MODE22:
+		case mmGB_TILE_MODE23:
+		case mmGB_TILE_MODE24:
+		case mmGB_TILE_MODE25:
+		case mmGB_TILE_MODE26:
+		case mmGB_TILE_MODE27:
+		case mmGB_TILE_MODE28:
+		case mmGB_TILE_MODE29:
+		case mmGB_TILE_MODE30:
+		case mmGB_TILE_MODE31:
+			idx = (reg_offset - mmGB_TILE_MODE0);
+			return adev->gfx.config.tile_mode_array[idx];
+		default:
+			return RREG32(reg_offset);
+		}
+	}
 }
-
 static int si_read_register(struct amdgpu_device *adev, u32 se_num,
 			     u32 sh_num, u32 reg_offset, u32 *value)
 {
@@ -1039,10 +1096,9 @@ static int si_read_register(struct amdgpu_device *adev, u32 se_num,
 			continue;
 
 		if (!si_allowed_read_registers[i].untouched)
-			*value = si_allowed_read_registers[i].grbm_indexed ?
-				 si_read_indexed_register(adev, se_num,
-							   sh_num, reg_offset) :
-				 RREG32(reg_offset);
+			*value = si_get_register_value(adev,
+						si_allowed_read_registers[i].grbm_indexed,
+						se_num, sh_num, reg_offset);
 		return 0;
 	}
 	return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_enums.h b/drivers/gpu/drm/amd/amdgpu/si_enums.h
index fde2086246fa..dc9e0e6b4558 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_enums.h
+++ b/drivers/gpu/drm/amd/amdgpu/si_enums.h
@@ -143,8 +143,8 @@
 #define RLC_CLEAR_STATE_DESCRIPTOR_OFFSET    0x3D
 
 #define TAHITI_GB_ADDR_CONFIG_GOLDEN        0x12011003
-#define VERDE_GB_ADDR_CONFIG_GOLDEN         0x12010002
-#define HAINAN_GB_ADDR_CONFIG_GOLDEN        0x02010001
+#define VERDE_GB_ADDR_CONFIG_GOLDEN         0x02010002
+#define HAINAN_GB_ADDR_CONFIG_GOLDEN        0x02011003
 
 #define PACKET3(op, n)  ((RADEON_PACKET_TYPE3 << 30) |                  \
                          (((op) & 0xFF) << 8) |                         \
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 7fb9137dd89b..b34cefc7ebd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -159,9 +159,6 @@ static int uvd_v4_2_hw_init(void *handle)
 
 	uvd_v4_2_enable_mgcg(adev, true);
 	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
-	r = uvd_v4_2_start(adev);
-	if (r)
-		goto done;
 
 	ring->ready = true;
 	r = amdgpu_ring_test_ring(ring);
@@ -198,7 +195,6 @@ static int uvd_v4_2_hw_init(void *handle)
 	amdgpu_ring_commit(ring);
 
 done:
-
 	if (!r)
 		DRM_INFO("UVD initialized successfully.\n");
 
@@ -217,7 +213,9 @@ static int uvd_v4_2_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring = &adev->uvd.ring;
 
-	uvd_v4_2_stop(adev);
+	if (RREG32(mmUVD_STATUS) != 0)
+		uvd_v4_2_stop(adev);
+
 	ring->ready = false;
 
 	return 0;
@@ -267,37 +265,26 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring = &adev->uvd.ring;
 	uint32_t rb_bufsz;
 	int i, j, r;
+	u32 tmp;
 	/* disable byte swapping */
 	u32 lmi_swap_cntl = 0;
 	u32 mp_swap_cntl = 0;
 
-	WREG32(mmUVD_CGC_GATE, 0);
-	uvd_v4_2_set_dcm(adev, true);
-
-	uvd_v4_2_mc_resume(adev);
+	/* set uvd busy */
+	WREG32_P(mmUVD_STATUS, 1<<2, ~(1<<2));
 
-	/* disable interupt */
-	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
-
-	/* Stall UMC and register bus before resetting VCPU */
-	WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
-	mdelay(1);
-
-	/* put LMI, VCPU, RBC etc... into reset */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK | UVD_SOFT_RESET__LBSI_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__RBC_SOFT_RESET_MASK | UVD_SOFT_RESET__CSM_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__CXW_SOFT_RESET_MASK | UVD_SOFT_RESET__TAP_SOFT_RESET_MASK |
-		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
-	mdelay(5);
+	uvd_v4_2_set_dcm(adev, true);
+	WREG32(mmUVD_CGC_GATE, 0);
 
 	/* take UVD block out of reset */
 	WREG32_P(mmSRBM_SOFT_RESET, 0, ~SRBM_SOFT_RESET__SOFT_RESET_UVD_MASK);
 	mdelay(5);
 
-	/* initialize UVD memory controller */
-	WREG32(mmUVD_LMI_CTRL, 0x40 | (1 << 8) | (1 << 13) |
-			     (1 << 21) | (1 << 9) | (1 << 20));
+	/* enable VCPU clock */
+	WREG32(mmUVD_VCPU_CNTL,  1 << 9);
+
+	/* disable interupt */
+	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
 
 #ifdef __BIG_ENDIAN
 	/* swap (8 in 32) RB and IB */
@@ -306,6 +293,11 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
 #endif
 	WREG32(mmUVD_LMI_SWAP_CNTL, lmi_swap_cntl);
 	WREG32(mmUVD_MP_SWAP_CNTL, mp_swap_cntl);
+	/* initialize UVD memory controller */
+	WREG32(mmUVD_LMI_CTRL, 0x203108);
+
+	tmp = RREG32(mmUVD_MPC_CNTL);
+	WREG32(mmUVD_MPC_CNTL, tmp | 0x10);
 
 	WREG32(mmUVD_MPC_SET_MUXA0, 0x40c2040);
 	WREG32(mmUVD_MPC_SET_MUXA1, 0x0);
@@ -314,18 +306,20 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
 	WREG32(mmUVD_MPC_SET_ALU, 0);
 	WREG32(mmUVD_MPC_SET_MUX, 0x88);
 
-	/* take all subblocks out of reset, except VCPU */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-	mdelay(5);
+	uvd_v4_2_mc_resume(adev);
 
-	/* enable VCPU clock */
-	WREG32(mmUVD_VCPU_CNTL,  1 << 9);
+	tmp = RREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL);
+	WREG32_UVD_CTX(ixUVD_LMI_CACHE_CTRL, tmp & (~0x10));
 
 	/* enable UMC */
 	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
 
-	/* boot up the VCPU */
-	WREG32(mmUVD_SOFT_RESET, 0);
+	WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_SOFT_RESET_MASK);
+
+	WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+	WREG32_P(mmUVD_SOFT_RESET, 0, ~UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
+
 	mdelay(10);
 
 	for (i = 0; i < 10; ++i) {
@@ -357,6 +351,8 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
 	/* enable interupt */
 	WREG32_P(mmUVD_MASTINT_EN, 3<<1, ~(3 << 1));
 
+	WREG32_P(mmUVD_STATUS, 0, ~(1<<2));
+
 	/* force RBC into idle state */
 	WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
 
@@ -393,22 +389,57 @@ static int uvd_v4_2_start(struct amdgpu_device *adev)
  */
 static void uvd_v4_2_stop(struct amdgpu_device *adev)
 {
-	/* force RBC into idle state */
+	uint32_t i, j;
+	uint32_t status;
+
 	WREG32(mmUVD_RBC_RB_CNTL, 0x11010101);
 
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(mmUVD_STATUS);
+			if (status & 2)
+				break;
+			mdelay(1);
+		}
+		if (status & 2)
+			break;
+	}
+
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(mmUVD_LMI_STATUS);
+			if (status & 0xf)
+				break;
+			mdelay(1);
+		}
+		if (status & 0xf)
+			break;
+	}
+
 	/* Stall UMC and register bus before resetting VCPU */
 	WREG32_P(mmUVD_LMI_CTRL2, 1 << 8, ~(1 << 8));
-	mdelay(1);
 
-	/* put VCPU into reset */
-	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK);
-	mdelay(5);
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(mmUVD_LMI_STATUS);
+			if (status & 0x240)
+				break;
+			mdelay(1);
+		}
+		if (status & 0x240)
+			break;
+	}
 
-	/* disable VCPU clock */
-	WREG32(mmUVD_VCPU_CNTL, 0x0);
+	WREG32_P(0x3D49, 0, ~(1 << 2));
 
-	/* Unstall UMC and register bus */
-	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+	WREG32_P(mmUVD_VCPU_CNTL, 0, ~(1 << 9));
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32(mmUVD_SOFT_RESET, UVD_SOFT_RESET__LMI_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__VCPU_SOFT_RESET_MASK |
+		UVD_SOFT_RESET__LMI_UMC_SOFT_RESET_MASK);
+
+	WREG32(mmUVD_STATUS, 0);
 
 	uvd_v4_2_set_dcm(adev, false);
 }
@@ -694,8 +725,26 @@ static int uvd_v4_2_set_powergating_state(void *handle,
 
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v4_2_stop(adev);
+		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+			if (!(RREG32_SMC(ixCURRENT_PG_STATUS) &
+				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK)) {
+				WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK   |
+							UVD_PGFSM_CONFIG__UVD_PGFSM_POWER_DOWN_MASK |
+							UVD_PGFSM_CONFIG__UVD_PGFSM_P1_SELECT_MASK));
+				mdelay(20);
+			}
+		}
 		return 0;
 	} else {
+		if (adev->pg_flags & AMD_PG_SUPPORT_UVD && amdgpu_dpm == 0) {
+			if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
+				WREG32(mmUVD_PGFSM_CONFIG, (UVD_PGFSM_CONFIG__UVD_PGFSM_FSM_ADDR_MASK   |
+						UVD_PGFSM_CONFIG__UVD_PGFSM_POWER_UP_MASK |
+						UVD_PGFSM_CONFIG__UVD_PGFSM_P1_SELECT_MASK));
+				mdelay(30);
+			}
+		}
 		return uvd_v4_2_start(adev);
 	}
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index 9b49824233ae..ad8c02e423d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -152,9 +152,9 @@ static int uvd_v5_0_hw_init(void *handle)
 	uint32_t tmp;
 	int r;
 
-	r = uvd_v5_0_start(adev);
-	if (r)
-		goto done;
+	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+	uvd_v5_0_enable_mgcg(adev, true);
 
 	ring->ready = true;
 	r = amdgpu_ring_test_ring(ring);
@@ -189,11 +189,13 @@ static int uvd_v5_0_hw_init(void *handle)
 	amdgpu_ring_write(ring, 3);
 
 	amdgpu_ring_commit(ring);
+
 done:
 	if (!r)
 		DRM_INFO("UVD initialized successfully.\n");
 
 	return r;
+
 }
 
 /**
@@ -208,7 +210,9 @@ static int uvd_v5_0_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring = &adev->uvd.ring;
 
-	uvd_v5_0_stop(adev);
+	if (RREG32(mmUVD_STATUS) != 0)
+		uvd_v5_0_stop(adev);
+
 	ring->ready = false;
 
 	return 0;
@@ -310,10 +314,6 @@ static int uvd_v5_0_start(struct amdgpu_device *adev)
 
 	uvd_v5_0_mc_resume(adev);
 
-	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
-	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
-	uvd_v5_0_enable_mgcg(adev, true);
-
 	/* disable interupt */
 	WREG32_P(mmUVD_MASTINT_EN, 0, ~(1 << 1));
 
@@ -456,6 +456,8 @@ static void uvd_v5_0_stop(struct amdgpu_device *adev)
 
 	/* Unstall UMC and register bus */
 	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+	WREG32(mmUVD_STATUS, 0);
 }
 
 /**
@@ -792,9 +794,6 @@ static int uvd_v5_0_set_clockgating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 
-	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
-		return 0;
-
 	if (enable) {
 		/* wait for STATUS to clear */
 		if (uvd_v5_0_wait_for_idle(handle))
@@ -824,17 +823,12 @@ static int uvd_v5_0_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret = 0;
 
-	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
-		return 0;
-
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v5_0_stop(adev);
-		adev->uvd.is_powergated = true;
 	} else {
 		ret = uvd_v5_0_start(adev);
 		if (ret)
 			goto out;
-		adev->uvd.is_powergated = false;
 	}
 
 out:
@@ -848,7 +842,8 @@ static void uvd_v5_0_get_clockgating_state(void *handle, u32 *flags)
 
 	mutex_lock(&adev->pm.mutex);
 
-	if (adev->uvd.is_powergated) {
+	if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
 		DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
 		goto out;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index de7e03544d00..18a6de4e1512 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -155,9 +155,9 @@ static int uvd_v6_0_hw_init(void *handle)
 	uint32_t tmp;
 	int r;
 
-	r = uvd_v6_0_start(adev);
-	if (r)
-		goto done;
+	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
+	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
+	uvd_v6_0_enable_mgcg(adev, true);
 
 	ring->ready = true;
 	r = amdgpu_ring_test_ring(ring);
@@ -212,7 +212,9 @@ static int uvd_v6_0_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring = &adev->uvd.ring;
 
-	uvd_v6_0_stop(adev);
+	if (RREG32(mmUVD_STATUS) != 0)
+		uvd_v6_0_stop(adev);
+
 	ring->ready = false;
 
 	return 0;
@@ -397,9 +399,6 @@ static int uvd_v6_0_start(struct amdgpu_device *adev)
 	lmi_swap_cntl = 0;
 	mp_swap_cntl = 0;
 
-	amdgpu_asic_set_uvd_clocks(adev, 10000, 10000);
-	uvd_v6_0_set_clockgating_state(adev, AMD_CG_STATE_UNGATE);
-	uvd_v6_0_enable_mgcg(adev, true);
 	uvd_v6_0_mc_resume(adev);
 
 	/* disable interupt */
@@ -554,6 +553,8 @@ static void uvd_v6_0_stop(struct amdgpu_device *adev)
 
 	/* Unstall UMC and register bus */
 	WREG32_P(mmUVD_LMI_CTRL2, 0, ~(1 << 8));
+
+	WREG32(mmUVD_STATUS, 0);
 }
 
 /**
@@ -1018,9 +1019,6 @@ static int uvd_v6_0_set_clockgating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
 
-	if (!(adev->cg_flags & AMD_CG_SUPPORT_UVD_MGCG))
-		return 0;
-
 	if (enable) {
 		/* wait for STATUS to clear */
 		if (uvd_v6_0_wait_for_idle(handle))
@@ -1049,19 +1047,14 @@ static int uvd_v6_0_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret = 0;
 
-	if (!(adev->pg_flags & AMD_PG_SUPPORT_UVD))
-		return 0;
-
 	WREG32(mmUVD_POWER_STATUS, UVD_POWER_STATUS__UVD_PG_EN_MASK);
 
 	if (state == AMD_PG_STATE_GATE) {
 		uvd_v6_0_stop(adev);
-		adev->uvd.is_powergated = true;
 	} else {
 		ret = uvd_v6_0_start(adev);
 		if (ret)
 			goto out;
-		adev->uvd.is_powergated = false;
 	}
 
 out:
@@ -1075,7 +1068,8 @@ static void uvd_v6_0_get_clockgating_state(void *handle, u32 *flags)
 
 	mutex_lock(&adev->pm.mutex);
 
-	if (adev->uvd.is_powergated) {
+	if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+				CURRENT_PG_STATUS__UVD_PG_STATUS_MASK) {
 		DRM_INFO("Cannot get clockgating state when UVD is powergated.\n");
 		goto out;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 38ed903dd6f8..9ea99348e493 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -42,10 +42,9 @@
 #define VCE_V2_0_DATA_SIZE	(23552 * AMDGPU_MAX_VCE_HANDLES)
 #define VCE_STATUS_VCPU_REPORT_FW_LOADED_MASK	0x02
 
-static void vce_v2_0_mc_resume(struct amdgpu_device *adev);
 static void vce_v2_0_set_ring_funcs(struct amdgpu_device *adev);
 static void vce_v2_0_set_irq_funcs(struct amdgpu_device *adev);
-static int vce_v2_0_wait_for_idle(void *handle);
+
 /**
  * vce_v2_0_ring_get_rptr - get read pointer
  *
@@ -140,6 +139,86 @@ static int vce_v2_0_firmware_loaded(struct amdgpu_device *adev)
 	return -ETIMEDOUT;
 }
 
+static void vce_v2_0_disable_cg(struct amdgpu_device *adev)
+{
+	WREG32(mmVCE_CGTT_CLK_OVERRIDE, 7);
+}
+
+static void vce_v2_0_init_cg(struct amdgpu_device *adev)
+{
+	u32 tmp;
+
+	tmp = RREG32(mmVCE_CLOCK_GATING_A);
+	tmp &= ~0xfff;
+	tmp |= ((0 << 0) | (4 << 4));
+	tmp |= 0x40000;
+	WREG32(mmVCE_CLOCK_GATING_A, tmp);
+
+	tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+	tmp &= ~0xfff;
+	tmp |= ((0 << 0) | (4 << 4));
+	WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+	tmp = RREG32(mmVCE_CLOCK_GATING_B);
+	tmp |= 0x10;
+	tmp &= ~0x100000;
+	WREG32(mmVCE_CLOCK_GATING_B, tmp);
+}
+
+static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
+{
+	uint64_t addr = adev->vce.gpu_addr;
+	uint32_t size;
+
+	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
+	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
+	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
+	WREG32(mmVCE_CLOCK_GATING_B, 0xf7);
+
+	WREG32(mmVCE_LMI_CTRL, 0x00398000);
+	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
+	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
+	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
+	WREG32(mmVCE_LMI_VM_CTRL, 0);
+
+	addr += AMDGPU_VCE_FIRMWARE_OFFSET;
+	size = VCE_V2_0_FW_SIZE;
+	WREG32(mmVCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
+	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = VCE_V2_0_STACK_SIZE;
+	WREG32(mmVCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
+	WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = VCE_V2_0_DATA_SIZE;
+	WREG32(mmVCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
+	WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
+
+	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
+	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
+}
+
+static bool vce_v2_0_is_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
+}
+
+static int vce_v2_0_wait_for_idle(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	unsigned i;
+
+	for (i = 0; i < adev->usec_timeout; i++) {
+		if (vce_v2_0_is_idle(handle))
+			return 0;
+	}
+	return -ETIMEDOUT;
+}
+
 /**
  * vce_v2_0_start - start VCE block
  *
@@ -152,11 +231,14 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	int r;
 
-	vce_v2_0_mc_resume(adev);
-
 	/* set BUSY flag */
 	WREG32_P(mmVCE_STATUS, 1, ~1);
 
+	vce_v2_0_init_cg(adev);
+	vce_v2_0_disable_cg(adev);
+
+	vce_v2_0_mc_resume(adev);
+
 	ring = &adev->vce.ring[0];
 	WREG32(mmVCE_RB_RPTR, ring->wptr);
 	WREG32(mmVCE_RB_WPTR, ring->wptr);
@@ -189,6 +271,145 @@ static int vce_v2_0_start(struct amdgpu_device *adev)
 	return 0;
 }
 
+static int vce_v2_0_stop(struct amdgpu_device *adev)
+{
+	int i, j;
+	int status;
+
+	if (vce_v2_0_lmi_clean(adev)) {
+		DRM_INFO("vce is not idle \n");
+		return 0;
+	}
+/*
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(mmVCE_FW_REG_STATUS);
+			if (!(status & 1))
+				break;
+			mdelay(1);
+		}
+		break;
+	}
+*/
+	if (vce_v2_0_wait_for_idle(adev)) {
+		DRM_INFO("VCE is busy, Can't set clock gateing");
+		return 0;
+	}
+
+	/* Stall UMC and register bus before resetting VCPU */
+	WREG32_P(mmVCE_LMI_CTRL2, 1 << 8, ~(1 << 8));
+
+	for (i = 0; i < 10; ++i) {
+		for (j = 0; j < 100; ++j) {
+			status = RREG32(mmVCE_LMI_STATUS);
+			if (status & 0x240)
+				break;
+			mdelay(1);
+		}
+		break;
+	}
+
+	WREG32_P(mmVCE_VCPU_CNTL, 0, ~0x80001);
+
+	/* put LMI, VCPU, RBC etc... into reset */
+	WREG32_P(mmVCE_SOFT_RESET, 1, ~0x1);
+
+	WREG32(mmVCE_STATUS, 0);
+
+	return 0;
+}
+
+static void vce_v2_0_set_sw_cg(struct amdgpu_device *adev, bool gated)
+{
+	u32 tmp;
+
+	if (gated) {
+		tmp = RREG32(mmVCE_CLOCK_GATING_B);
+		tmp |= 0xe70000;
+		WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+		tmp |= 0xff000000;
+		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+		tmp &= ~0x3fc;
+		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+
+		WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
+	} else {
+		tmp = RREG32(mmVCE_CLOCK_GATING_B);
+		tmp |= 0xe7;
+		tmp &= ~0xe70000;
+		WREG32(mmVCE_CLOCK_GATING_B, tmp);
+
+		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+		tmp |= 0x1fe000;
+		tmp &= ~0xff000000;
+		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+		tmp |= 0x3fc;
+		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+	}
+}
+
+static void vce_v2_0_set_dyn_cg(struct amdgpu_device *adev, bool gated)
+{
+	u32 orig, tmp;
+
+/* LMI_MC/LMI_UMC always set in dynamic,
+ * set {CGC_*_GATE_MODE, CGC_*_SW_GATE} = {0, 0}
+ */
+	tmp = RREG32(mmVCE_CLOCK_GATING_B);
+	tmp &= ~0x00060006;
+
+/* Exception for ECPU, IH, SEM, SYS blocks needs to be turned on/off by SW */
+	if (gated) {
+		tmp |= 0xe10000;
+		WREG32(mmVCE_CLOCK_GATING_B, tmp);
+	} else {
+		tmp |= 0xe1;
+		tmp &= ~0xe10000;
+		WREG32(mmVCE_CLOCK_GATING_B, tmp);
+	}
+
+	orig = tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
+	tmp &= ~0x1fe000;
+	tmp &= ~0xff000000;
+	if (tmp != orig)
+		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
+
+	orig = tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
+	tmp &= ~0x3fc;
+	if (tmp != orig)
+		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
+
+	/* set VCE_UENC_REG_CLOCK_GATING always in dynamic mode */
+	WREG32(mmVCE_UENC_REG_CLOCK_GATING, 0x00);
+
+	if(gated)
+		WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
+}
+
+static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable,
+								bool sw_cg)
+{
+	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
+		if (sw_cg)
+			vce_v2_0_set_sw_cg(adev, true);
+		else
+			vce_v2_0_set_dyn_cg(adev, true);
+	} else {
+		vce_v2_0_disable_cg(adev);
+
+		if (sw_cg)
+			vce_v2_0_set_sw_cg(adev, false);
+		else
+			vce_v2_0_set_dyn_cg(adev, false);
+	}
+}
+
 static int vce_v2_0_early_init(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -254,11 +475,8 @@ static int vce_v2_0_hw_init(void *handle)
 	int r, i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = vce_v2_0_start(adev);
-	/* this error mean vcpu not in running state, so just skip ring test, not stop driver initialize */
-	if (r)
-		return 0;
-
+	amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
+	vce_v2_0_enable_mgcg(adev, true, false);
 	for (i = 0; i < adev->vce.num_rings; i++)
 		adev->vce.ring[i].ready = false;
 
@@ -312,190 +530,6 @@ static int vce_v2_0_resume(void *handle)
 	return r;
 }
 
-static void vce_v2_0_set_sw_cg(struct amdgpu_device *adev, bool gated)
-{
-	u32 tmp;
-
-	if (gated) {
-		tmp = RREG32(mmVCE_CLOCK_GATING_B);
-		tmp |= 0xe70000;
-		WREG32(mmVCE_CLOCK_GATING_B, tmp);
-
-		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
-		tmp |= 0xff000000;
-		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
-
-		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
-		tmp &= ~0x3fc;
-		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
-
-		WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
-	} else {
-		tmp = RREG32(mmVCE_CLOCK_GATING_B);
-		tmp |= 0xe7;
-		tmp &= ~0xe70000;
-		WREG32(mmVCE_CLOCK_GATING_B, tmp);
-
-		tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
-		tmp |= 0x1fe000;
-		tmp &= ~0xff000000;
-		WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
-
-		tmp = RREG32(mmVCE_UENC_REG_CLOCK_GATING);
-		tmp |= 0x3fc;
-		WREG32(mmVCE_UENC_REG_CLOCK_GATING, tmp);
-	}
-}
-
-static void vce_v2_0_set_dyn_cg(struct amdgpu_device *adev, bool gated)
-{
-	if (vce_v2_0_wait_for_idle(adev)) {
-		DRM_INFO("VCE is busy, Can't set clock gateing");
-		return;
-	}
-
-	WREG32_P(mmVCE_LMI_CTRL2, 0x100, ~0x100);
-
-	if (vce_v2_0_lmi_clean(adev)) {
-		DRM_INFO("LMI is busy, Can't set clock gateing");
-		return;
-	}
-
-	WREG32_P(mmVCE_VCPU_CNTL, 0, ~VCE_VCPU_CNTL__CLK_EN_MASK);
-	WREG32_P(mmVCE_SOFT_RESET,
-		 VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK,
-		 ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-	WREG32(mmVCE_STATUS, 0);
-
-	if (gated)
-		WREG32(mmVCE_CGTT_CLK_OVERRIDE, 0);
-	/* LMI_MC/LMI_UMC always set in dynamic, set {CGC_*_GATE_MODE, CGC_*_SW_GATE} = {0, 0} */
-	if (gated) {
-		/* Force CLOCK OFF , set {CGC_*_GATE_MODE, CGC_*_SW_GATE} = {*, 1} */
-		WREG32(mmVCE_CLOCK_GATING_B, 0xe90010);
-	} else {
-		/* Force CLOCK ON, set {CGC_*_GATE_MODE, CGC_*_SW_GATE} = {1, 0} */
-		WREG32(mmVCE_CLOCK_GATING_B, 0x800f1);
-	}
-
-	/* Set VCE_UENC_CLOCK_GATING always in dynamic mode {*_FORCE_ON, *_FORCE_OFF} = {0, 0}*/;
-	WREG32(mmVCE_UENC_CLOCK_GATING, 0x40);
-
-	/* set VCE_UENC_REG_CLOCK_GATING always in dynamic mode */
-	WREG32(mmVCE_UENC_REG_CLOCK_GATING, 0x00);
-
-	WREG32_P(mmVCE_LMI_CTRL2, 0, ~0x100);
-	if(!gated) {
-		WREG32_P(mmVCE_VCPU_CNTL, VCE_VCPU_CNTL__CLK_EN_MASK, ~VCE_VCPU_CNTL__CLK_EN_MASK);
-		mdelay(100);
-		WREG32_P(mmVCE_SOFT_RESET, 0, ~VCE_SOFT_RESET__ECPU_SOFT_RESET_MASK);
-
-		vce_v2_0_firmware_loaded(adev);
-		WREG32_P(mmVCE_STATUS, 0, ~VCE_STATUS__JOB_BUSY_MASK);
-	}
-}
-
-static void vce_v2_0_disable_cg(struct amdgpu_device *adev)
-{
-	WREG32(mmVCE_CGTT_CLK_OVERRIDE, 7);
-}
-
-static void vce_v2_0_enable_mgcg(struct amdgpu_device *adev, bool enable)
-{
-	bool sw_cg = false;
-
-	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_VCE_MGCG)) {
-		if (sw_cg)
-			vce_v2_0_set_sw_cg(adev, true);
-		else
-			vce_v2_0_set_dyn_cg(adev, true);
-	} else {
-		vce_v2_0_disable_cg(adev);
-
-		if (sw_cg)
-			vce_v2_0_set_sw_cg(adev, false);
-		else
-			vce_v2_0_set_dyn_cg(adev, false);
-	}
-}
-
-static void vce_v2_0_init_cg(struct amdgpu_device *adev)
-{
-	u32 tmp;
-
-	tmp = RREG32(mmVCE_CLOCK_GATING_A);
-	tmp &= ~0xfff;
-	tmp |= ((0 << 0) | (4 << 4));
-	tmp |= 0x40000;
-	WREG32(mmVCE_CLOCK_GATING_A, tmp);
-
-	tmp = RREG32(mmVCE_UENC_CLOCK_GATING);
-	tmp &= ~0xfff;
-	tmp |= ((0 << 0) | (4 << 4));
-	WREG32(mmVCE_UENC_CLOCK_GATING, tmp);
-
-	tmp = RREG32(mmVCE_CLOCK_GATING_B);
-	tmp |= 0x10;
-	tmp &= ~0x100000;
-	WREG32(mmVCE_CLOCK_GATING_B, tmp);
-}
-
-static void vce_v2_0_mc_resume(struct amdgpu_device *adev)
-{
-	uint64_t addr = adev->vce.gpu_addr;
-	uint32_t size;
-
-	WREG32_P(mmVCE_CLOCK_GATING_A, 0, ~(1 << 16));
-	WREG32_P(mmVCE_UENC_CLOCK_GATING, 0x1FF000, ~0xFF9FF000);
-	WREG32_P(mmVCE_UENC_REG_CLOCK_GATING, 0x3F, ~0x3F);
-	WREG32(mmVCE_CLOCK_GATING_B, 0xf7);
-
-	WREG32(mmVCE_LMI_CTRL, 0x00398000);
-	WREG32_P(mmVCE_LMI_CACHE_CTRL, 0x0, ~0x1);
-	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
-	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
-	WREG32(mmVCE_LMI_VM_CTRL, 0);
-
-	addr += AMDGPU_VCE_FIRMWARE_OFFSET;
-	size = VCE_V2_0_FW_SIZE;
-	WREG32(mmVCE_VCPU_CACHE_OFFSET0, addr & 0x7fffffff);
-	WREG32(mmVCE_VCPU_CACHE_SIZE0, size);
-
-	addr += size;
-	size = VCE_V2_0_STACK_SIZE;
-	WREG32(mmVCE_VCPU_CACHE_OFFSET1, addr & 0x7fffffff);
-	WREG32(mmVCE_VCPU_CACHE_SIZE1, size);
-
-	addr += size;
-	size = VCE_V2_0_DATA_SIZE;
-	WREG32(mmVCE_VCPU_CACHE_OFFSET2, addr & 0x7fffffff);
-	WREG32(mmVCE_VCPU_CACHE_SIZE2, size);
-
-	WREG32_P(mmVCE_LMI_CTRL2, 0x0, ~0x100);
-	WREG32_FIELD(VCE_SYS_INT_EN, VCE_SYS_INT_TRAP_INTERRUPT_EN, 1);
-
-	vce_v2_0_init_cg(adev);
-}
-
-static bool vce_v2_0_is_idle(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	return !(RREG32(mmSRBM_STATUS2) & SRBM_STATUS2__VCE_BUSY_MASK);
-}
-
-static int vce_v2_0_wait_for_idle(void *handle)
-{
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	unsigned i;
-
-	for (i = 0; i < adev->usec_timeout; i++) {
-		if (vce_v2_0_is_idle(handle))
-			return 0;
-	}
-	return -ETIMEDOUT;
-}
-
 static int vce_v2_0_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
@@ -539,33 +573,20 @@ static int vce_v2_0_process_interrupt(struct amdgpu_device *adev,
 	return 0;
 }
 
-static void vce_v2_0_set_bypass_mode(struct amdgpu_device *adev, bool enable)
-{
-	u32 tmp = RREG32_SMC(ixGCK_DFS_BYPASS_CNTL);
-
-	if (enable)
-		tmp |= GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-	else
-		tmp &= ~GCK_DFS_BYPASS_CNTL__BYPASSECLK_MASK;
-
-	WREG32_SMC(ixGCK_DFS_BYPASS_CNTL, tmp);
-}
-
-
 static int vce_v2_0_set_clockgating_state(void *handle,
 					  enum amd_clockgating_state state)
 {
 	bool gate = false;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	bool enable = (state == AMD_CG_STATE_GATE) ? true : false;
-
+	bool sw_cg = false;
 
-	vce_v2_0_set_bypass_mode(adev, enable);
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (state == AMD_CG_STATE_GATE)
+	if (state == AMD_CG_STATE_GATE) {
 		gate = true;
+		sw_cg = true;
+	}
 
-	vce_v2_0_enable_mgcg(adev, gate);
+	vce_v2_0_enable_mgcg(adev, gate, sw_cg);
 
 	return 0;
 }
@@ -582,12 +603,8 @@ static int vce_v2_0_set_powergating_state(void *handle,
 	 */
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
-		return 0;
-
 	if (state == AMD_PG_STATE_GATE)
-		/* XXX do we need a vce_v2_0_stop()? */
-		return 0;
+		return vce_v2_0_stop(adev);
 	else
 		return vce_v2_0_start(adev);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index 8db26559fd1b..93ec8815bb13 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -230,10 +230,6 @@ static int vce_v3_0_start(struct amdgpu_device *adev)
 	struct amdgpu_ring *ring;
 	int idx, r;
 
-	vce_v3_0_override_vce_clock_gating(adev, true);
-	if (!(adev->flags & AMD_IS_APU))
-		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
-
 	ring = &adev->vce.ring[0];
 	WREG32(mmVCE_RB_RPTR, ring->wptr);
 	WREG32(mmVCE_RB_WPTR, ring->wptr);
@@ -436,9 +432,9 @@ static int vce_v3_0_hw_init(void *handle)
 	int r, i;
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
-	r = vce_v3_0_start(adev);
-	if (r)
-		return r;
+	vce_v3_0_override_vce_clock_gating(adev, true);
+	if (!(adev->flags & AMD_IS_APU))
+		amdgpu_asic_set_vce_clocks(adev, 10000, 10000);
 
 	for (i = 0; i < adev->vce.num_rings; i++)
 		adev->vce.ring[i].ready = false;
@@ -514,6 +510,8 @@ static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
 	WREG32(mmVCE_LMI_SWAP_CNTL, 0);
 	WREG32(mmVCE_LMI_SWAP_CNTL1, 0);
 	WREG32(mmVCE_LMI_VM_CTRL, 0);
+	WREG32_OR(mmVCE_VCPU_CNTL, 0x00100000);
+
 	if (adev->asic_type >= CHIP_STONEY) {
 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR0, (adev->vce.gpu_addr >> 8));
 		WREG32(mmVCE_LMI_VCPU_CACHE_40BIT_BAR1, (adev->vce.gpu_addr >> 8));
@@ -766,17 +764,14 @@ static int vce_v3_0_set_powergating_state(void *handle,
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	int ret = 0;
 
-	if (!(adev->pg_flags & AMD_PG_SUPPORT_VCE))
-		return 0;
-
 	if (state == AMD_PG_STATE_GATE) {
-		adev->vce.is_powergated = true;
-		/* XXX do we need a vce_v3_0_stop()? */
+		ret = vce_v3_0_stop(adev);
+		if (ret)
+			goto out;
 	} else {
 		ret = vce_v3_0_start(adev);
 		if (ret)
 			goto out;
-		adev->vce.is_powergated = false;
 	}
 
 out:
@@ -790,7 +785,8 @@ static void vce_v3_0_get_clockgating_state(void *handle, u32 *flags)
 
 	mutex_lock(&adev->pm.mutex);
 
-	if (adev->vce.is_powergated) {
+	if (RREG32_SMC(ixCURRENT_PG_STATUS) &
+			CURRENT_PG_STATUS__VCE_PG_STATUS_MASK) {
 		DRM_INFO("Cannot get clockgating state when VCE is powergated.\n");
 		goto out;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index 4922fff08c3c..50bdb24ef8d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -721,6 +721,7 @@ static int vi_gpu_pci_config_reset(struct amdgpu_device *adev)
 		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
 			/* enable BM */
 			pci_set_master(adev->pdev);
+			adev->has_hw_reset = true;
 			return 0;
 		}
 		udelay(1);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 6a3470f84998..d1ce83d73a87 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -23,7 +23,7 @@
 #include <linux/mm_types.h>
 #include <linux/slab.h>
 #include <linux/types.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/uaccess.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
index d83de985e88c..6acc4313363e 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c
@@ -23,6 +23,8 @@
 
 #include <linux/printk.h>
 #include <linux/slab.h>
+#include <linux/mm_types.h>
+
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 #include "cik_regs.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
index fa32c32fa1c2..a9b9882a9a77 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c
@@ -23,6 +23,8 @@
 
 #include <linux/printk.h>
 #include <linux/slab.h>
+#include <linux/mm_types.h>
+
 #include "kfd_priv.h"
 #include "kfd_mqd_manager.h"
 #include "vi_structs.h"
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index ef7c8de7060e..84d1ffd1eef9 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -23,6 +23,7 @@
 #include <linux/mutex.h>
 #include <linux/log2.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/slab.h>
 #include <linux/amd-iommu.h>
 #include <linux/notifier.h>
@@ -262,7 +263,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn,
 	 * and because the mmu_notifier_unregister function also drop
 	 * mm_count we need to take an extra count here.
 	 */
-	atomic_inc(&p->mm->mm_count);
+	mmgrab(p->mm);
 	mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm);
 	mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed);
 }
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
index f9fd2ea4625b..dbc2e723f659 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_d.h
@@ -1310,5 +1310,6 @@
 #define ixROM_SW_DATA_62                                                        0xc060012c
 #define ixROM_SW_DATA_63                                                        0xc0600130
 #define ixROM_SW_DATA_64                                                        0xc0600134
+#define ixCURRENT_PG_STATUS                                                     0xc020029c
 
 #endif /* SMU_7_0_1_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
index 25882a4dea5d..34c6ff52710e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_0_1_sh_mask.h
@@ -5452,5 +5452,7 @@
 #define ROM_SW_DATA_63__ROM_SW_DATA__SHIFT 0x0
 #define ROM_SW_DATA_64__ROM_SW_DATA_MASK 0xffffffff
 #define ROM_SW_DATA_64__ROM_SW_DATA__SHIFT 0x0
+#define CURRENT_PG_STATUS__VCE_PG_STATUS_MASK 0x00000002
+#define CURRENT_PG_STATUS__UVD_PG_STATUS_MASK 0x00000004
 
 #endif /* SMU_7_0_1_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_d.h
index a9ef1562f43b..66597c64f525 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_d.h
@@ -1121,5 +1121,6 @@
 #define ixROM_SW_DATA_62                                                        0xc060011c
 #define ixROM_SW_DATA_63                                                        0xc0600120
 #define ixROM_SW_DATA_64                                                        0xc0600124
+#define ixCURRENT_PG_STATUS                                                     0xc020029c
 
 #endif /* SMU_7_1_1_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_sh_mask.h
index 2c997f7b5d13..fb06f2e2f6e6 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_1_sh_mask.h
@@ -4860,5 +4860,7 @@
 #define ROM_SW_DATA_63__ROM_SW_DATA__SHIFT 0x0
 #define ROM_SW_DATA_64__ROM_SW_DATA_MASK 0xffffffff
 #define ROM_SW_DATA_64__ROM_SW_DATA__SHIFT 0x0
+#define CURRENT_PG_STATUS__VCE_PG_STATUS_MASK 0x00000002
+#define CURRENT_PG_STATUS__UVD_PG_STATUS_MASK 0x00000004
 
 #endif /* SMU_7_1_1_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
index 22dd4c2b7290..4446d43d2a8f 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_d.h
@@ -1271,5 +1271,6 @@
 #define ixROM_SW_DATA_62                                                        0xc060011c
 #define ixROM_SW_DATA_63                                                        0xc0600120
 #define ixROM_SW_DATA_64                                                        0xc0600124
+#define ixCURRENT_PG_STATUS                                                     0xc020029c
 
 #endif /* SMU_7_1_2_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
index 518fd02e9d35..627906674fe8 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_2_sh_mask.h
@@ -5830,5 +5830,7 @@
 #define ROM_SW_DATA_63__ROM_SW_DATA__SHIFT 0x0
 #define ROM_SW_DATA_64__ROM_SW_DATA_MASK 0xffffffff
 #define ROM_SW_DATA_64__ROM_SW_DATA__SHIFT 0x0
+#define CURRENT_PG_STATUS__VCE_PG_STATUS_MASK 0x00000002
+#define CURRENT_PG_STATUS__UVD_PG_STATUS_MASK 0x00000004
 
 #endif /* SMU_7_1_2_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
index eca2b851f25f..0333d880bc9e 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_d.h
@@ -1244,5 +1244,5 @@
 #define ixGC_CAC_ACC_CU14                                                       0xc8
 #define ixGC_CAC_ACC_CU15                                                       0xc9
 #define ixGC_CAC_OVRD_CU                                                        0xe7
-
+#define ixCURRENT_PG_STATUS                                                     0xc020029c
 #endif /* SMU_7_1_3_D_H */
diff --git a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
index 1ede9e274714..654c1093d362 100644
--- a/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
+++ b/drivers/gpu/drm/amd/include/asic_reg/smu/smu_7_1_3_sh_mask.h
@@ -6076,5 +6076,8 @@
 #define GC_CAC_OVRD_CU__OVRRD_SELECT__SHIFT 0x0
 #define GC_CAC_OVRD_CU__OVRRD_VALUE_MASK 0xffff0000
 #define GC_CAC_OVRD_CU__OVRRD_VALUE__SHIFT 0x10
+#define CURRENT_PG_STATUS__VCE_PG_STATUS_MASK 0x00000002
+#define CURRENT_PG_STATUS__UVD_PG_STATUS_MASK 0x00000004
+
 
 #endif /* SMU_7_1_3_SH_MASK_H */
diff --git a/drivers/gpu/drm/amd/include/atombios.h b/drivers/gpu/drm/amd/include/atombios.h
index 4a4d3797a6d3..181a2c3c6362 100644
--- a/drivers/gpu/drm/amd/include/atombios.h
+++ b/drivers/gpu/drm/amd/include/atombios.h
@@ -188,7 +188,7 @@
 #define HW_ASSISTED_I2C_STATUS_FAILURE     2
 #define HW_ASSISTED_I2C_STATUS_SUCCESS     1
 
-#pragma pack(1)                                       // BIOS data must use byte aligment
+#pragma pack(1)                                       // BIOS data must use byte alignment
 
 // Define offset to location of ROM header.
 #define OFFSET_TO_POINTER_TO_ATOM_ROM_HEADER         0x00000048L
@@ -4361,7 +4361,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 // GPIO use to control PCIE_VDDC in certain SLT board
 #define PCIE_VDDC_CONTROL_GPIO_PINID        56
 
-//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC swithing feature is enable
+//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC switching feature is enable
 #define PP_AC_DC_SWITCH_GPIO_PINID          60
 //from SMU7.x, if ucGPIO_ID=VDDC_REGULATOR_VRHOT_GPIO_PINID in GPIO_LUTable, VRHot feature is enable
 #define VDDC_VRHOT_GPIO_PINID               61
@@ -9180,7 +9180,7 @@ typedef struct  _ATOM_POWERPLAY_INFO_V3
 
 /*********************************************************************************/
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 #pragma pack(1)
 
@@ -9211,7 +9211,7 @@ typedef struct _ATOM_SERVICE_INFO
 
 
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 //
 // AMD ACPI Table
diff --git a/drivers/gpu/drm/amd/include/cgs_common.h b/drivers/gpu/drm/amd/include/cgs_common.h
index 1d26ae768147..17b9d41f3e87 100644
--- a/drivers/gpu/drm/amd/include/cgs_common.h
+++ b/drivers/gpu/drm/amd/include/cgs_common.h
@@ -171,6 +171,7 @@ struct cgs_firmware_info {
 	uint32_t		ucode_start_address;
 
 	void			*kptr;
+	bool			is_kicker;
 };
 
 struct cgs_mode_info {
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
index 3eccac735db3..b33935fcf428 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/cz_clockpowergating.c
@@ -161,28 +161,25 @@ int cz_dpm_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
 
-	if (cz_hwmgr->uvd_power_gated == bgate)
-		return 0;
-
 	cz_hwmgr->uvd_power_gated = bgate;
 
 	if (bgate) {
-		cgs_set_clockgating_state(hwmgr->device,
-						AMD_IP_BLOCK_TYPE_UVD,
-						AMD_CG_STATE_GATE);
 		cgs_set_powergating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_GATE);
+		cgs_set_clockgating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_UVD,
+						AMD_CG_STATE_GATE);
 		cz_dpm_update_uvd_dpm(hwmgr, true);
 		cz_dpm_powerdown_uvd(hwmgr);
 	} else {
 		cz_dpm_powerup_uvd(hwmgr);
-		cgs_set_powergating_state(hwmgr->device,
-						AMD_IP_BLOCK_TYPE_UVD,
-						AMD_CG_STATE_UNGATE);
 		cgs_set_clockgating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_UNGATE);
+		cgs_set_powergating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_UVD,
+						AMD_CG_STATE_UNGATE);
 		cz_dpm_update_uvd_dpm(hwmgr, false);
 	}
 
@@ -193,47 +190,34 @@ int cz_dpm_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct cz_hwmgr *cz_hwmgr = (struct cz_hwmgr *)(hwmgr->backend);
 
-	if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
-					PHM_PlatformCaps_VCEPowerGating)) {
-		if (cz_hwmgr->vce_power_gated != bgate) {
-			if (bgate) {
-				cgs_set_clockgating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_CG_STATE_GATE);
-				cgs_set_powergating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_GATE);
-				cz_enable_disable_vce_dpm(hwmgr, false);
-				cz_dpm_powerdown_vce(hwmgr);
-				cz_hwmgr->vce_power_gated = true;
-			} else {
-				cz_dpm_powerup_vce(hwmgr);
-				cz_hwmgr->vce_power_gated = false;
-				cgs_set_powergating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_CG_STATE_UNGATE);
-				cgs_set_clockgating_state(
-							hwmgr->device,
-							AMD_IP_BLOCK_TYPE_VCE,
-							AMD_PG_STATE_UNGATE);
-				cz_dpm_update_vce_dpm(hwmgr);
-				cz_enable_disable_vce_dpm(hwmgr, true);
-				return 0;
-			}
-		}
+	if (bgate) {
+		cgs_set_powergating_state(
+					hwmgr->device,
+					AMD_IP_BLOCK_TYPE_VCE,
+					AMD_PG_STATE_GATE);
+		cgs_set_clockgating_state(
+					hwmgr->device,
+					AMD_IP_BLOCK_TYPE_VCE,
+					AMD_CG_STATE_GATE);
+		cz_enable_disable_vce_dpm(hwmgr, false);
+		cz_dpm_powerdown_vce(hwmgr);
+		cz_hwmgr->vce_power_gated = true;
 	} else {
-		cz_hwmgr->vce_power_gated = bgate;
+		cz_dpm_powerup_vce(hwmgr);
+		cz_hwmgr->vce_power_gated = false;
+		cgs_set_clockgating_state(
+					hwmgr->device,
+					AMD_IP_BLOCK_TYPE_VCE,
+					AMD_PG_STATE_UNGATE);
+		cgs_set_powergating_state(
+					hwmgr->device,
+					AMD_IP_BLOCK_TYPE_VCE,
+					AMD_CG_STATE_UNGATE);
 		cz_dpm_update_vce_dpm(hwmgr);
-		cz_enable_disable_vce_dpm(hwmgr, !bgate);
+		cz_enable_disable_vce_dpm(hwmgr, true);
 		return 0;
 	}
 
-	if (!cz_hwmgr->vce_power_gated)
-		cz_dpm_update_vce_dpm(hwmgr);
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
index 4b0a94cc995e..953e0c9ad7cd 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.c
@@ -1396,3 +1396,25 @@ int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr,
 
 	return 0;
 }
+
+int  atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
+				uint8_t *svd_gpio_id, uint8_t *svc_gpio_id,
+				uint16_t *load_line)
+{
+	ATOM_VOLTAGE_OBJECT_INFO_V3_1 *voltage_info =
+		(ATOM_VOLTAGE_OBJECT_INFO_V3_1 *)get_voltage_info_table(hwmgr->device);
+
+	const ATOM_VOLTAGE_OBJECT_V3 *voltage_object;
+
+	PP_ASSERT_WITH_CODE((NULL != voltage_info),
+			"Could not find Voltage Table in BIOS.", return -EINVAL);
+
+	voltage_object = atomctrl_lookup_voltage_type_v3
+		(voltage_info, voltage_type,  VOLTAGE_OBJ_SVID2);
+
+	*svd_gpio_id = voltage_object->asSVID2Obj.ucSVDGpioId;
+	*svc_gpio_id = voltage_object->asSVID2Obj.ucSVCGpioId;
+	*load_line = voltage_object->asSVID2Obj.usLoadLine_PSI;
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
index fc898afce002..e9fe2e84006b 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/ppatomctrl.h
@@ -311,5 +311,8 @@ extern int atomctrl_get_smc_sclk_range_table(struct pp_hwmgr *hwmgr, struct pp_a
 
 extern int atomctrl_get_avfs_information(struct pp_hwmgr *hwmgr, struct pp_atom_ctrl__avfs_parameters *param);
 
+extern int  atomctrl_get_svi2_info(struct pp_hwmgr *hwmgr, uint8_t voltage_type,
+				uint8_t *svd_gpio_id, uint8_t *svc_gpio_id,
+				uint16_t *load_line);
 #endif
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
index a1fc4fcac1e0..8cf71f3c6d0e 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_clockpowergating.c
@@ -147,22 +147,22 @@ int smu7_powergate_uvd(struct pp_hwmgr *hwmgr, bool bgate)
 	data->uvd_power_gated = bgate;
 
 	if (bgate) {
-		cgs_set_clockgating_state(hwmgr->device,
-				AMD_IP_BLOCK_TYPE_UVD,
-				AMD_CG_STATE_GATE);
 		cgs_set_powergating_state(hwmgr->device,
 						AMD_IP_BLOCK_TYPE_UVD,
 						AMD_PG_STATE_GATE);
+		cgs_set_clockgating_state(hwmgr->device,
+				AMD_IP_BLOCK_TYPE_UVD,
+				AMD_CG_STATE_GATE);
 		smu7_update_uvd_dpm(hwmgr, true);
 		smu7_powerdown_uvd(hwmgr);
 	} else {
 		smu7_powerup_uvd(hwmgr);
-		cgs_set_powergating_state(hwmgr->device,
-						AMD_IP_BLOCK_TYPE_UVD,
-						AMD_CG_STATE_UNGATE);
 		cgs_set_clockgating_state(hwmgr->device,
 				AMD_IP_BLOCK_TYPE_UVD,
 				AMD_CG_STATE_UNGATE);
+		cgs_set_powergating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_UVD,
+						AMD_CG_STATE_UNGATE);
 		smu7_update_uvd_dpm(hwmgr, false);
 	}
 
@@ -173,12 +173,12 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 
-	if (data->vce_power_gated == bgate)
-		return 0;
-
 	data->vce_power_gated = bgate;
 
 	if (bgate) {
+		cgs_set_powergating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_VCE,
+						AMD_PG_STATE_UNGATE);
 		cgs_set_clockgating_state(hwmgr->device,
 				AMD_IP_BLOCK_TYPE_VCE,
 				AMD_CG_STATE_GATE);
@@ -186,10 +186,13 @@ int smu7_powergate_vce(struct pp_hwmgr *hwmgr, bool bgate)
 		smu7_powerdown_vce(hwmgr);
 	} else {
 		smu7_powerup_vce(hwmgr);
-		smu7_update_vce_dpm(hwmgr, false);
 		cgs_set_clockgating_state(hwmgr->device,
 				AMD_IP_BLOCK_TYPE_VCE,
 				AMD_CG_STATE_UNGATE);
+		cgs_set_powergating_state(hwmgr->device,
+						AMD_IP_BLOCK_TYPE_VCE,
+						AMD_PG_STATE_UNGATE);
+		smu7_update_vce_dpm(hwmgr, false);
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 0a6c833720df..f75ee33ec5bb 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -1383,6 +1383,15 @@ static void smu7_init_dpm_defaults(struct pp_hwmgr *hwmgr)
 	data->force_pcie_gen = PP_PCIEGenInvalid;
 	data->ulv_supported = hwmgr->feature_mask & PP_ULV_MASK ? true : false;
 
+	if (hwmgr->chip_id == CHIP_POLARIS12 || hwmgr->smumgr->is_kicker) {
+		uint8_t tmp1, tmp2;
+		uint16_t tmp3 = 0;
+		atomctrl_get_svi2_info(hwmgr, VOLTAGE_TYPE_VDDC, &tmp1, &tmp2,
+						&tmp3);
+		tmp3 = (tmp3 >> 5) & 0x3;
+		data->vddc_phase_shed_control = ((tmp3 << 1) | (tmp3 >> 1)) & 0x3;
+	}
+
 	data->fast_watermark_threshold = 100;
 	if (atomctrl_is_voltage_controled_by_gpio_v3(hwmgr,
 			VOLTAGE_TYPE_VDDC, VOLTAGE_OBJ_SVID2))
@@ -2624,6 +2633,7 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr,
 		smu7_force_clock_level(hwmgr, PP_SCLK, 1<<sclk_mask);
 		smu7_force_clock_level(hwmgr, PP_MCLK, 1<<mclk_mask);
 		smu7_force_clock_level(hwmgr, PP_PCIE, 1<<pcie_mask);
+
 		break;
 	case AMD_DPM_FORCED_LEVEL_MANUAL:
 		hwmgr->dpm_level = level;
@@ -2633,9 +2643,9 @@ static int smu7_force_dpm_level(struct pp_hwmgr *hwmgr,
 		break;
 	}
 
-	if (level & (AMD_DPM_FORCED_LEVEL_PROFILE_PEAK | AMD_DPM_FORCED_LEVEL_HIGH))
+	if (level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->saved_dpm_level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
 		smu7_fan_ctrl_set_fan_speed_percent(hwmgr, 100);
-	else
+	else if (level != AMD_DPM_FORCED_LEVEL_PROFILE_PEAK && hwmgr->saved_dpm_level == AMD_DPM_FORCED_LEVEL_PROFILE_PEAK)
 		smu7_fan_ctrl_reset_fan_speed_to_default(hwmgr);
 
 	return 0;
@@ -4397,16 +4407,14 @@ static int smu7_get_sclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks)
 		if (table_info == NULL || table_info->vdd_dep_on_sclk == NULL)
 			return -EINVAL;
 		dep_sclk_table = table_info->vdd_dep_on_sclk;
-		for (i = 0; i < dep_sclk_table->count; i++) {
+		for (i = 0; i < dep_sclk_table->count; i++)
 			clocks->clock[i] = dep_sclk_table->entries[i].clk;
-			clocks->count++;
-		}
+		clocks->count = dep_sclk_table->count;
 	} else if (hwmgr->pp_table_version == PP_TABLE_V0) {
 		sclk_table = hwmgr->dyn_state.vddc_dependency_on_sclk;
-		for (i = 0; i < sclk_table->count; i++) {
+		for (i = 0; i < sclk_table->count; i++)
 			clocks->clock[i] = sclk_table->entries[i].clk;
-			clocks->count++;
-		}
+		clocks->count = sclk_table->count;
 	}
 
 	return 0;
@@ -4440,14 +4448,13 @@ static int smu7_get_mclks(struct pp_hwmgr *hwmgr, struct amd_pp_clocks *clocks)
 			clocks->clock[i] = dep_mclk_table->entries[i].clk;
 			clocks->latency[i] = smu7_get_mem_latency(hwmgr,
 						dep_mclk_table->entries[i].clk);
-			clocks->count++;
 		}
+		clocks->count = dep_mclk_table->count;
 	} else if (hwmgr->pp_table_version == PP_TABLE_V0) {
 		mclk_table = hwmgr->dyn_state.vddc_dependency_on_mclk;
-		for (i = 0; i < mclk_table->count; i++) {
+		for (i = 0; i < mclk_table->count; i++)
 			clocks->clock[i] = mclk_table->entries[i].clk;
-			clocks->count++;
-		}
+		clocks->count = mclk_table->count;
 	}
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
index 27e7f76ad8a6..f221e17b67e7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.h
@@ -268,7 +268,7 @@ struct smu7_hwmgr {
 	uint32_t                       fast_watermark_threshold;
 
 	/* ---- Phase Shedding ---- */
-	bool                           vddc_phase_shed_control;
+	uint8_t                           vddc_phase_shed_control;
 
 	/* ---- DI/DT ---- */
 	struct smu7_display_timing        display_timing;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
index 3341c0fbd069..1dc31aa72781 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_powertune.c
@@ -477,6 +477,151 @@ static const struct gpu_pt_config_reg DIDTConfig_Polaris12[] = {
 	{   0xFFFFFFFF  }
 };
 
+static const struct gpu_pt_config_reg DIDTConfig_Polaris11_Kicker[] =
+{
+/* ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ *      Offset                             Mask                                                Shift                                               Value       Type
+ * ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ */
+	/* DIDT_SQ */
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT0__SHIFT,                  0x004c,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT1__SHIFT,                  0x00d0,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0069,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT0_3,               DIDT_SQ_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_SQ_WEIGHT0_3__WEIGHT3__SHIFT,                  0x0048,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT4__SHIFT,                  0x005f,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT5__SHIFT,                  0x007a,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT6__SHIFT,                  0x001f,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT4_7,               DIDT_SQ_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_SQ_WEIGHT4_7__WEIGHT7__SHIFT,                  0x002d,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT8_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT8__SHIFT,                 0x0088,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT9_MASK,                   DIDT_SQ_WEIGHT8_11__WEIGHT9__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT10_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT10__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_WEIGHT8_11,              DIDT_SQ_WEIGHT8_11__WEIGHT11_MASK,                  DIDT_SQ_WEIGHT8_11__WEIGHT11__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MIN_POWER_MASK,                      DIDT_SQ_CTRL1__MIN_POWER__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL1,                   DIDT_SQ_CTRL1__MAX_POWER_MASK,                      DIDT_SQ_CTRL1__MAX_POWER__SHIFT,                    0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__UNUSED_0_MASK,                    DIDT_SQ_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL_OCP,                DIDT_SQ_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_SQ_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_SQ_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_0_MASK,                       DIDT_SQ_CTRL2__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_SQ_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_1_MASK,                       DIDT_SQ_CTRL2__UNUSED_1__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_SQ_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL2,                   DIDT_SQ_CTRL2__UNUSED_2_MASK,                       DIDT_SQ_CTRL2__UNUSED_2__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_SQ_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_SQ_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_SQ_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_STALL_CTRL,              DIDT_SQ_STALL_CTRL__UNUSED_0_MASK,                  DIDT_SQ_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_SQ_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_SQ_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_TUNING_CTRL,             DIDT_SQ_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_SQ_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_SQ_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_SQ_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__PHASE_OFFSET_MASK,                   DIDT_SQ_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_SQ_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_SQ_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0008,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_SQ_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0008,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_SQ_CTRL0,                   DIDT_SQ_CTRL0__UNUSED_0_MASK,                       DIDT_SQ_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	/* DIDT_TD */
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT0_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT0__SHIFT,                  0x000a,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT1_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT1__SHIFT,                  0x0010,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT2_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT2__SHIFT,                  0x0017,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT0_3,               DIDT_TD_WEIGHT0_3__WEIGHT3_MASK,                    DIDT_TD_WEIGHT0_3__WEIGHT3__SHIFT,                  0x002f,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT4_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT4__SHIFT,                  0x0046,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT5_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT5__SHIFT,                  0x005d,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT6_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT6__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_WEIGHT4_7,               DIDT_TD_WEIGHT4_7__WEIGHT7_MASK,                    DIDT_TD_WEIGHT4_7__WEIGHT7__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MIN_POWER_MASK,                      DIDT_TD_CTRL1__MIN_POWER__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL1,                   DIDT_TD_CTRL1__MAX_POWER_MASK,                      DIDT_TD_CTRL1__MAX_POWER__SHIFT,                    0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__UNUSED_0_MASK,                    DIDT_TD_CTRL_OCP__UNUSED_0__SHIFT,                  0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL_OCP,                DIDT_TD_CTRL_OCP__OCP_MAX_POWER_MASK,               DIDT_TD_CTRL_OCP__OCP_MAX_POWER__SHIFT,             0x00ff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__MAX_POWER_DELTA_MASK,                DIDT_TD_CTRL2__MAX_POWER_DELTA__SHIFT,              0x3fff,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_0_MASK,                       DIDT_TD_CTRL2__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,       DIDT_TD_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,     0x000f,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_1_MASK,                       DIDT_TD_CTRL2__UNUSED_1__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,       DIDT_TD_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL2,                   DIDT_TD_CTRL2__UNUSED_2_MASK,                       DIDT_TD_CTRL2__UNUSED_2__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,    DIDT_TD_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT,  0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,       DIDT_TD_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,     0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,   DIDT_TD_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT, 0x01aa,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_STALL_CTRL,              DIDT_TD_STALL_CTRL__UNUSED_0_MASK,                  DIDT_TD_STALL_CTRL__UNUSED_0__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,       DIDT_TD_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,     0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,       DIDT_TD_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,     0x0dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_TUNING_CTRL,             DIDT_TD_TUNING_CTRL__UNUSED_0_MASK,                 DIDT_TD_TUNING_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TD_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TD_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TD_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TD_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TD_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0008,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TD_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0008,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TD_CTRL0,                   DIDT_TD_CTRL0__UNUSED_0_MASK,                       DIDT_TD_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	/* DIDT_TCP */
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT0_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT0__SHIFT,                 0x0004,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT1_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT1__SHIFT,                 0x0037,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT2_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT2__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT0_3,              DIDT_TCP_WEIGHT0_3__WEIGHT3_MASK,                   DIDT_TCP_WEIGHT0_3__WEIGHT3__SHIFT,                 0x00ff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT4_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT4__SHIFT,                 0x0054,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT5_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT5__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT6_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT6__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_WEIGHT4_7,              DIDT_TCP_WEIGHT4_7__WEIGHT7_MASK,                   DIDT_TCP_WEIGHT4_7__WEIGHT7__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MIN_POWER_MASK,                     DIDT_TCP_CTRL1__MIN_POWER__SHIFT,                   0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL1,                  DIDT_TCP_CTRL1__MAX_POWER_MASK,                     DIDT_TCP_CTRL1__MAX_POWER__SHIFT,                   0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__UNUSED_0_MASK,                   DIDT_TCP_CTRL_OCP__UNUSED_0__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL_OCP,               DIDT_TCP_CTRL_OCP__OCP_MAX_POWER_MASK,              DIDT_TCP_CTRL_OCP__OCP_MAX_POWER__SHIFT,            0xffff,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__MAX_POWER_DELTA_MASK,               DIDT_TCP_CTRL2__MAX_POWER_DELTA__SHIFT,             0x3dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_0_MASK,                      DIDT_TCP_CTRL2__UNUSED_0__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE_MASK,      DIDT_TCP_CTRL2__SHORT_TERM_INTERVAL_SIZE__SHIFT,    0x0032,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_1_MASK,                      DIDT_TCP_CTRL2__UNUSED_1__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO_MASK,      DIDT_TCP_CTRL2__LONG_TERM_INTERVAL_RATIO__SHIFT,    0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL2,                  DIDT_TCP_CTRL2__UNUSED_2_MASK,                      DIDT_TCP_CTRL2__UNUSED_2__SHIFT,                    0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE_MASK,   DIDT_TCP_STALL_CTRL__DIDT_STALL_CTRL_ENABLE__SHIFT, 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_HI__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO_MASK,      DIDT_TCP_STALL_CTRL__DIDT_STALL_DELAY_LO__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD_MASK,  DIDT_TCP_STALL_CTRL__DIDT_HI_POWER_THRESHOLD__SHIFT,0x01aa,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_STALL_CTRL,             DIDT_TCP_STALL_CTRL__UNUSED_0_MASK,                 DIDT_TCP_STALL_CTRL__UNUSED_0__SHIFT,               0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE_MASK,      DIDT_TCP_TUNING_CTRL__DIDT_TUNING_ENABLE__SHIFT,    0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_HI__SHIFT,    0x3dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO_MASK,      DIDT_TCP_TUNING_CTRL__MAX_POWER_DELTA_LO__SHIFT,    0x3dde,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_TUNING_CTRL,            DIDT_TCP_TUNING_CTRL__UNUSED_0_MASK,                DIDT_TCP_TUNING_CTRL__UNUSED_0__SHIFT,              0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN_MASK,                   DIDT_TCP_CTRL0__DIDT_CTRL_EN__SHIFT,                 0x0001,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__USE_REF_CLOCK_MASK,                  DIDT_TCP_CTRL0__USE_REF_CLOCK__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__PHASE_OFFSET_MASK,                   DIDT_TCP_CTRL0__PHASE_OFFSET__SHIFT,                 0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CTRL_RST_MASK,                  DIDT_TCP_CTRL0__DIDT_CTRL_RST__SHIFT,                0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE_MASK,           DIDT_TCP_CTRL0__DIDT_CLK_EN_OVERRIDE__SHIFT,         0x0000,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_HI__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO_MASK,     DIDT_TCP_CTRL0__DIDT_MAX_STALLS_ALLOWED_LO__SHIFT,   0x0010,     GPU_CONFIGREG_DIDT_IND },
+	{   ixDIDT_TCP_CTRL0,                   DIDT_TCP_CTRL0__UNUSED_0_MASK,                       DIDT_TCP_CTRL0__UNUSED_0__SHIFT,                     0x0000,     GPU_CONFIGREG_DIDT_IND },
+
+	{   0xFFFFFFFF  }  /* End of list */
+};
 
 static int smu7_enable_didt(struct pp_hwmgr *hwmgr, const bool enable)
 {
@@ -630,7 +775,10 @@ int smu7_enable_didt_config(struct pp_hwmgr *hwmgr)
 			} else if (hwmgr->chip_id == CHIP_POLARIS11) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
 				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
-				result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
+				if (hwmgr->smumgr->is_kicker)
+					result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11_Kicker);
+				else
+					result = smu7_program_pt_config_registers(hwmgr, DIDTConfig_Polaris11);
 				PP_ASSERT_WITH_CODE((result == 0), "DIDT Config failed.", return result);
 			} else if (hwmgr->chip_id == CHIP_POLARIS12) {
 				result = smu7_program_pt_config_registers(hwmgr, GCCACConfig_Polaris11);
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 26129972f686..80ed65985af8 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -89,7 +89,7 @@ enum phm_platform_caps {
 	PHM_PlatformCaps_EnableSideportControl,                 /* indicates Sideport can be controlled */
 	PHM_PlatformCaps_VideoPlaybackEEUNotification,          /* indicates EEU notification of video start/stop is required */
 	PHM_PlatformCaps_TurnOffPll_ASPML1,                     /* PCIE Turn Off PLL in ASPM L1 */
-	PHM_PlatformCaps_EnableHTLinkControl,                   /* indicates HT Link can be controlled by ACPI or CLMC overrided/automated mode. */
+	PHM_PlatformCaps_EnableHTLinkControl,                   /* indicates HT Link can be controlled by ACPI or CLMC overridden/automated mode. */
 	PHM_PlatformCaps_PerformanceStateOnly,                  /* indicates only performance power state to be used on current system. */
 	PHM_PlatformCaps_ExclusiveModeAlwaysHigh,               /* In Exclusive (3D) mode always stay in High state. */
 	PHM_PlatformCaps_DisableMGClockGating,                  /* to disable Medium Grain Clock Gating or not */
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
index 9b6531bd6350..7c318a95e0c2 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smumgr.h
@@ -137,6 +137,7 @@ struct pp_smumgr {
 	uint32_t usec_timeout;
 	bool reload_fw;
 	const struct pp_smumgr_func *smumgr_funcs;
+	bool is_kicker;
 };
 
 extern int smum_early_init(struct pp_instance *handle);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c
index 0e26900e459e..80e2329a1b9e 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smc.c
@@ -494,6 +494,7 @@ static int polaris10_populate_ulv_level(struct pp_hwmgr *hwmgr,
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	struct phm_ppt_v1_information *table_info =
 			(struct phm_ppt_v1_information *)(hwmgr->pptable);
+	struct pp_smumgr *smumgr = hwmgr->smumgr;
 
 	state->CcPwrDynRm = 0;
 	state->CcPwrDynRm1 = 0;
@@ -502,7 +503,10 @@ static int polaris10_populate_ulv_level(struct pp_hwmgr *hwmgr,
 	state->VddcOffsetVid = (uint8_t)(table_info->us_ulv_voltage_offset *
 			VOLTAGE_VID_OFFSET_SCALE2 / VOLTAGE_VID_OFFSET_SCALE1);
 
-	state->VddcPhase = (data->vddc_phase_shed_control) ? 0 : 1;
+	if (smumgr->chip_id == CHIP_POLARIS12 || smumgr->is_kicker)
+		state->VddcPhase = data->vddc_phase_shed_control ^ 0x3;
+	else
+		state->VddcPhase = (data->vddc_phase_shed_control) ? 0 : 1;
 
 	CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm);
 	CONVERT_FROM_HOST_TO_SMC_UL(state->CcPwrDynRm1);
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
index 6749fbe26c74..35ac27681415 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smu7_smumgr.c
@@ -533,6 +533,8 @@ int smu7_upload_smu_firmware_image(struct pp_smumgr *smumgr)
 		cgs_get_firmware_info(smumgr->device,
 			smu7_convert_fw_type_to_cgs(UCODE_ID_SMU_SK), &info);
 
+	smumgr->is_kicker = info.is_kicker;
+
 	result = smu7_upload_smc_firmware_data(smumgr, info.image_size, (uint32_t *)info.kptr, SMU7_SMC_SIZE);
 
 	return result;
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
index 60c36928284c..c0956a4207a9 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/smumgr.c
@@ -37,8 +37,10 @@ MODULE_FIRMWARE("amdgpu/tonga_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/fiji_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_smc_sk.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_smc_sk.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_k_smc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_smc.bin");
 
 
diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
index 1bf83ed113b3..16f96563cd2b 100644
--- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
+++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
@@ -24,6 +24,7 @@
 #include <linux/kthread.h>
 #include <linux/wait.h>
 #include <linux/sched.h>
+#include <uapi/linux/sched/types.h>
 #include <drm/drmP.h>
 #include "gpu_scheduler.h"
 
diff --git a/drivers/gpu/drm/arc/arcpgu_drv.c b/drivers/gpu/drm/arc/arcpgu_drv.c
index 0b6eaa49a1db..8d8344ed655e 100644
--- a/drivers/gpu/drm/arc/arcpgu_drv.c
+++ b/drivers/gpu/drm/arc/arcpgu_drv.c
@@ -135,8 +135,7 @@ static int arcpgu_load(struct drm_device *drm)
 	drm_kms_helper_poll_init(drm);
 
 	arcpgu->fbdev = drm_fbdev_cma_init(drm, 16,
-					      drm->mode_config.num_crtc,
-					      drm->mode_config.num_connector);
+					   drm->mode_config.num_connector);
 	if (IS_ERR(arcpgu->fbdev)) {
 		ret = PTR_ERR(arcpgu->fbdev);
 		arcpgu->fbdev = NULL;
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index a2e5b04cdee3..4ce4f970920b 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -349,7 +349,7 @@ static int hdlcd_drm_bind(struct device *dev)
 	drm_mode_config_reset(drm);
 	drm_kms_helper_poll_init(drm);
 
-	hdlcd->fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+	hdlcd->fbdev = drm_fbdev_cma_init(drm, 32,
 					  drm->mode_config.num_connector);
 
 	if (IS_ERR(hdlcd->fbdev)) {
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index 99fb0ab39191..8b0672d4aee9 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -457,7 +457,7 @@ static int malidp_bind(struct device *dev)
 
 	drm_mode_config_reset(drm);
 
-	malidp->fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+	malidp->fbdev = drm_fbdev_cma_init(drm, 32,
 					   drm->mode_config.num_connector);
 
 	if (IS_ERR(malidp->fbdev)) {
diff --git a/drivers/gpu/drm/armada/armada_fbdev.c b/drivers/gpu/drm/armada/armada_fbdev.c
index 78335100cbc3..0233e1dc33e1 100644
--- a/drivers/gpu/drm/armada/armada_fbdev.c
+++ b/drivers/gpu/drm/armada/armada_fbdev.c
@@ -137,7 +137,7 @@ int armada_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, fbh, &armada_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, fbh, 1, 1);
+	ret = drm_fb_helper_init(dev, fbh, 1);
 	if (ret) {
 		DRM_ERROR("failed to initialize drm fb helper\n");
 		goto err_fb_helper;
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index a293c8be232c..1597458d884e 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -14,14 +14,15 @@
 #include <drm/armada_drm.h>
 #include "armada_ioctlP.h"
 
-static int armada_gem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int armada_gem_vm_fault(struct vm_fault *vmf)
 {
-	struct armada_gem_object *obj = drm_to_armada_gem(vma->vm_private_data);
+	struct drm_gem_object *gobj = vmf->vma->vm_private_data;
+	struct armada_gem_object *obj = drm_to_armada_gem(gobj);
 	unsigned long pfn = obj->phys_addr >> PAGE_SHIFT;
 	int ret;
 
-	pfn += (vmf->address - vma->vm_start) >> PAGE_SHIFT;
-	ret = vm_insert_pfn(vma, vmf->address, pfn);
+	pfn += (vmf->address - vmf->vma->vm_start) >> PAGE_SHIFT;
+	ret = vm_insert_pfn(vmf->vma, vmf->address, pfn);
 
 	switch (ret) {
 	case 0:
@@ -148,8 +149,8 @@ armada_gem_linear_back(struct drm_device *dev, struct armada_gem_object *obj)
 			return -ENOSPC;
 
 		mutex_lock(&priv->linear_lock);
-		ret = drm_mm_insert_node(&priv->linear, node, size, align,
-					 DRM_MM_SEARCH_DEFAULT);
+		ret = drm_mm_insert_node_generic(&priv->linear, node,
+						 size, align, 0, 0);
 		mutex_unlock(&priv->linear_lock);
 		if (ret) {
 			kfree(node);
diff --git a/drivers/gpu/drm/ast/ast_dram_tables.h b/drivers/gpu/drm/ast/ast_dram_tables.h
index cc04539c0ff3..1d9c4e75d303 100644
--- a/drivers/gpu/drm/ast/ast_dram_tables.h
+++ b/drivers/gpu/drm/ast/ast_dram_tables.h
@@ -141,4 +141,66 @@ static const struct ast_dramstruct ast2100_dram_table_data[] = {
 	{ 0xffff, 0xffffffff },
 };
 
+/*
+ * AST2500 DRAM settings modules
+ */
+#define REGTBL_NUM           17
+#define REGIDX_010           0
+#define REGIDX_014           1
+#define REGIDX_018           2
+#define REGIDX_020           3
+#define REGIDX_024           4
+#define REGIDX_02C           5
+#define REGIDX_030           6
+#define REGIDX_214           7
+#define REGIDX_2E0           8
+#define REGIDX_2E4           9
+#define REGIDX_2E8           10
+#define REGIDX_2EC           11
+#define REGIDX_2F0           12
+#define REGIDX_2F4           13
+#define REGIDX_2F8           14
+#define REGIDX_RFC           15
+#define REGIDX_PLL           16
+
+static const u32 ast2500_ddr3_1600_timing_table[REGTBL_NUM] = {
+	0x64604D38,		     /* 0x010 */
+	0x29690599,		     /* 0x014 */
+	0x00000300,		     /* 0x018 */
+	0x00000000,		     /* 0x020 */
+	0x00000000,		     /* 0x024 */
+	0x02181E70,		     /* 0x02C */
+	0x00000040,		     /* 0x030 */
+	0x00000024,		     /* 0x214 */
+	0x02001300,		     /* 0x2E0 */
+	0x0E0000A0,		     /* 0x2E4 */
+	0x000E001B,		     /* 0x2E8 */
+	0x35B8C105,		     /* 0x2EC */
+	0x08090408,		     /* 0x2F0 */
+	0x9B000800,		     /* 0x2F4 */
+	0x0E400A00,		     /* 0x2F8 */
+	0x9971452F,		     /* tRFC  */
+	0x000071C1		     /* PLL   */
+};
+
+static const u32 ast2500_ddr4_1600_timing_table[REGTBL_NUM] = {
+	0x63604E37,		     /* 0x010 */
+	0xE97AFA99,		     /* 0x014 */
+	0x00019000,		     /* 0x018 */
+	0x08000000,		     /* 0x020 */
+	0x00000400,		     /* 0x024 */
+	0x00000410,		     /* 0x02C */
+	0x00000101,		     /* 0x030 */
+	0x00000024,		     /* 0x214 */
+	0x03002900,		     /* 0x2E0 */
+	0x0E0000A0,		     /* 0x2E4 */
+	0x000E001C,		     /* 0x2E8 */
+	0x35B8C106,		     /* 0x2EC */
+	0x08080607,		     /* 0x2F0 */
+	0x9B000900,		     /* 0x2F4 */
+	0x0E400A00,		     /* 0x2F8 */
+	0x99714545,		     /* tRFC  */
+	0x000071C1		     /* PLL   */
+};
+
 #endif
diff --git a/drivers/gpu/drm/ast/ast_drv.h b/drivers/gpu/drm/ast/ast_drv.h
index 1051181d8c0d..8880f0b62e9c 100644
--- a/drivers/gpu/drm/ast/ast_drv.h
+++ b/drivers/gpu/drm/ast/ast_drv.h
@@ -65,6 +65,7 @@ enum ast_chip {
 	AST2150,
 	AST2300,
 	AST2400,
+	AST2500,
 	AST1180,
 };
 
@@ -81,6 +82,7 @@ enum ast_tx_chip {
 #define AST_DRAM_1Gx32   3
 #define AST_DRAM_2Gx16   6
 #define AST_DRAM_4Gx16   7
+#define AST_DRAM_8Gx16   8
 
 struct ast_fbdev;
 
@@ -114,6 +116,11 @@ struct ast_private {
 	struct ttm_bo_kmap_obj cache_kmap;
 	int next_cursor;
 	bool support_wide_screen;
+	enum {
+		ast_use_p2a,
+		ast_use_dt,
+		ast_use_defaults
+	} config_mode;
 
 	enum ast_tx_chip tx_chip_type;
 	u8 dp501_maxclk;
@@ -300,8 +307,8 @@ struct ast_vbios_dclk_info {
 };
 
 struct ast_vbios_mode_info {
-	struct ast_vbios_stdtable *std_table;
-	struct ast_vbios_enhtable *enh_table;
+	const struct ast_vbios_stdtable *std_table;
+	const struct ast_vbios_enhtable *enh_table;
 };
 
 extern int ast_mode_init(struct drm_device *dev);
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index b085140fae95..5d0ffab411a8 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -315,8 +315,7 @@ int ast_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, &afbdev->helper, &ast_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, &afbdev->helper,
-				 1, 1);
+	ret = drm_fb_helper_init(dev, &afbdev->helper, 1);
 	if (ret)
 		goto free;
 
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index 5992ed2166ec..262c2c0e43b4 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -32,8 +32,6 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
 
-#include "ast_dram_tables.h"
-
 void ast_set_index_reg_mask(struct ast_private *ast,
 			    uint32_t base, uint8_t index,
 			    uint8_t mask, uint8_t val)
@@ -62,30 +60,99 @@ uint8_t ast_get_index_reg_mask(struct ast_private *ast,
 	return ret;
 }
 
+static void ast_detect_config_mode(struct drm_device *dev, u32 *scu_rev)
+{
+	struct device_node *np = dev->pdev->dev.of_node;
+	struct ast_private *ast = dev->dev_private;
+	uint32_t data, jregd0, jregd1;
+
+	/* Defaults */
+	ast->config_mode = ast_use_defaults;
+	*scu_rev = 0xffffffff;
+
+	/* Check if we have device-tree properties */
+	if (np && !of_property_read_u32(np, "aspeed,scu-revision-id",
+					scu_rev)) {
+		/* We do, disable P2A access */
+		ast->config_mode = ast_use_dt;
+		DRM_INFO("Using device-tree for configuration\n");
+		return;
+	}
+
+	/* Not all families have a P2A bridge */
+	if (dev->pdev->device != PCI_CHIP_AST2000)
+		return;
+
+	/*
+	 * The BMC will set SCU 0x40 D[12] to 1 if the P2 bridge
+	 * is disabled. We force using P2A if VGA only mode bit
+	 * is set D[7]
+	 */
+	jregd0 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff);
+	jregd1 = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd1, 0xff);
+	if (!(jregd0 & 0x80) || !(jregd1 & 0x10)) {
+		/* Double check it's actually working */
+		data = ast_read32(ast, 0xf004);
+		if (data != 0xFFFFFFFF) {
+			/* P2A works, grab silicon revision */
+			ast->config_mode = ast_use_p2a;
+
+			DRM_INFO("Using P2A bridge for configuration\n");
+
+			/* Read SCU7c (silicon revision register) */
+			ast_write32(ast, 0xf004, 0x1e6e0000);
+			ast_write32(ast, 0xf000, 0x1);
+			*scu_rev = ast_read32(ast, 0x1207c);
+			return;
+		}
+	}
+
+	/* We have a P2A bridge but it's disabled */
+	DRM_INFO("P2A bridge disabled, using default configuration\n");
+}
 
 static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 {
 	struct ast_private *ast = dev->dev_private;
-	uint32_t data, jreg;
+	uint32_t jreg, scu_rev;
+
+	/*
+	 * If VGA isn't enabled, we need to enable now or subsequent
+	 * access to the scratch registers will fail. We also inform
+	 * our caller that it needs to POST the chip
+	 * (Assumption: VGA not enabled -> need to POST)
+	 */
+	if (!ast_is_vga_enabled(dev)) {
+		ast_enable_vga(dev);
+		DRM_INFO("VGA not enabled on entry, requesting chip POST\n");
+		*need_post = true;
+	} else
+		*need_post = false;
+
+
+	/* Enable extended register access */
+	ast_enable_mmio(dev);
 	ast_open_key(ast);
 
+	/* Find out whether P2A works or whether to use device-tree */
+	ast_detect_config_mode(dev, &scu_rev);
+
+	/* Identify chipset */
 	if (dev->pdev->device == PCI_CHIP_AST1180) {
 		ast->chip = AST1100;
 		DRM_INFO("AST 1180 detected\n");
 	} else {
-		if (dev->pdev->revision >= 0x30) {
+		if (dev->pdev->revision >= 0x40) {
+			ast->chip = AST2500;
+			DRM_INFO("AST 2500 detected\n");
+		} else if (dev->pdev->revision >= 0x30) {
 			ast->chip = AST2400;
 			DRM_INFO("AST 2400 detected\n");
 		} else if (dev->pdev->revision >= 0x20) {
 			ast->chip = AST2300;
 			DRM_INFO("AST 2300 detected\n");
 		} else if (dev->pdev->revision >= 0x10) {
-			uint32_t data;
-			ast_write32(ast, 0xf004, 0x1e6e0000);
-			ast_write32(ast, 0xf000, 0x1);
-
-			data = ast_read32(ast, 0x1207c);
-			switch (data & 0x0300) {
+			switch (scu_rev & 0x0300) {
 			case 0x0200:
 				ast->chip = AST1100;
 				DRM_INFO("AST 1100 detected\n");
@@ -110,20 +177,6 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 		}
 	}
 
-	/*
-	 * If VGA isn't enabled, we need to enable now or subsequent
-	 * access to the scratch registers will fail. We also inform
-	 * our caller that it needs to POST the chip
-	 * (Assumption: VGA not enabled -> need to POST)
-	 */
-	if (!ast_is_vga_enabled(dev)) {
-		ast_enable_vga(dev);
-		ast_enable_mmio(dev);
-		DRM_INFO("VGA not enabled on entry, requesting chip POST\n");
-		*need_post = true;
-	} else
-		*need_post = false;
-
 	/* Check if we support wide screen */
 	switch (ast->chip) {
 	case AST1180:
@@ -140,14 +193,14 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 			ast->support_wide_screen = true;
 		else {
 			ast->support_wide_screen = false;
-			/* Read SCU7c (silicon revision register) */
-			ast_write32(ast, 0xf004, 0x1e6e0000);
-			ast_write32(ast, 0xf000, 0x1);
-			data = ast_read32(ast, 0x1207c);
-			data &= 0x300;
-			if (ast->chip == AST2300 && data == 0x0) /* ast1300 */
+			if (ast->chip == AST2300 &&
+			    (scu_rev & 0x300) == 0x0) /* ast1300 */
+				ast->support_wide_screen = true;
+			if (ast->chip == AST2400 &&
+			    (scu_rev & 0x300) == 0x100) /* ast1400 */
 				ast->support_wide_screen = true;
-			if (ast->chip == AST2400 && data == 0x100) /* ast1400 */
+			if (ast->chip == AST2500 &&
+			    scu_rev == 0x100)           /* ast2510 */
 				ast->support_wide_screen = true;
 		}
 		break;
@@ -212,29 +265,68 @@ static int ast_detect_chip(struct drm_device *dev, bool *need_post)
 
 static int ast_get_dram_info(struct drm_device *dev)
 {
+	struct device_node *np = dev->pdev->dev.of_node;
 	struct ast_private *ast = dev->dev_private;
-	uint32_t data, data2;
-	uint32_t denum, num, div, ref_pll;
-
-	ast_write32(ast, 0xf004, 0x1e6e0000);
-	ast_write32(ast, 0xf000, 0x1);
-
-
-	ast_write32(ast, 0x10000, 0xfc600309);
+	uint32_t mcr_cfg, mcr_scu_mpll, mcr_scu_strap;
+	uint32_t denum, num, div, ref_pll, dsel;
 
-	do {
-		if (pci_channel_offline(dev->pdev))
-			return -EIO;
-	} while (ast_read32(ast, 0x10000) != 0x01);
-	data = ast_read32(ast, 0x10004);
+	switch (ast->config_mode) {
+	case ast_use_dt:
+		/*
+		 * If some properties are missing, use reasonable
+		 * defaults for AST2400
+		 */
+		if (of_property_read_u32(np, "aspeed,mcr-configuration",
+					 &mcr_cfg))
+			mcr_cfg = 0x00000577;
+		if (of_property_read_u32(np, "aspeed,mcr-scu-mpll",
+					 &mcr_scu_mpll))
+			mcr_scu_mpll = 0x000050C0;
+		if (of_property_read_u32(np, "aspeed,mcr-scu-strap",
+					 &mcr_scu_strap))
+			mcr_scu_strap = 0;
+		break;
+	case ast_use_p2a:
+		ast_write32(ast, 0xf004, 0x1e6e0000);
+		ast_write32(ast, 0xf000, 0x1);
+		mcr_cfg = ast_read32(ast, 0x10004);
+		mcr_scu_mpll = ast_read32(ast, 0x10120);
+		mcr_scu_strap = ast_read32(ast, 0x10170);
+		break;
+	case ast_use_defaults:
+	default:
+		ast->dram_bus_width = 16;
+		ast->dram_type = AST_DRAM_1Gx16;
+		if (ast->chip == AST2500)
+			ast->mclk = 800;
+		else
+			ast->mclk = 396;
+		return 0;
+	}
 
-	if (data & 0x40)
+	if (mcr_cfg & 0x40)
 		ast->dram_bus_width = 16;
 	else
 		ast->dram_bus_width = 32;
 
-	if (ast->chip == AST2300 || ast->chip == AST2400) {
-		switch (data & 0x03) {
+	if (ast->chip == AST2500) {
+		switch (mcr_cfg & 0x03) {
+		case 0:
+			ast->dram_type = AST_DRAM_1Gx16;
+			break;
+		default:
+		case 1:
+			ast->dram_type = AST_DRAM_2Gx16;
+			break;
+		case 2:
+			ast->dram_type = AST_DRAM_4Gx16;
+			break;
+		case 3:
+			ast->dram_type = AST_DRAM_8Gx16;
+			break;
+		}
+	} else if (ast->chip == AST2300 || ast->chip == AST2400) {
+		switch (mcr_cfg & 0x03) {
 		case 0:
 			ast->dram_type = AST_DRAM_512Mx16;
 			break;
@@ -250,13 +342,13 @@ static int ast_get_dram_info(struct drm_device *dev)
 			break;
 		}
 	} else {
-		switch (data & 0x0c) {
+		switch (mcr_cfg & 0x0c) {
 		case 0:
 		case 4:
 			ast->dram_type = AST_DRAM_512Mx16;
 			break;
 		case 8:
-			if (data & 0x40)
+			if (mcr_cfg & 0x40)
 				ast->dram_type = AST_DRAM_1Gx16;
 			else
 				ast->dram_type = AST_DRAM_512Mx32;
@@ -267,17 +359,15 @@ static int ast_get_dram_info(struct drm_device *dev)
 		}
 	}
 
-	data = ast_read32(ast, 0x10120);
-	data2 = ast_read32(ast, 0x10170);
-	if (data2 & 0x2000)
+	if (mcr_scu_strap & 0x2000)
 		ref_pll = 14318;
 	else
 		ref_pll = 12000;
 
-	denum = data & 0x1f;
-	num = (data & 0x3fe0) >> 5;
-	data = (data & 0xc000) >> 14;
-	switch (data) {
+	denum = mcr_scu_mpll & 0x1f;
+	num = (mcr_scu_mpll & 0x3fe0) >> 5;
+	dsel = (mcr_scu_mpll & 0xc000) >> 14;
+	switch (dsel) {
 	case 3:
 		div = 0x4;
 		break;
@@ -289,7 +379,7 @@ static int ast_get_dram_info(struct drm_device *dev)
 		div = 0x1;
 		break;
 	}
-	ast->mclk = ref_pll * (num + 2) / (denum + 2) * (div * 1000);
+	ast->mclk = ref_pll * (num + 2) / ((denum + 2) * (div * 1000));
 	return 0;
 }
 
@@ -428,17 +518,19 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags)
 
 	ast_detect_chip(dev, &need_post);
 
+	if (need_post)
+		ast_post_gpu(dev);
+
 	if (ast->chip != AST1180) {
 		ret = ast_get_dram_info(dev);
 		if (ret)
 			goto out_free;
 		ast->vram_size = ast_get_vram_info(dev);
-		DRM_INFO("dram %d %d %d %08x\n", ast->mclk, ast->dram_type, ast->dram_bus_width, ast->vram_size);
+		DRM_INFO("dram MCLK=%u Mhz type=%d bus_width=%d size=%08x\n",
+			 ast->mclk, ast->dram_type,
+			 ast->dram_bus_width, ast->vram_size);
 	}
 
-	if (need_post)
-		ast_post_gpu(dev);
-
 	ret = ast_mm_init(ast);
 	if (ret)
 		goto out_free;
@@ -456,6 +548,7 @@ int ast_driver_load(struct drm_device *dev, unsigned long flags)
 	    ast->chip == AST2200 ||
 	    ast->chip == AST2300 ||
 	    ast->chip == AST2400 ||
+	    ast->chip == AST2500 ||
 	    ast->chip == AST1180) {
 		dev->mode_config.max_width = 1920;
 		dev->mode_config.max_height = 2048;
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 606cb40f6c7c..47b78e52691c 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -81,9 +81,9 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 	struct ast_private *ast = crtc->dev->dev_private;
 	const struct drm_framebuffer *fb = crtc->primary->fb;
 	u32 refresh_rate_index = 0, mode_id, color_index, refresh_rate;
+	const struct ast_vbios_enhtable *best = NULL;
 	u32 hborder, vborder;
 	bool check_sync;
-	struct ast_vbios_enhtable *best = NULL;
 
 	switch (fb->format->cpp[0] * 8) {
 	case 8:
@@ -147,7 +147,7 @@ static bool ast_get_vbios_mode_info(struct drm_crtc *crtc, struct drm_display_mo
 	refresh_rate = drm_mode_vrefresh(mode);
 	check_sync = vbios_mode->enh_table->flags & WideScreenMode;
 	do {
-		struct ast_vbios_enhtable *loop = vbios_mode->enh_table;
+		const struct ast_vbios_enhtable *loop = vbios_mode->enh_table;
 
 		while (loop->refresh_rate != 0xff) {
 			if ((check_sync) &&
@@ -227,7 +227,7 @@ static void ast_set_std_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 			    struct ast_vbios_mode_info *vbios_mode)
 {
 	struct ast_private *ast = crtc->dev->dev_private;
-	struct ast_vbios_stdtable *stdtable;
+	const struct ast_vbios_stdtable *stdtable;
 	u32 i;
 	u8 jreg;
 
@@ -273,7 +273,11 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod
 {
 	struct ast_private *ast = crtc->dev->dev_private;
 	u8 jreg05 = 0, jreg07 = 0, jreg09 = 0, jregAC = 0, jregAD = 0, jregAE = 0;
-	u16 temp;
+	u16 temp, precache = 0;
+
+	if ((ast->chip == AST2500) &&
+	    (vbios_mode->enh_table->flags & AST2500PreCatchCRT))
+		precache = 40;
 
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x00);
 
@@ -299,12 +303,12 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod
 		jregAD |= 0x01;  /* HBE D[5] */
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x03, 0xE0, (temp & 0x1f));
 
-	temp = (mode->crtc_hsync_start >> 3) - 1;
+	temp = ((mode->crtc_hsync_start-precache) >> 3) - 1;
 	if (temp & 0x100)
 		jregAC |= 0x40; /* HRS D[5] */
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x04, 0x00, temp);
 
-	temp = ((mode->crtc_hsync_end >> 3) - 1) & 0x3f;
+	temp = (((mode->crtc_hsync_end-precache) >> 3) - 1) & 0x3f;
 	if (temp & 0x20)
 		jregAD |= 0x04; /* HRE D[5] */
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x05, 0x60, (u8)((temp & 0x1f) | jreg05));
@@ -365,6 +369,11 @@ static void ast_set_crtc_reg(struct drm_crtc *crtc, struct drm_display_mode *mod
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x09, 0xdf, jreg09);
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xAE, 0x00, (jregAE | 0x80));
 
+	if (precache)
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0x3f, 0x80);
+	else
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0x3f, 0x00);
+
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0x11, 0x7f, 0x80);
 }
 
@@ -384,14 +393,18 @@ static void ast_set_dclk_reg(struct drm_device *dev, struct drm_display_mode *mo
 			     struct ast_vbios_mode_info *vbios_mode)
 {
 	struct ast_private *ast = dev->dev_private;
-	struct ast_vbios_dclk_info *clk_info;
+	const struct ast_vbios_dclk_info *clk_info;
 
-	clk_info = &dclk_table[vbios_mode->enh_table->dclk_index];
+	if (ast->chip == AST2500)
+		clk_info = &dclk_table_ast2500[vbios_mode->enh_table->dclk_index];
+	else
+		clk_info = &dclk_table[vbios_mode->enh_table->dclk_index];
 
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xc0, 0x00, clk_info->param1);
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xc1, 0x00, clk_info->param2);
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xbb, 0x0f,
-			       (clk_info->param3 & 0x80) | ((clk_info->param3 & 0x3) << 4));
+			       (clk_info->param3 & 0xc0) |
+			       ((clk_info->param3 & 0x3) << 4));
 }
 
 static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode,
@@ -425,7 +438,8 @@ static void ast_set_ext_reg(struct drm_crtc *crtc, struct drm_display_mode *mode
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa8, 0xfd, jregA8);
 
 	/* Set Threshold */
-	if (ast->chip == AST2300 || ast->chip == AST2400) {
+	if (ast->chip == AST2300 || ast->chip == AST2400 ||
+	    ast->chip == AST2500) {
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa7, 0x78);
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, 0xa6, 0x60);
 	} else if (ast->chip == AST2100 ||
@@ -800,7 +814,9 @@ static int ast_mode_valid(struct drm_connector *connector,
 		if ((mode->hdisplay == 1600) && (mode->vdisplay == 900))
 			return MODE_OK;
 
-		if ((ast->chip == AST2100) || (ast->chip == AST2200) || (ast->chip == AST2300) || (ast->chip == AST2400) || (ast->chip == AST1180)) {
+		if ((ast->chip == AST2100) || (ast->chip == AST2200) ||
+		    (ast->chip == AST2300) || (ast->chip == AST2400) ||
+		    (ast->chip == AST2500) || (ast->chip == AST1180)) {
 			if ((mode->hdisplay == 1920) && (mode->vdisplay == 1080))
 				return MODE_OK;
 
diff --git a/drivers/gpu/drm/ast/ast_post.c b/drivers/gpu/drm/ast/ast_post.c
index 810c51d92b99..f7d421359d56 100644
--- a/drivers/gpu/drm/ast/ast_post.c
+++ b/drivers/gpu/drm/ast/ast_post.c
@@ -31,7 +31,8 @@
 
 #include "ast_dram_tables.h"
 
-static void ast_init_dram_2300(struct drm_device *dev);
+static void ast_post_chip_2300(struct drm_device *dev);
+static void ast_post_chip_2500(struct drm_device *dev);
 
 void ast_enable_vga(struct drm_device *dev)
 {
@@ -58,13 +59,9 @@ bool ast_is_vga_enabled(struct drm_device *dev)
 		/* TODO 1180 */
 	} else {
 		ch = ast_io_read8(ast, AST_IO_VGA_ENABLE_PORT);
-		if (ch) {
-			ast_open_key(ast);
-			ch = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff);
-			return ch & 0x04;
-		}
+		return !!(ch & 0x01);
 	}
-	return 0;
+	return false;
 }
 
 static const u8 extreginfo[] = { 0x0f, 0x04, 0x1c, 0xff };
@@ -79,10 +76,11 @@ ast_set_def_ext_reg(struct drm_device *dev)
 	const u8 *ext_reg_info;
 
 	/* reset scratch */
-	for (i = 0x81; i <= 0x8f; i++)
+	for (i = 0x81; i <= 0x9f; i++)
 		ast_set_index_reg(ast, AST_IO_CRTC_PORT, i, 0x00);
 
-	if (ast->chip == AST2300 || ast->chip == AST2400) {
+	if (ast->chip == AST2300 || ast->chip == AST2400 ||
+	    ast->chip == AST2500) {
 		if (dev->pdev->revision >= 0x20)
 			ext_reg_info = extreginfo_ast2300;
 		else
@@ -106,7 +104,8 @@ ast_set_def_ext_reg(struct drm_device *dev)
 
 	/* Enable RAMDAC for A1 */
 	reg = 0x04;
-	if (ast->chip == AST2300 || ast->chip == AST2400)
+	if (ast->chip == AST2300 || ast->chip == AST2400 ||
+	    ast->chip == AST2500)
 		reg |= 0x20;
 	ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb6, 0xff, reg);
 }
@@ -375,16 +374,23 @@ void ast_post_gpu(struct drm_device *dev)
 	pci_write_config_dword(ast->dev->pdev, 0x04, reg);
 
 	ast_enable_vga(dev);
-	ast_enable_mmio(dev);
 	ast_open_key(ast);
+	ast_enable_mmio(dev);
 	ast_set_def_ext_reg(dev);
 
-	if (ast->chip == AST2300 || ast->chip == AST2400)
-		ast_init_dram_2300(dev);
-	else
-		ast_init_dram_reg(dev);
+	if (ast->config_mode == ast_use_p2a) {
+		if (ast->chip == AST2500)
+			ast_post_chip_2500(dev);
+		else if (ast->chip == AST2300 || ast->chip == AST2400)
+			ast_post_chip_2300(dev);
+		else
+			ast_init_dram_reg(dev);
 
-	ast_init_3rdtx(dev);
+		ast_init_3rdtx(dev);
+	} else {
+		if (ast->tx_chip_type != AST_TX_NONE)
+			ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xa3, 0xcf, 0x80);	/* Enable DVO */
+	}
 }
 
 /* AST 2300 DRAM settings */
@@ -440,85 +446,70 @@ static const u32 pattern[8] = {
 	0x7C61D253
 };
 
-static int mmc_test_burst(struct ast_private *ast, u32 datagen)
+static bool mmc_test(struct ast_private *ast, u32 datagen, u8 test_ctl)
 {
 	u32 data, timeout;
 
 	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-	ast_moutdwm(ast, 0x1e6e0070, 0x000000c1 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl);
 	timeout = 0;
 	do {
 		data = ast_mindwm(ast, 0x1e6e0070) & 0x3000;
-		if (data & 0x2000) {
-			return 0;
-		}
+		if (data & 0x2000)
+			return false;
 		if (++timeout > TIMEOUT) {
 			ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-			return 0;
+			return false;
 		}
 	} while (!data);
-	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-	return 1;
+	ast_moutdwm(ast, 0x1e6e0070, 0x0);
+	return true;
 }
 
-static int mmc_test_burst2(struct ast_private *ast, u32 datagen)
+static u32 mmc_test2(struct ast_private *ast, u32 datagen, u8 test_ctl)
 {
 	u32 data, timeout;
 
 	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-	ast_moutdwm(ast, 0x1e6e0070, 0x00000041 | (datagen << 3));
+	ast_moutdwm(ast, 0x1e6e0070, (datagen << 3) | test_ctl);
 	timeout = 0;
 	do {
 		data = ast_mindwm(ast, 0x1e6e0070) & 0x1000;
 		if (++timeout > TIMEOUT) {
 			ast_moutdwm(ast, 0x1e6e0070, 0x0);
-			return -1;
+			return 0xffffffff;
 		}
 	} while (!data);
 	data = ast_mindwm(ast, 0x1e6e0078);
 	data = (data | (data >> 16)) & 0xffff;
-	ast_moutdwm(ast, 0x1e6e0070, 0x0);
+	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
 	return data;
 }
 
-static int mmc_test_single(struct ast_private *ast, u32 datagen)
+
+static bool mmc_test_burst(struct ast_private *ast, u32 datagen)
 {
-	u32 data, timeout;
+	return mmc_test(ast, datagen, 0xc1);
+}
 
-	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-	ast_moutdwm(ast, 0x1e6e0070, 0x000000c5 | (datagen << 3));
-	timeout = 0;
-	do {
-		data = ast_mindwm(ast, 0x1e6e0070) & 0x3000;
-		if (data & 0x2000)
-			return 0;
-		if (++timeout > TIMEOUT) {
-			ast_moutdwm(ast, 0x1e6e0070, 0x0);
-			return 0;
-		}
-	} while (!data);
-	ast_moutdwm(ast, 0x1e6e0070, 0x0);
-	return 1;
+static u32 mmc_test_burst2(struct ast_private *ast, u32 datagen)
+{
+	return mmc_test2(ast, datagen, 0x41);
 }
 
-static int mmc_test_single2(struct ast_private *ast, u32 datagen)
+static bool mmc_test_single(struct ast_private *ast, u32 datagen)
 {
-	u32 data, timeout;
+	return mmc_test(ast, datagen, 0xc5);
+}
 
-	ast_moutdwm(ast, 0x1e6e0070, 0x00000000);
-	ast_moutdwm(ast, 0x1e6e0070, 0x00000005 | (datagen << 3));
-	timeout = 0;
-	do {
-		data = ast_mindwm(ast, 0x1e6e0070) & 0x1000;
-		if (++timeout > TIMEOUT) {
-			ast_moutdwm(ast, 0x1e6e0070, 0x0);
-			return -1;
-		}
-	} while (!data);
-	data = ast_mindwm(ast, 0x1e6e0078);
-	data = (data | (data >> 16)) & 0xffff;
-	ast_moutdwm(ast, 0x1e6e0070, 0x0);
-	return data;
+static u32 mmc_test_single2(struct ast_private *ast, u32 datagen)
+{
+	return mmc_test2(ast, datagen, 0x05);
+}
+
+static bool mmc_test_single_2500(struct ast_private *ast, u32 datagen)
+{
+	return mmc_test(ast, datagen, 0x85);
 }
 
 static int cbr_test(struct ast_private *ast)
@@ -596,16 +587,16 @@ static u32 cbr_scan2(struct ast_private *ast)
 	return data2;
 }
 
-static u32 cbr_test3(struct ast_private *ast)
+static bool cbr_test3(struct ast_private *ast)
 {
 	if (!mmc_test_burst(ast, 0))
-		return 0;
+		return false;
 	if (!mmc_test_single(ast, 0))
-		return 0;
-	return 1;
+		return false;
+	return true;
 }
 
-static u32 cbr_scan3(struct ast_private *ast)
+static bool cbr_scan3(struct ast_private *ast)
 {
 	u32 patcnt, loop;
 
@@ -616,9 +607,9 @@ static u32 cbr_scan3(struct ast_private *ast)
 				break;
 		}
 		if (loop == 2)
-			return 0;
+			return false;
 	}
-	return 1;
+	return true;
 }
 
 static bool finetuneDQI_L(struct ast_private *ast, struct ast2300_dram_param *param)
@@ -1604,7 +1595,7 @@ ddr2_init_start:
 
 }
 
-static void ast_init_dram_2300(struct drm_device *dev)
+static void ast_post_chip_2300(struct drm_device *dev)
 {
 	struct ast_private *ast = dev->dev_private;
 	struct ast2300_dram_param param;
@@ -1630,12 +1621,44 @@ static void ast_init_dram_2300(struct drm_device *dev)
 		temp |= 0x73;
 		ast_write32(ast, 0x12008, temp);
 
+		param.dram_freq = 396;
 		param.dram_type = AST_DDR3;
+		temp = ast_mindwm(ast, 0x1e6e2070);
 		if (temp & 0x01000000)
 			param.dram_type = AST_DDR2;
-		param.dram_chipid = ast->dram_type;
-		param.dram_freq = ast->mclk;
-		param.vram_size = ast->vram_size;
+                switch (temp & 0x18000000) {
+		case 0:
+			param.dram_chipid = AST_DRAM_512Mx16;
+			break;
+		default:
+		case 0x08000000:
+			param.dram_chipid = AST_DRAM_1Gx16;
+			break;
+		case 0x10000000:
+			param.dram_chipid = AST_DRAM_2Gx16;
+			break;
+		case 0x18000000:
+			param.dram_chipid = AST_DRAM_4Gx16;
+			break;
+		}
+                switch (temp & 0x0c) {
+                default:
+		case 0x00:
+			param.vram_size = AST_VIDMEM_SIZE_8M;
+			break;
+
+		case 0x04:
+			param.vram_size = AST_VIDMEM_SIZE_16M;
+			break;
+
+		case 0x08:
+			param.vram_size = AST_VIDMEM_SIZE_32M;
+			break;
+
+		case 0x0c:
+			param.vram_size = AST_VIDMEM_SIZE_64M;
+			break;
+		}
 
 		if (param.dram_type == AST_DDR3) {
 			get_ddr3_info(ast, &param);
@@ -1655,3 +1678,404 @@ static void ast_init_dram_2300(struct drm_device *dev)
 	} while ((reg & 0x40) == 0);
 }
 
+static bool cbr_test_2500(struct ast_private *ast)
+{
+	ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF);
+	ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00);
+	if (!mmc_test_burst(ast, 0))
+		return false;
+	if (!mmc_test_single_2500(ast, 0))
+		return false;
+	return true;
+}
+
+static bool ddr_test_2500(struct ast_private *ast)
+{
+	ast_moutdwm(ast, 0x1E6E0074, 0x0000FFFF);
+	ast_moutdwm(ast, 0x1E6E007C, 0xFF00FF00);
+	if (!mmc_test_burst(ast, 0))
+		return false;
+	if (!mmc_test_burst(ast, 1))
+		return false;
+	if (!mmc_test_burst(ast, 2))
+		return false;
+	if (!mmc_test_burst(ast, 3))
+		return false;
+	if (!mmc_test_single_2500(ast, 0))
+		return false;
+	return true;
+}
+
+static void ddr_init_common_2500(struct ast_private *ast)
+{
+	ast_moutdwm(ast, 0x1E6E0034, 0x00020080);
+	ast_moutdwm(ast, 0x1E6E0008, 0x2003000F);
+	ast_moutdwm(ast, 0x1E6E0038, 0x00000FFF);
+	ast_moutdwm(ast, 0x1E6E0040, 0x88448844);
+	ast_moutdwm(ast, 0x1E6E0044, 0x24422288);
+	ast_moutdwm(ast, 0x1E6E0048, 0x22222222);
+	ast_moutdwm(ast, 0x1E6E004C, 0x22222222);
+	ast_moutdwm(ast, 0x1E6E0050, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E0208, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0218, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0220, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0228, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0230, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E02A8, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E02B0, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0240, 0x86000000);
+	ast_moutdwm(ast, 0x1E6E0244, 0x00008600);
+	ast_moutdwm(ast, 0x1E6E0248, 0x80000000);
+	ast_moutdwm(ast, 0x1E6E024C, 0x80808080);
+}
+
+static void ddr_phy_init_2500(struct ast_private *ast)
+{
+	u32 data, pass, timecnt;
+
+	pass = 0;
+	ast_moutdwm(ast, 0x1E6E0060, 0x00000005);
+	while (!pass) {
+		for (timecnt = 0; timecnt < TIMEOUT; timecnt++) {
+			data = ast_mindwm(ast, 0x1E6E0060) & 0x1;
+			if (!data)
+				break;
+		}
+		if (timecnt != TIMEOUT) {
+			data = ast_mindwm(ast, 0x1E6E0300) & 0x000A0000;
+			if (!data)
+				pass = 1;
+		}
+		if (!pass) {
+			ast_moutdwm(ast, 0x1E6E0060, 0x00000000);
+			udelay(10); /* delay 10 us */
+			ast_moutdwm(ast, 0x1E6E0060, 0x00000005);
+		}
+	}
+
+	ast_moutdwm(ast, 0x1E6E0060, 0x00000006);
+}
+
+/*
+ * Check DRAM Size
+ * 1Gb : 0x80000000 ~ 0x87FFFFFF
+ * 2Gb : 0x80000000 ~ 0x8FFFFFFF
+ * 4Gb : 0x80000000 ~ 0x9FFFFFFF
+ * 8Gb : 0x80000000 ~ 0xBFFFFFFF
+ */
+static void check_dram_size_2500(struct ast_private *ast, u32 tRFC)
+{
+	u32 reg_04, reg_14;
+
+	reg_04 = ast_mindwm(ast, 0x1E6E0004) & 0xfffffffc;
+	reg_14 = ast_mindwm(ast, 0x1E6E0014) & 0xffffff00;
+
+	ast_moutdwm(ast, 0xA0100000, 0x41424344);
+	ast_moutdwm(ast, 0x90100000, 0x35363738);
+	ast_moutdwm(ast, 0x88100000, 0x292A2B2C);
+	ast_moutdwm(ast, 0x80100000, 0x1D1E1F10);
+
+	/* Check 8Gbit */
+	if (ast_mindwm(ast, 0xA0100000) == 0x41424344) {
+		reg_04 |= 0x03;
+		reg_14 |= (tRFC >> 24) & 0xFF;
+		/* Check 4Gbit */
+	} else if (ast_mindwm(ast, 0x90100000) == 0x35363738) {
+		reg_04 |= 0x02;
+		reg_14 |= (tRFC >> 16) & 0xFF;
+		/* Check 2Gbit */
+	} else if (ast_mindwm(ast, 0x88100000) == 0x292A2B2C) {
+		reg_04 |= 0x01;
+		reg_14 |= (tRFC >> 8) & 0xFF;
+	} else {
+		reg_14 |= tRFC & 0xFF;
+	}
+	ast_moutdwm(ast, 0x1E6E0004, reg_04);
+	ast_moutdwm(ast, 0x1E6E0014, reg_14);
+}
+
+static void enable_cache_2500(struct ast_private *ast)
+{
+	u32 reg_04, data;
+
+	reg_04 = ast_mindwm(ast, 0x1E6E0004);
+	ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x1000);
+
+	do
+		data = ast_mindwm(ast, 0x1E6E0004);
+	while (!(data & 0x80000));
+	ast_moutdwm(ast, 0x1E6E0004, reg_04 | 0x400);
+}
+
+static void set_mpll_2500(struct ast_private *ast)
+{
+	u32 addr, data, param;
+
+	/* Reset MMC */
+	ast_moutdwm(ast, 0x1E6E0000, 0xFC600309);
+	ast_moutdwm(ast, 0x1E6E0034, 0x00020080);
+	for (addr = 0x1e6e0004; addr < 0x1e6e0090;) {
+		ast_moutdwm(ast, addr, 0x0);
+		addr += 4;
+	}
+	ast_moutdwm(ast, 0x1E6E0034, 0x00020000);
+
+	ast_moutdwm(ast, 0x1E6E2000, 0x1688A8A8);
+	data = ast_mindwm(ast, 0x1E6E2070) & 0x00800000;
+	if (data) {
+		/* CLKIN = 25MHz */
+		param = 0x930023E0;
+		ast_moutdwm(ast, 0x1E6E2160, 0x00011320);
+	} else {
+		/* CLKIN = 24MHz */
+		param = 0x93002400;
+	}
+	ast_moutdwm(ast, 0x1E6E2020, param);
+	udelay(100);
+}
+
+static void reset_mmc_2500(struct ast_private *ast)
+{
+	ast_moutdwm(ast, 0x1E78505C, 0x00000004);
+	ast_moutdwm(ast, 0x1E785044, 0x00000001);
+	ast_moutdwm(ast, 0x1E785048, 0x00004755);
+	ast_moutdwm(ast, 0x1E78504C, 0x00000013);
+	mdelay(100);
+	ast_moutdwm(ast, 0x1E785054, 0x00000077);
+	ast_moutdwm(ast, 0x1E6E0000, 0xFC600309);
+}
+
+static void ddr3_init_2500(struct ast_private *ast, const u32 *ddr_table)
+{
+
+	ast_moutdwm(ast, 0x1E6E0004, 0x00000303);
+	ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]);
+	ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]);
+	ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]);
+	ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]);	     /* MODEREG4/6 */
+	ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]);	     /* MODEREG5 */
+	ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */
+	ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]);	     /* MODEREG1/3 */
+
+	/* DDR PHY Setting */
+	ast_moutdwm(ast, 0x1E6E0200, 0x02492AAE);
+	ast_moutdwm(ast, 0x1E6E0204, 0x00001001);
+	ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B);
+	ast_moutdwm(ast, 0x1E6E0210, 0x20000000);
+	ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]);
+	ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]);
+	ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]);
+	ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]);
+	ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]);
+	ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]);
+	ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]);
+	ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]);
+	ast_moutdwm(ast, 0x1E6E0290, 0x00100008);
+	ast_moutdwm(ast, 0x1E6E02C0, 0x00000006);
+
+	/* Controller Setting */
+	ast_moutdwm(ast, 0x1E6E0034, 0x00020091);
+
+	/* Wait DDR PHY init done */
+	ddr_phy_init_2500(ast);
+
+	ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]);
+	ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81);
+	ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93);
+
+	check_dram_size_2500(ast, ddr_table[REGIDX_RFC]);
+	enable_cache_2500(ast);
+	ast_moutdwm(ast, 0x1E6E001C, 0x00000008);
+	ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00);
+}
+
+static void ddr4_init_2500(struct ast_private *ast, const u32 *ddr_table)
+{
+	u32 data, data2, pass, retrycnt;
+	u32 ddr_vref, phy_vref;
+	u32 min_ddr_vref = 0, min_phy_vref = 0;
+	u32 max_ddr_vref = 0, max_phy_vref = 0;
+
+	ast_moutdwm(ast, 0x1E6E0004, 0x00000313);
+	ast_moutdwm(ast, 0x1E6E0010, ddr_table[REGIDX_010]);
+	ast_moutdwm(ast, 0x1E6E0014, ddr_table[REGIDX_014]);
+	ast_moutdwm(ast, 0x1E6E0018, ddr_table[REGIDX_018]);
+	ast_moutdwm(ast, 0x1E6E0020, ddr_table[REGIDX_020]);	     /* MODEREG4/6 */
+	ast_moutdwm(ast, 0x1E6E0024, ddr_table[REGIDX_024]);	     /* MODEREG5 */
+	ast_moutdwm(ast, 0x1E6E002C, ddr_table[REGIDX_02C] | 0x100); /* MODEREG0/2 */
+	ast_moutdwm(ast, 0x1E6E0030, ddr_table[REGIDX_030]);	     /* MODEREG1/3 */
+
+	/* DDR PHY Setting */
+	ast_moutdwm(ast, 0x1E6E0200, 0x42492AAE);
+	ast_moutdwm(ast, 0x1E6E0204, 0x09002000);
+	ast_moutdwm(ast, 0x1E6E020C, 0x55E00B0B);
+	ast_moutdwm(ast, 0x1E6E0210, 0x20000000);
+	ast_moutdwm(ast, 0x1E6E0214, ddr_table[REGIDX_214]);
+	ast_moutdwm(ast, 0x1E6E02E0, ddr_table[REGIDX_2E0]);
+	ast_moutdwm(ast, 0x1E6E02E4, ddr_table[REGIDX_2E4]);
+	ast_moutdwm(ast, 0x1E6E02E8, ddr_table[REGIDX_2E8]);
+	ast_moutdwm(ast, 0x1E6E02EC, ddr_table[REGIDX_2EC]);
+	ast_moutdwm(ast, 0x1E6E02F0, ddr_table[REGIDX_2F0]);
+	ast_moutdwm(ast, 0x1E6E02F4, ddr_table[REGIDX_2F4]);
+	ast_moutdwm(ast, 0x1E6E02F8, ddr_table[REGIDX_2F8]);
+	ast_moutdwm(ast, 0x1E6E0290, 0x00100008);
+	ast_moutdwm(ast, 0x1E6E02C4, 0x3C183C3C);
+	ast_moutdwm(ast, 0x1E6E02C8, 0x00631E0E);
+
+	/* Controller Setting */
+	ast_moutdwm(ast, 0x1E6E0034, 0x0001A991);
+
+	/* Train PHY Vref first */
+	pass = 0;
+
+	for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) {
+		max_phy_vref = 0x0;
+		pass = 0;
+		ast_moutdwm(ast, 0x1E6E02C0, 0x00001C06);
+		for (phy_vref = 0x40; phy_vref < 0x80; phy_vref++) {
+			ast_moutdwm(ast, 0x1E6E000C, 0x00000000);
+			ast_moutdwm(ast, 0x1E6E0060, 0x00000000);
+			ast_moutdwm(ast, 0x1E6E02CC, phy_vref | (phy_vref << 8));
+			/* Fire DFI Init */
+			ddr_phy_init_2500(ast);
+			ast_moutdwm(ast, 0x1E6E000C, 0x00005C01);
+			if (cbr_test_2500(ast)) {
+				pass++;
+				data = ast_mindwm(ast, 0x1E6E03D0);
+				data2 = data >> 8;
+				data  = data & 0xff;
+				if (data > data2)
+					data = data2;
+				if (max_phy_vref < data) {
+					max_phy_vref = data;
+					min_phy_vref = phy_vref;
+				}
+			} else if (pass > 0)
+				break;
+		}
+	}
+	ast_moutdwm(ast, 0x1E6E02CC, min_phy_vref | (min_phy_vref << 8));
+
+	/* Train DDR Vref next */
+	pass = 0;
+
+	for (retrycnt = 0; retrycnt < 4 && pass == 0; retrycnt++) {
+		min_ddr_vref = 0xFF;
+		max_ddr_vref = 0x0;
+		pass = 0;
+		for (ddr_vref = 0x00; ddr_vref < 0x40; ddr_vref++) {
+			ast_moutdwm(ast, 0x1E6E000C, 0x00000000);
+			ast_moutdwm(ast, 0x1E6E0060, 0x00000000);
+			ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8));
+			/* Fire DFI Init */
+			ddr_phy_init_2500(ast);
+			ast_moutdwm(ast, 0x1E6E000C, 0x00005C01);
+			if (cbr_test_2500(ast)) {
+				pass++;
+				if (min_ddr_vref > ddr_vref)
+					min_ddr_vref = ddr_vref;
+				if (max_ddr_vref < ddr_vref)
+					max_ddr_vref = ddr_vref;
+			} else if (pass != 0)
+				break;
+		}
+	}
+
+	ast_moutdwm(ast, 0x1E6E000C, 0x00000000);
+	ast_moutdwm(ast, 0x1E6E0060, 0x00000000);
+	ddr_vref = (min_ddr_vref + max_ddr_vref + 1) >> 1;
+	ast_moutdwm(ast, 0x1E6E02C0, 0x00000006 | (ddr_vref << 8));
+
+	/* Wait DDR PHY init done */
+	ddr_phy_init_2500(ast);
+
+	ast_moutdwm(ast, 0x1E6E0120, ddr_table[REGIDX_PLL]);
+	ast_moutdwm(ast, 0x1E6E000C, 0x42AA5C81);
+	ast_moutdwm(ast, 0x1E6E0034, 0x0001AF93);
+
+	check_dram_size_2500(ast, ddr_table[REGIDX_RFC]);
+	enable_cache_2500(ast);
+	ast_moutdwm(ast, 0x1E6E001C, 0x00000008);
+	ast_moutdwm(ast, 0x1E6E0038, 0xFFFFFF00);
+}
+
+static bool ast_dram_init_2500(struct ast_private *ast)
+{
+	u32 data;
+	u32 max_tries = 5;
+
+	do {
+		if (max_tries-- == 0)
+			return false;
+		set_mpll_2500(ast);
+		reset_mmc_2500(ast);
+		ddr_init_common_2500(ast);
+
+		data = ast_mindwm(ast, 0x1E6E2070);
+		if (data & 0x01000000)
+			ddr4_init_2500(ast, ast2500_ddr4_1600_timing_table);
+		else
+			ddr3_init_2500(ast, ast2500_ddr3_1600_timing_table);
+	} while (!ddr_test_2500(ast));
+
+	ast_moutdwm(ast, 0x1E6E2040, ast_mindwm(ast, 0x1E6E2040) | 0x41);
+
+	/* Patch code */
+	data = ast_mindwm(ast, 0x1E6E200C) & 0xF9FFFFFF;
+	ast_moutdwm(ast, 0x1E6E200C, data | 0x10000000);
+
+	return true;
+}
+
+void ast_post_chip_2500(struct drm_device *dev)
+{
+	struct ast_private *ast = dev->dev_private;
+	u32 temp;
+	u8 reg;
+
+	reg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff);
+	if ((reg & 0x80) == 0) {/* vga only */
+		/* Clear bus lock condition */
+		ast_moutdwm(ast, 0x1e600000, 0xAEED1A03);
+		ast_moutdwm(ast, 0x1e600084, 0x00010000);
+		ast_moutdwm(ast, 0x1e600088, 0x00000000);
+		ast_moutdwm(ast, 0x1e6e2000, 0x1688A8A8);
+		ast_write32(ast, 0xf004, 0x1e6e0000);
+		ast_write32(ast, 0xf000, 0x1);
+		ast_write32(ast, 0x12000, 0x1688a8a8);
+		while (ast_read32(ast, 0x12000) != 0x1)
+			;
+
+		ast_write32(ast, 0x10000, 0xfc600309);
+		while (ast_read32(ast, 0x10000) != 0x1)
+			;
+
+		/* Slow down CPU/AHB CLK in VGA only mode */
+		temp = ast_read32(ast, 0x12008);
+		temp |= 0x73;
+		ast_write32(ast, 0x12008, temp);
+
+		/* Reset USB port to patch USB unknown device issue */
+		ast_moutdwm(ast, 0x1e6e2090, 0x20000000);
+		temp  = ast_mindwm(ast, 0x1e6e2094);
+		temp |= 0x00004000;
+		ast_moutdwm(ast, 0x1e6e2094, temp);
+		temp  = ast_mindwm(ast, 0x1e6e2070);
+		if (temp & 0x00800000) {
+			ast_moutdwm(ast, 0x1e6e207c, 0x00800000);
+			mdelay(100);
+			ast_moutdwm(ast, 0x1e6e2070, 0x00800000);
+		}
+
+		if (!ast_dram_init_2500(ast))
+			DRM_ERROR("DRAM init failed !\n");
+
+		temp = ast_mindwm(ast, 0x1e6e2040);
+		ast_moutdwm(ast, 0x1e6e2040, temp | 0x40);
+	}
+
+	/* wait ready */
+	do {
+		reg = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xd0, 0xff);
+	} while ((reg & 0x40) == 0);
+}
diff --git a/drivers/gpu/drm/ast/ast_tables.h b/drivers/gpu/drm/ast/ast_tables.h
index 3608d5aa7451..5f4c2e833a65 100644
--- a/drivers/gpu/drm/ast/ast_tables.h
+++ b/drivers/gpu/drm/ast/ast_tables.h
@@ -47,6 +47,7 @@
 #define SyncPN			(PVSync | NHSync)
 #define SyncNP			(NVSync | PHSync)
 #define SyncNN			(NVSync | NHSync)
+#define AST2500PreCatchCRT		0x00004000
 
 /* DCLK Index */
 #define VCLK25_175     		0x00
@@ -78,37 +79,67 @@
 #define VCLK97_75     		0x19
 #define VCLK118_25			0x1A
 
-static struct ast_vbios_dclk_info dclk_table[] = {
-	{0x2C, 0xE7, 0x03},					/* 00: VCLK25_175	*/
-	{0x95, 0x62, 0x03},				        /* 01: VCLK28_322	*/
-	{0x67, 0x63, 0x01},				        /* 02: VCLK31_5         */
-	{0x76, 0x63, 0x01},				        /* 03: VCLK36         	*/
-	{0xEE, 0x67, 0x01},				        /* 04: VCLK40          	*/
-	{0x82, 0x62, 0x01}, 			        /* 05: VCLK49_5        	*/
-	{0xC6, 0x64, 0x01},                        	        /* 06: VCLK50          	*/
-	{0x94, 0x62, 0x01},                        	        /* 07: VCLK56_25       	*/
-	{0x80, 0x64, 0x00},                        	        /* 08: VCLK65		*/
-	{0x7B, 0x63, 0x00},                        	        /* 09: VCLK75	        */
-	{0x67, 0x62, 0x00},				        /* 0A: VCLK78_75       	*/
-	{0x7C, 0x62, 0x00},                        	        /* 0B: VCLK94_5        	*/
-	{0x8E, 0x62, 0x00},                        	        /* 0C: VCLK108         	*/
-	{0x85, 0x24, 0x00},                        	        /* 0D: VCLK135         	*/
-	{0x67, 0x22, 0x00},                        	        /* 0E: VCLK157_5       	*/
-	{0x6A, 0x22, 0x00},				        /* 0F: VCLK162         	*/
-	{0x4d, 0x4c, 0x80},				        /* 10: VCLK154      	*/
-	{0xa7, 0x78, 0x80},					/* 11: VCLK83.5         */
-	{0x28, 0x49, 0x80},					/* 12: VCLK106.5        */
-	{0x37, 0x49, 0x80},					/* 13: VCLK146.25       */
-	{0x1f, 0x45, 0x80},					/* 14: VCLK148.5        */
-	{0x47, 0x6c, 0x80},					/* 15: VCLK71       */
-	{0x25, 0x65, 0x80},					/* 16: VCLK88.75    */
-	{0x77, 0x58, 0x80},					/* 17: VCLK119      */
-	{0x32, 0x67, 0x80},				    /* 18: VCLK85_5     */
-	{0x6a, 0x6d, 0x80},					/* 19: VCLK97_75	*/
-	{0x3b, 0x2c, 0x81},					/* 1A: VCLK118_25	*/
+static const struct ast_vbios_dclk_info dclk_table[] = {
+	{0x2C, 0xE7, 0x03},			/* 00: VCLK25_175	*/
+	{0x95, 0x62, 0x03},			/* 01: VCLK28_322	*/
+	{0x67, 0x63, 0x01},			/* 02: VCLK31_5		*/
+	{0x76, 0x63, 0x01},			/* 03: VCLK36		*/
+	{0xEE, 0x67, 0x01},			/* 04: VCLK40		*/
+	{0x82, 0x62, 0x01},			/* 05: VCLK49_5		*/
+	{0xC6, 0x64, 0x01},			/* 06: VCLK50		*/
+	{0x94, 0x62, 0x01},			/* 07: VCLK56_25	*/
+	{0x80, 0x64, 0x00},			/* 08: VCLK65		*/
+	{0x7B, 0x63, 0x00},			/* 09: VCLK75		*/
+	{0x67, 0x62, 0x00},			/* 0A: VCLK78_75	*/
+	{0x7C, 0x62, 0x00},			/* 0B: VCLK94_5		*/
+	{0x8E, 0x62, 0x00},			/* 0C: VCLK108		*/
+	{0x85, 0x24, 0x00},			/* 0D: VCLK135		*/
+	{0x67, 0x22, 0x00},			/* 0E: VCLK157_5	*/
+	{0x6A, 0x22, 0x00},			/* 0F: VCLK162		*/
+	{0x4d, 0x4c, 0x80},			/* 10: VCLK154		*/
+	{0xa7, 0x78, 0x80},			/* 11: VCLK83.5		*/
+	{0x28, 0x49, 0x80},			/* 12: VCLK106.5	*/
+	{0x37, 0x49, 0x80},			/* 13: VCLK146.25	*/
+	{0x1f, 0x45, 0x80},			/* 14: VCLK148.5	*/
+	{0x47, 0x6c, 0x80},			/* 15: VCLK71		*/
+	{0x25, 0x65, 0x80},			/* 16: VCLK88.75	*/
+	{0x77, 0x58, 0x80},			/* 17: VCLK119		*/
+	{0x32, 0x67, 0x80},			/* 18: VCLK85_5		*/
+	{0x6a, 0x6d, 0x80},			/* 19: VCLK97_75	*/
+	{0x3b, 0x2c, 0x81},			/* 1A: VCLK118_25	*/
 };
 
-static struct ast_vbios_stdtable vbios_stdtable[] = {
+static const struct ast_vbios_dclk_info dclk_table_ast2500[] = {
+	{0x2C, 0xE7, 0x03},			/* 00: VCLK25_175	*/
+	{0x95, 0x62, 0x03},			/* 01: VCLK28_322	*/
+	{0x67, 0x63, 0x01},			/* 02: VCLK31_5		*/
+	{0x76, 0x63, 0x01},			/* 03: VCLK36		*/
+	{0xEE, 0x67, 0x01},			/* 04: VCLK40		*/
+	{0x82, 0x62, 0x01},			/* 05: VCLK49_5		*/
+	{0xC6, 0x64, 0x01},			/* 06: VCLK50		*/
+	{0x94, 0x62, 0x01},			/* 07: VCLK56_25	*/
+	{0x80, 0x64, 0x00},			/* 08: VCLK65		*/
+	{0x7B, 0x63, 0x00},			/* 09: VCLK75		*/
+	{0x67, 0x62, 0x00},			/* 0A: VCLK78_75	*/
+	{0x7C, 0x62, 0x00},			/* 0B: VCLK94_5		*/
+	{0x8E, 0x62, 0x00},			/* 0C: VCLK108		*/
+	{0x85, 0x24, 0x00},			/* 0D: VCLK135		*/
+	{0x67, 0x22, 0x00},			/* 0E: VCLK157_5	*/
+	{0x6A, 0x22, 0x00},			/* 0F: VCLK162		*/
+	{0x4d, 0x4c, 0x80},			/* 10: VCLK154		*/
+	{0xa7, 0x78, 0x80},			/* 11: VCLK83.5		*/
+	{0x28, 0x49, 0x80},			/* 12: VCLK106.5	*/
+	{0x37, 0x49, 0x80},			/* 13: VCLK146.25	*/
+	{0x1f, 0x45, 0x80},			/* 14: VCLK148.5	*/
+	{0x47, 0x6c, 0x80},			/* 15: VCLK71		*/
+	{0x25, 0x65, 0x80},			/* 16: VCLK88.75	*/
+	{0x58, 0x01, 0x42},			/* 17: VCLK119		*/
+	{0x32, 0x67, 0x80},			/* 18: VCLK85_5		*/
+	{0x6a, 0x6d, 0x80},			/* 19: VCLK97_75	*/
+	{0x44, 0x20, 0x43},			/* 1A: VCLK118_25	*/
+};
+
+static const struct ast_vbios_stdtable vbios_stdtable[] = {
 	/* MD_2_3_400 */
 	{
 		0x67,
@@ -181,21 +212,21 @@ static struct ast_vbios_stdtable vbios_stdtable[] = {
 	},
 };
 
-static struct ast_vbios_enhtable res_640x480[] = {
+static const struct ast_vbios_enhtable res_640x480[] = {
 	{ 800, 640, 8, 96, 525, 480, 2, 2, VCLK25_175,	/* 60Hz */
 	  (SyncNN | HBorder | VBorder | Charx8Dot), 60, 1, 0x2E },
 	{ 832, 640, 16, 40, 520, 480, 1, 3, VCLK31_5,	/* 72Hz */
 	  (SyncNN | HBorder | VBorder | Charx8Dot), 72, 2, 0x2E  },
 	{ 840, 640, 16, 64, 500, 480, 1, 3, VCLK31_5,	/* 75Hz */
 	  (SyncNN | Charx8Dot) , 75, 3, 0x2E },
-	{ 832, 640, 56, 56, 509, 480, 1, 3, VCLK36,		/* 85Hz */
+	{ 832, 640, 56, 56, 509, 480, 1, 3, VCLK36,	/* 85Hz */
 	  (SyncNN | Charx8Dot) , 85, 4, 0x2E },
-	{ 832, 640, 56, 56, 509, 480, 1, 3, VCLK36,		/* end */
+	{ 832, 640, 56, 56, 509, 480, 1, 3, VCLK36,	/* end */
 	  (SyncNN | Charx8Dot) , 0xFF, 4, 0x2E },
 };
 
-static struct ast_vbios_enhtable res_800x600[] = {
-	{1024, 800, 24, 72, 625, 600, 1, 2, VCLK36,		/* 56Hz */
+static const struct ast_vbios_enhtable res_800x600[] = {
+	{1024, 800, 24, 72, 625, 600, 1, 2, VCLK36,	/* 56Hz */
 	 (SyncPP | Charx8Dot), 56, 1, 0x30 },
 	{1056, 800, 40, 128, 628, 600, 1, 4, VCLK40,	/* 60Hz */
 	 (SyncPP | Charx8Dot), 60, 2, 0x30 },
@@ -210,7 +241,7 @@ static struct ast_vbios_enhtable res_800x600[] = {
 };
 
 
-static struct ast_vbios_enhtable res_1024x768[] = {
+static const struct ast_vbios_enhtable res_1024x768[] = {
 	{1344, 1024, 24, 136, 806, 768, 3, 6, VCLK65,	/* 60Hz */
 	 (SyncNN | Charx8Dot), 60, 1, 0x31 },
 	{1328, 1024, 24, 136, 806, 768, 3, 6, VCLK75,	/* 70Hz */
@@ -223,7 +254,7 @@ static struct ast_vbios_enhtable res_1024x768[] = {
 	 (SyncPP | Charx8Dot), 0xFF, 4, 0x31 },
 };
 
-static struct ast_vbios_enhtable res_1280x1024[] = {
+static const struct ast_vbios_enhtable res_1280x1024[] = {
 	{1688, 1280, 48, 112, 1066, 1024, 1, 3, VCLK108,	/* 60Hz */
 	 (SyncPP | Charx8Dot), 60, 1, 0x32 },
 	{1688, 1280, 16, 144, 1066, 1024, 1, 3, VCLK135,	/* 75Hz */
@@ -234,7 +265,7 @@ static struct ast_vbios_enhtable res_1280x1024[] = {
 	 (SyncPP | Charx8Dot), 0xFF, 3, 0x32 },
 };
 
-static struct ast_vbios_enhtable res_1600x1200[] = {
+static const struct ast_vbios_enhtable res_1600x1200[] = {
 	{2160, 1600, 64, 192, 1250, 1200, 1, 3, VCLK162,	/* 60Hz */
 	 (SyncPP | Charx8Dot), 60, 1, 0x33 },
 	{2160, 1600, 64, 192, 1250, 1200, 1, 3, VCLK162,	/* end */
@@ -242,34 +273,39 @@ static struct ast_vbios_enhtable res_1600x1200[] = {
 };
 
 /* 16:9 */
-static struct ast_vbios_enhtable res_1360x768[] = {
-	{1792, 1360, 64,112, 795,  768, 3, 6, VCLK85_5,	         /* 60Hz */
+static const struct ast_vbios_enhtable res_1360x768[] = {
+	{1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5,		/* 60Hz */
 	 (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x39 },
-	{1792, 1360, 64,112, 795,  768, 3, 6, VCLK85_5,	         /* end */
-	 (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x39 },
+	{1792, 1360, 64, 112, 795, 768, 3, 6, VCLK85_5,	         /* end */
+	 (SyncPP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 0xFF, 1, 0x39 },
 };
 
-static struct ast_vbios_enhtable res_1600x900[] = {
-	{1760, 1600, 48, 32, 926,  900, 3, 5, VCLK97_75,	/* 60Hz CVT RB */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x3A },
-	{2112, 1600, 88,168, 934,  900, 3, 5, VCLK118_25,	/* 60Hz CVT */
+static const struct ast_vbios_enhtable res_1600x900[] = {
+	{1760, 1600, 48, 32, 926, 900, 3, 5, VCLK97_75,		/* 60Hz CVT RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x3A },
+	{2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25,	/* 60Hz CVT */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x3A },
-	{2112, 1600, 88,168, 934,  900, 3, 5, VCLK118_25,	/* 60Hz CVT */
+	{2112, 1600, 88, 168, 934, 900, 3, 5, VCLK118_25,	/* 60Hz CVT */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x3A },
 };
 
-static struct ast_vbios_enhtable res_1920x1080[] = {
+static const struct ast_vbios_enhtable res_1920x1080[] = {
 	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x38 },
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x38 },
 	{2200, 1920, 88, 44, 1125, 1080, 4, 5, VCLK148_5,	/* 60Hz */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x38 },
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 0xFF, 1, 0x38 },
 };
 
 
 /* 16:10 */
-static struct ast_vbios_enhtable res_1280x800[] = {
-	{1440, 1280, 48, 32,  823,  800, 3, 6, VCLK71,	/* 60Hz RB */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x35 },
+static const struct ast_vbios_enhtable res_1280x800[] = {
+	{1440, 1280, 48, 32,  823,  800, 3, 6, VCLK71,		/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x35 },
 	{1680, 1280, 72,128,  831,  800, 3, 6, VCLK83_5,	/* 60Hz */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x35 },
 	{1680, 1280, 72,128,  831,  800, 3, 6, VCLK83_5,	/* 60Hz */
@@ -277,29 +313,33 @@ static struct ast_vbios_enhtable res_1280x800[] = {
 
 };
 
-static struct ast_vbios_enhtable res_1440x900[] = {
+static const struct ast_vbios_enhtable res_1440x900[] = {
 	{1600, 1440, 48, 32,  926,  900, 3, 6, VCLK88_75,	/* 60Hz RB */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x36 },
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x36 },
 	{1904, 1440, 80,152,  934,  900, 3, 6, VCLK106_5,	/* 60Hz */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x36 },
 	{1904, 1440, 80,152,  934,  900, 3, 6, VCLK106_5,	/* 60Hz */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x36 },
 };
 
-static struct ast_vbios_enhtable res_1680x1050[] = {
-	{1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119,	/* 60Hz RB */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x37 },
+static const struct ast_vbios_enhtable res_1680x1050[] = {
+	{1840, 1680, 48, 32, 1080, 1050, 3, 6, VCLK119,		/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x37 },
 	{2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25,	/* 60Hz */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 2, 0x37 },
 	{2240, 1680,104,176, 1089, 1050, 3, 6, VCLK146_25,	/* 60Hz */
 	 (SyncPN | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 2, 0x37 },
 };
 
-static struct ast_vbios_enhtable res_1920x1200[] = {
-	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz RB*/
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 60, 1, 0x34 },
-	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,	/* 60Hz RB */
-	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo), 0xFF, 1, 0x34 },
+static const struct ast_vbios_enhtable res_1920x1200[] = {
+	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,		/* 60Hz RB*/
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 60, 1, 0x34 },
+	{2080, 1920, 48, 32, 1235, 1200, 3, 6, VCLK154,		/* 60Hz RB */
+	 (SyncNP | Charx8Dot | LineCompareOff | WideScreenMode | NewModeInfo |
+	  AST2500PreCatchCRT), 0xFF, 1, 0x34 },
 };
 
 #endif
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
index 0bf32d6ac39b..427bdff425c2 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_dc.c
@@ -647,7 +647,6 @@ static int atmel_hlcdc_dc_load(struct drm_device *dev)
 	platform_set_drvdata(pdev, dev);
 
 	dc->fbdev = drm_fbdev_cma_init(dev, 24,
-			dev->mode_config.num_crtc,
 			dev->mode_config.num_connector);
 	if (IS_ERR(dc->fbdev))
 		dc->fbdev = NULL;
diff --git a/drivers/gpu/drm/bochs/bochs_drv.c b/drivers/gpu/drm/bochs/bochs_drv.c
index 8a12b3f6fc66..aa342515ddf4 100644
--- a/drivers/gpu/drm/bochs/bochs_drv.c
+++ b/drivers/gpu/drm/bochs/bochs_drv.c
@@ -12,6 +12,10 @@
 
 #include "bochs.h"
 
+static int bochs_modeset = -1;
+module_param_named(modeset, bochs_modeset, int, 0444);
+MODULE_PARM_DESC(modeset, "enable/disable kernel modesetting");
+
 static bool enable_fbdev = true;
 module_param_named(fbdev, enable_fbdev, bool, 0444);
 MODULE_PARM_DESC(fbdev, "register fbdev device");
@@ -214,6 +218,12 @@ static struct pci_driver bochs_pci_driver = {
 
 static int __init bochs_init(void)
 {
+	if (vgacon_text_force() && bochs_modeset == -1)
+		return -EINVAL;
+
+	if (bochs_modeset == 0)
+		return -EINVAL;
+
 	return drm_pci_init(&bochs_driver, &bochs_pci_driver);
 }
 
diff --git a/drivers/gpu/drm/bochs/bochs_fbdev.c b/drivers/gpu/drm/bochs/bochs_fbdev.c
index 0317c3df6a22..932a769637ef 100644
--- a/drivers/gpu/drm/bochs/bochs_fbdev.c
+++ b/drivers/gpu/drm/bochs/bochs_fbdev.c
@@ -169,8 +169,7 @@ int bochs_fbdev_init(struct bochs_device *bochs)
 	drm_fb_helper_prepare(bochs->dev, &bochs->fb.helper,
 			      &bochs_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(bochs->dev, &bochs->fb.helper,
-				 1, 1);
+	ret = drm_fb_helper_init(bochs->dev, &bochs->fb.helper, 1);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/bridge/sil-sii8620.c b/drivers/gpu/drm/bridge/sil-sii8620.c
index b2c267df7ee7..cdd0a9d44ba1 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.c
+++ b/drivers/gpu/drm/bridge/sil-sii8620.c
@@ -9,6 +9,8 @@
  * published by the Free Software Foundation.
  */
 
+#include <asm/unaligned.h>
+
 #include <drm/bridge/mhl.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
@@ -28,7 +30,10 @@
 
 #include "sil-sii8620.h"
 
-#define VAL_RX_HDMI_CTRL2_DEFVAL	VAL_RX_HDMI_CTRL2_IDLE_CNT(3)
+#define SII8620_BURST_BUF_LEN 288
+#define VAL_RX_HDMI_CTRL2_DEFVAL VAL_RX_HDMI_CTRL2_IDLE_CNT(3)
+#define MHL1_MAX_LCLK 225000
+#define MHL3_MAX_LCLK 600000
 
 enum sii8620_mode {
 	CM_DISCONNECTED,
@@ -59,6 +64,9 @@ struct sii8620 {
 	struct regulator_bulk_data supplies[2];
 	struct mutex lock; /* context lock, protects fields below */
 	int error;
+	int pixel_clock;
+	unsigned int use_packed_pixel:1;
+	int video_code;
 	enum sii8620_mode mode;
 	enum sii8620_sink_type sink_type;
 	u8 cbus_status;
@@ -66,11 +74,20 @@ struct sii8620 {
 	u8 xstat[MHL_XDS_SIZE];
 	u8 devcap[MHL_DCAP_SIZE];
 	u8 xdevcap[MHL_XDC_SIZE];
-	u8 avif[19];
+	u8 avif[HDMI_INFOFRAME_SIZE(AVI)];
 	struct edid *edid;
 	unsigned int gen2_write_burst:1;
 	enum sii8620_mt_state mt_state;
 	struct list_head mt_queue;
+	struct {
+		int r_size;
+		int r_count;
+		int rx_ack;
+		int rx_count;
+		u8 rx_buf[32];
+		int tx_count;
+		u8 tx_buf[32];
+	} burst;
 };
 
 struct sii8620_mt_msg;
@@ -78,12 +95,15 @@ struct sii8620_mt_msg;
 typedef void (*sii8620_mt_msg_cb)(struct sii8620 *ctx,
 				  struct sii8620_mt_msg *msg);
 
+typedef void (*sii8620_cb)(struct sii8620 *ctx, int ret);
+
 struct sii8620_mt_msg {
 	struct list_head node;
 	u8 reg[4];
 	u8 ret;
 	sii8620_mt_msg_cb send;
 	sii8620_mt_msg_cb recv;
+	sii8620_cb continuation;
 };
 
 static const u8 sii8620_i2c_page[] = {
@@ -101,6 +121,7 @@ static void sii8620_fetch_edid(struct sii8620 *ctx);
 static void sii8620_set_upstream_edid(struct sii8620 *ctx);
 static void sii8620_enable_hpd(struct sii8620 *ctx);
 static void sii8620_mhl_disconnected(struct sii8620 *ctx);
+static void sii8620_disconnect(struct sii8620 *ctx);
 
 static int sii8620_clear_error(struct sii8620 *ctx)
 {
@@ -227,6 +248,11 @@ static void sii8620_setbits(struct sii8620 *ctx, u16 addr, u8 mask, u8 val)
 	sii8620_write(ctx, addr, val);
 }
 
+static inline bool sii8620_is_mhl3(struct sii8620 *ctx)
+{
+	return ctx->mode >= CM_MHL3;
+}
+
 static void sii8620_mt_cleanup(struct sii8620 *ctx)
 {
 	struct sii8620_mt_msg *msg, *n;
@@ -251,9 +277,11 @@ static void sii8620_mt_work(struct sii8620 *ctx)
 		ctx->mt_state = MT_STATE_READY;
 		msg = list_first_entry(&ctx->mt_queue, struct sii8620_mt_msg,
 				       node);
+		list_del(&msg->node);
 		if (msg->recv)
 			msg->recv(ctx, msg);
-		list_del(&msg->node);
+		if (msg->continuation)
+			msg->continuation(ctx, msg->ret);
 		kfree(msg);
 	}
 
@@ -266,9 +294,59 @@ static void sii8620_mt_work(struct sii8620 *ctx)
 		msg->send(ctx, msg);
 }
 
+static void sii8620_enable_gen2_write_burst(struct sii8620 *ctx)
+{
+	u8 ctrl = BIT_MDT_RCV_CTRL_MDT_RCV_EN;
+
+	if (ctx->gen2_write_burst)
+		return;
+
+	if (ctx->mode >= CM_MHL1)
+		ctrl |= BIT_MDT_RCV_CTRL_MDT_DELAY_RCV_EN;
+
+	sii8620_write_seq(ctx,
+		REG_MDT_RCV_TIMEOUT, 100,
+		REG_MDT_RCV_CTRL, ctrl
+	);
+	ctx->gen2_write_burst = 1;
+}
+
+static void sii8620_disable_gen2_write_burst(struct sii8620 *ctx)
+{
+	if (!ctx->gen2_write_burst)
+		return;
+
+	sii8620_write_seq_static(ctx,
+		REG_MDT_XMIT_CTRL, 0,
+		REG_MDT_RCV_CTRL, 0
+	);
+	ctx->gen2_write_burst = 0;
+}
+
+static void sii8620_start_gen2_write_burst(struct sii8620 *ctx)
+{
+	sii8620_write_seq_static(ctx,
+		REG_MDT_INT_1_MASK, BIT_MDT_RCV_TIMEOUT
+			| BIT_MDT_RCV_SM_ABORT_PKT_RCVD | BIT_MDT_RCV_SM_ERROR
+			| BIT_MDT_XMIT_TIMEOUT | BIT_MDT_XMIT_SM_ABORT_PKT_RCVD
+			| BIT_MDT_XMIT_SM_ERROR,
+		REG_MDT_INT_0_MASK, BIT_MDT_XFIFO_EMPTY
+			| BIT_MDT_IDLE_AFTER_HAWB_DISABLE
+			| BIT_MDT_RFIFO_DATA_RDY
+	);
+	sii8620_enable_gen2_write_burst(ctx);
+}
+
 static void sii8620_mt_msc_cmd_send(struct sii8620 *ctx,
 				    struct sii8620_mt_msg *msg)
 {
+	if (msg->reg[0] == MHL_SET_INT &&
+	    msg->reg[1] == MHL_INT_REG(RCHANGE) &&
+	    msg->reg[2] == MHL_INT_RC_FEAT_REQ)
+		sii8620_enable_gen2_write_burst(ctx);
+	else
+		sii8620_disable_gen2_write_burst(ctx);
+
 	switch (msg->reg[0]) {
 	case MHL_WRITE_STAT:
 	case MHL_SET_INT:
@@ -281,6 +359,12 @@ static void sii8620_mt_msc_cmd_send(struct sii8620 *ctx,
 		sii8620_write(ctx, REG_MSC_COMMAND_START,
 			      BIT_MSC_COMMAND_START_MSC_MSG);
 		break;
+	case MHL_READ_DEVCAP_REG:
+	case MHL_READ_XDEVCAP_REG:
+		sii8620_write(ctx, REG_MSC_CMD_OR_OFFSET, msg->reg[1]);
+		sii8620_write(ctx, REG_MSC_COMMAND_START,
+			      BIT_MSC_COMMAND_START_READ_DEVCAP);
+		break;
 	default:
 		dev_err(ctx->dev, "%s: command %#x not supported\n", __func__,
 			msg->reg[0]);
@@ -299,6 +383,21 @@ static struct sii8620_mt_msg *sii8620_mt_msg_new(struct sii8620 *ctx)
 	return msg;
 }
 
+static void sii8620_mt_set_cont(struct sii8620 *ctx, sii8620_cb cont)
+{
+	struct sii8620_mt_msg *msg;
+
+	if (ctx->error)
+		return;
+
+	if (list_empty(&ctx->mt_queue)) {
+		ctx->error = -EINVAL;
+		return;
+	}
+	msg = list_last_entry(&ctx->mt_queue, struct sii8620_mt_msg, node);
+	msg->continuation = cont;
+}
+
 static void sii8620_mt_msc_cmd(struct sii8620 *ctx, u8 cmd, u8 arg1, u8 arg2)
 {
 	struct sii8620_mt_msg *msg = sii8620_mt_msg_new(ctx);
@@ -358,7 +457,7 @@ static void sii8620_update_array(u8 *dst, u8 *src, int count)
 	}
 }
 
-static void sii8620_mr_devcap(struct sii8620 *ctx)
+static void sii8620_sink_detected(struct sii8620 *ctx, int ret)
 {
 	static const char * const sink_str[] = {
 		[SINK_NONE] = "NONE",
@@ -366,23 +465,10 @@ static void sii8620_mr_devcap(struct sii8620 *ctx)
 		[SINK_DVI] = "DVI"
 	};
 
-	u8 dcap[MHL_DCAP_SIZE];
 	char sink_name[20];
 	struct device *dev = ctx->dev;
 
-	sii8620_read_buf(ctx, REG_EDID_FIFO_RD_DATA, dcap, MHL_DCAP_SIZE);
-	if (ctx->error < 0)
-		return;
-
-	dev_info(dev, "dcap: %*ph\n", MHL_DCAP_SIZE, dcap);
-	dev_info(dev, "detected dongle MHL %d.%d, ChipID %02x%02x:%02x%02x\n",
-		 dcap[MHL_DCAP_MHL_VERSION] / 16,
-		 dcap[MHL_DCAP_MHL_VERSION] % 16, dcap[MHL_DCAP_ADOPTER_ID_H],
-		 dcap[MHL_DCAP_ADOPTER_ID_L], dcap[MHL_DCAP_DEVICE_ID_H],
-		 dcap[MHL_DCAP_DEVICE_ID_L]);
-	sii8620_update_array(ctx->devcap, dcap, MHL_DCAP_SIZE);
-
-	if (!(dcap[MHL_DCAP_CAT] & MHL_DCAP_CAT_SINK))
+	if (ret < 0)
 		return;
 
 	sii8620_fetch_edid(ctx);
@@ -401,18 +487,76 @@ static void sii8620_mr_devcap(struct sii8620 *ctx)
 
 	dev_info(dev, "detected sink(type: %s): %s\n",
 		 sink_str[ctx->sink_type], sink_name);
+}
+
+static void sii8620_hsic_init(struct sii8620 *ctx)
+{
+	if (!sii8620_is_mhl3(ctx))
+		return;
+
+	sii8620_write(ctx, REG_FCGC,
+		BIT_FCGC_HSIC_HOSTMODE | BIT_FCGC_HSIC_ENABLE);
+	sii8620_setbits(ctx, REG_HRXCTRL3,
+		BIT_HRXCTRL3_HRX_STAY_RESET | BIT_HRXCTRL3_STATUS_EN, ~0);
+	sii8620_setbits(ctx, REG_TTXNUMB, MSK_TTXNUMB_TTX_NUMBPS, 4);
+	sii8620_setbits(ctx, REG_TRXCTRL, BIT_TRXCTRL_TRX_FROM_SE_COC, ~0);
+	sii8620_setbits(ctx, REG_HTXCTRL, BIT_HTXCTRL_HTX_DRVCONN1, 0);
+	sii8620_setbits(ctx, REG_KEEPER, MSK_KEEPER_MODE, VAL_KEEPER_MODE_HOST);
+	sii8620_write_seq_static(ctx,
+		REG_TDMLLCTL, 0,
+		REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST |
+			BIT_UTSRST_KEEPER_SRST | BIT_UTSRST_FC_SRST,
+		REG_UTSRST, BIT_UTSRST_HRX_SRST | BIT_UTSRST_HTX_SRST,
+		REG_HRXINTL, 0xff,
+		REG_HRXINTH, 0xff,
+		REG_TTXINTL, 0xff,
+		REG_TTXINTH, 0xff,
+		REG_TRXINTL, 0xff,
+		REG_TRXINTH, 0xff,
+		REG_HTXINTL, 0xff,
+		REG_HTXINTH, 0xff,
+		REG_FCINTR0, 0xff,
+		REG_FCINTR1, 0xff,
+		REG_FCINTR2, 0xff,
+		REG_FCINTR3, 0xff,
+		REG_FCINTR4, 0xff,
+		REG_FCINTR5, 0xff,
+		REG_FCINTR6, 0xff,
+		REG_FCINTR7, 0xff
+	);
+}
+
+static void sii8620_edid_read(struct sii8620 *ctx, int ret)
+{
+	if (ret < 0)
+		return;
+
 	sii8620_set_upstream_edid(ctx);
+	sii8620_hsic_init(ctx);
 	sii8620_enable_hpd(ctx);
 }
 
+static void sii8620_mr_devcap(struct sii8620 *ctx)
+{
+	u8 dcap[MHL_DCAP_SIZE];
+	struct device *dev = ctx->dev;
+
+	sii8620_read_buf(ctx, REG_EDID_FIFO_RD_DATA, dcap, MHL_DCAP_SIZE);
+	if (ctx->error < 0)
+		return;
+
+	dev_info(dev, "detected dongle MHL %d.%d, ChipID %02x%02x:%02x%02x\n",
+		 dcap[MHL_DCAP_MHL_VERSION] / 16,
+		 dcap[MHL_DCAP_MHL_VERSION] % 16,
+		 dcap[MHL_DCAP_ADOPTER_ID_H], dcap[MHL_DCAP_ADOPTER_ID_L],
+		 dcap[MHL_DCAP_DEVICE_ID_H], dcap[MHL_DCAP_DEVICE_ID_L]);
+	sii8620_update_array(ctx->devcap, dcap, MHL_DCAP_SIZE);
+}
+
 static void sii8620_mr_xdevcap(struct sii8620 *ctx)
 {
 	sii8620_read_buf(ctx, REG_EDID_FIFO_RD_DATA, ctx->xdevcap,
 			 MHL_XDC_SIZE);
-
-	sii8620_mt_write_stat(ctx, MHL_XDS_REG(CURR_ECBUS_MODE),
-			      MHL_XDS_ECBUS_S | MHL_XDS_SLOT_MODE_8BIT);
-	sii8620_mt_rap(ctx, MHL_RAP_CBUS_MODE_UP);
 }
 
 static void sii8620_mt_read_devcap_recv(struct sii8620 *ctx,
@@ -450,6 +594,197 @@ static void sii8620_mt_read_devcap(struct sii8620 *ctx, bool xdevcap)
 	msg->recv = sii8620_mt_read_devcap_recv;
 }
 
+static void sii8620_mt_read_devcap_reg_recv(struct sii8620 *ctx,
+		struct sii8620_mt_msg *msg)
+{
+	u8 reg = msg->reg[0] & 0x7f;
+
+	if (msg->reg[0] & 0x80)
+		ctx->xdevcap[reg] = msg->ret;
+	else
+		ctx->devcap[reg] = msg->ret;
+}
+
+static void sii8620_mt_read_devcap_reg(struct sii8620 *ctx, u8 reg)
+{
+	struct sii8620_mt_msg *msg = sii8620_mt_msg_new(ctx);
+
+	if (!msg)
+		return;
+
+	msg->reg[0] = (reg & 0x80) ? MHL_READ_XDEVCAP_REG : MHL_READ_DEVCAP_REG;
+	msg->reg[1] = reg;
+	msg->send = sii8620_mt_msc_cmd_send;
+	msg->recv = sii8620_mt_read_devcap_reg_recv;
+}
+
+static inline void sii8620_mt_read_xdevcap_reg(struct sii8620 *ctx, u8 reg)
+{
+	sii8620_mt_read_devcap_reg(ctx, reg | 0x80);
+}
+
+static void *sii8620_burst_get_tx_buf(struct sii8620 *ctx, int len)
+{
+	u8 *buf = &ctx->burst.tx_buf[ctx->burst.tx_count];
+	int size = len + 2;
+
+	if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) {
+		dev_err(ctx->dev, "TX-BLK buffer exhausted\n");
+		ctx->error = -EINVAL;
+		return NULL;
+	}
+
+	ctx->burst.tx_count += size;
+	buf[1] = len;
+
+	return buf + 2;
+}
+
+static u8 *sii8620_burst_get_rx_buf(struct sii8620 *ctx, int len)
+{
+	u8 *buf = &ctx->burst.rx_buf[ctx->burst.rx_count];
+	int size = len + 1;
+
+	if (ctx->burst.tx_count + size > ARRAY_SIZE(ctx->burst.tx_buf)) {
+		dev_err(ctx->dev, "RX-BLK buffer exhausted\n");
+		ctx->error = -EINVAL;
+		return NULL;
+	}
+
+	ctx->burst.rx_count += size;
+	buf[0] = len;
+
+	return buf + 1;
+}
+
+static void sii8620_burst_send(struct sii8620 *ctx)
+{
+	int tx_left = ctx->burst.tx_count;
+	u8 *d = ctx->burst.tx_buf;
+
+	while (tx_left > 0) {
+		int len = d[1] + 2;
+
+		if (ctx->burst.r_count + len > ctx->burst.r_size)
+			break;
+		d[0] = min(ctx->burst.rx_ack, 255);
+		ctx->burst.rx_ack -= d[0];
+		sii8620_write_buf(ctx, REG_EMSC_XMIT_WRITE_PORT, d, len);
+		ctx->burst.r_count += len;
+		tx_left -= len;
+		d += len;
+	}
+
+	ctx->burst.tx_count = tx_left;
+
+	while (ctx->burst.rx_ack > 0) {
+		u8 b[2] = { min(ctx->burst.rx_ack, 255), 0 };
+
+		if (ctx->burst.r_count + 2 > ctx->burst.r_size)
+			break;
+		ctx->burst.rx_ack -= b[0];
+		sii8620_write_buf(ctx, REG_EMSC_XMIT_WRITE_PORT, b, 2);
+		ctx->burst.r_count += 2;
+	}
+}
+
+static void sii8620_burst_receive(struct sii8620 *ctx)
+{
+	u8 buf[3], *d;
+	int count;
+
+	sii8620_read_buf(ctx, REG_EMSCRFIFOBCNTL, buf, 2);
+	count = get_unaligned_le16(buf);
+	while (count > 0) {
+		int len = min(count, 3);
+
+		sii8620_read_buf(ctx, REG_EMSC_RCV_READ_PORT, buf, len);
+		count -= len;
+		ctx->burst.rx_ack += len - 1;
+		ctx->burst.r_count -= buf[1];
+		if (ctx->burst.r_count < 0)
+			ctx->burst.r_count = 0;
+
+		if (len < 3 || !buf[2])
+			continue;
+
+		len = buf[2];
+		d = sii8620_burst_get_rx_buf(ctx, len);
+		if (!d)
+			continue;
+		sii8620_read_buf(ctx, REG_EMSC_RCV_READ_PORT, d, len);
+		count -= len;
+		ctx->burst.rx_ack += len;
+	}
+}
+
+static void sii8620_burst_tx_rbuf_info(struct sii8620 *ctx, int size)
+{
+	struct mhl_burst_blk_rcv_buffer_info *d =
+		sii8620_burst_get_tx_buf(ctx, sizeof(*d));
+	if (!d)
+		return;
+
+	d->id = cpu_to_be16(MHL_BURST_ID_BLK_RCV_BUFFER_INFO);
+	d->size = cpu_to_le16(size);
+}
+
+static u8 sii8620_checksum(void *ptr, int size)
+{
+	u8 *d = ptr, sum = 0;
+
+	while (size--)
+		sum += *d++;
+
+	return sum;
+}
+
+static void sii8620_mhl_burst_hdr_set(struct mhl3_burst_header *h,
+	enum mhl_burst_id id)
+{
+	h->id = cpu_to_be16(id);
+	h->total_entries = 1;
+	h->sequence_index = 1;
+}
+
+static void sii8620_burst_tx_bits_per_pixel_fmt(struct sii8620 *ctx, u8 fmt)
+{
+	struct mhl_burst_bits_per_pixel_fmt *d;
+	const int size = sizeof(*d) + sizeof(d->desc[0]);
+
+	d = sii8620_burst_get_tx_buf(ctx, size);
+	if (!d)
+		return;
+
+	sii8620_mhl_burst_hdr_set(&d->hdr, MHL_BURST_ID_BITS_PER_PIXEL_FMT);
+	d->num_entries = 1;
+	d->desc[0].stream_id = 0;
+	d->desc[0].pixel_format = fmt;
+	d->hdr.checksum -= sii8620_checksum(d, size);
+}
+
+static void sii8620_burst_rx_all(struct sii8620 *ctx)
+{
+	u8 *d = ctx->burst.rx_buf;
+	int count = ctx->burst.rx_count;
+
+	while (count-- > 0) {
+		int len = *d++;
+		int id = get_unaligned_be16(&d[0]);
+
+		switch (id) {
+		case MHL_BURST_ID_BLK_RCV_BUFFER_INFO:
+			ctx->burst.r_size = get_unaligned_le16(&d[2]);
+			break;
+		default:
+			break;
+		}
+		count -= len;
+		d += len;
+	}
+	ctx->burst.rx_count = 0;
+}
+
 static void sii8620_fetch_edid(struct sii8620 *ctx)
 {
 	u8 lm_ddc, ddc_cmd, int3, cbus;
@@ -537,12 +872,12 @@ static void sii8620_fetch_edid(struct sii8620 *ctx)
 				edid = new_edid;
 			}
 		}
-
-		if (fetched + FETCH_SIZE == edid_len)
-			sii8620_write(ctx, REG_INTR3, int3);
 	}
 
-	sii8620_write(ctx, REG_LM_DDC, lm_ddc);
+	sii8620_write_seq(ctx,
+		REG_INTR3_MASK, BIT_DDC_CMD_DONE,
+		REG_LM_DDC, lm_ddc
+	);
 
 end:
 	kfree(ctx->edid);
@@ -641,11 +976,10 @@ static void sii8620_hw_reset(struct sii8620 *ctx)
 
 static void sii8620_cbus_reset(struct sii8620 *ctx)
 {
-	sii8620_write_seq_static(ctx,
-		REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST
-			| BIT_PWD_SRST_CBUS_RST_SW_EN,
-		REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST_SW_EN
-	);
+	sii8620_write(ctx, REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST
+		      | BIT_PWD_SRST_CBUS_RST_SW_EN);
+	usleep_range(10000, 20000);
+	sii8620_write(ctx, REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST_SW_EN);
 }
 
 static void sii8620_set_auto_zone(struct sii8620 *ctx)
@@ -683,48 +1017,208 @@ static void sii8620_stop_video(struct sii8620 *ctx)
 			| BIT_TPI_SC_TPI_AV_MUTE;
 		break;
 	case SINK_HDMI:
+	default:
 		val = BIT_TPI_SC_REG_TMDS_OE_POWER_DOWN
 			| BIT_TPI_SC_TPI_AV_MUTE
 			| BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI;
 		break;
-	default:
-		return;
 	}
 
 	sii8620_write(ctx, REG_TPI_SC, val);
 }
 
+static void sii8620_set_format(struct sii8620 *ctx)
+{
+	u8 out_fmt;
+
+	if (sii8620_is_mhl3(ctx)) {
+		sii8620_setbits(ctx, REG_M3_P0CTRL,
+				BIT_M3_P0CTRL_MHL3_P0_PIXEL_MODE_PACKED,
+				ctx->use_packed_pixel ? ~0 : 0);
+	} else {
+		if (ctx->use_packed_pixel)
+			sii8620_write_seq_static(ctx,
+				REG_VID_MODE, BIT_VID_MODE_M1080P,
+				REG_MHL_TOP_CTL, BIT_MHL_TOP_CTL_MHL_PP_SEL | 1,
+				REG_MHLTX_CTL6, 0x60
+			);
+		else
+			sii8620_write_seq_static(ctx,
+				REG_VID_MODE, 0,
+				REG_MHL_TOP_CTL, 1,
+				REG_MHLTX_CTL6, 0xa0
+			);
+	}
+
+	if (ctx->use_packed_pixel)
+		out_fmt = VAL_TPI_FORMAT(YCBCR422, FULL) |
+			BIT_TPI_OUTPUT_CSCMODE709;
+	else
+		out_fmt = VAL_TPI_FORMAT(RGB, FULL);
+
+	sii8620_write_seq(ctx,
+		REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL),
+		REG_TPI_OUTPUT, out_fmt,
+	);
+}
+
+static int mhl3_infoframe_init(struct mhl3_infoframe *frame)
+{
+	memset(frame, 0, sizeof(*frame));
+
+	frame->version = 3;
+	frame->hev_format = -1;
+	return 0;
+}
+
+static ssize_t mhl3_infoframe_pack(struct mhl3_infoframe *frame,
+		 void *buffer, size_t size)
+{
+	const int frm_len = HDMI_INFOFRAME_HEADER_SIZE + MHL3_INFOFRAME_SIZE;
+	u8 *ptr = buffer;
+
+	if (size < frm_len)
+		return -ENOSPC;
+
+	memset(buffer, 0, size);
+	ptr[0] = HDMI_INFOFRAME_TYPE_VENDOR;
+	ptr[1] = frame->version;
+	ptr[2] = MHL3_INFOFRAME_SIZE;
+	ptr[4] = MHL3_IEEE_OUI & 0xff;
+	ptr[5] = (MHL3_IEEE_OUI >> 8) & 0xff;
+	ptr[6] = (MHL3_IEEE_OUI >> 16) & 0xff;
+	ptr[7] = frame->video_format & 0x3;
+	ptr[7] |= (frame->format_type & 0x7) << 2;
+	ptr[7] |= frame->sep_audio ? BIT(5) : 0;
+	if (frame->hev_format >= 0) {
+		ptr[9] = 1;
+		ptr[10] = (frame->hev_format >> 8) & 0xff;
+		ptr[11] = frame->hev_format & 0xff;
+	}
+	if (frame->av_delay) {
+		bool sign = frame->av_delay < 0;
+		int delay = sign ? -frame->av_delay : frame->av_delay;
+
+		ptr[12] = (delay >> 16) & 0xf;
+		if (sign)
+			ptr[12] |= BIT(4);
+		ptr[13] = (delay >> 8) & 0xff;
+		ptr[14] = delay & 0xff;
+	}
+	ptr[3] -= sii8620_checksum(buffer, frm_len);
+	return frm_len;
+}
+
+static void sii8620_set_infoframes(struct sii8620 *ctx)
+{
+	struct mhl3_infoframe mhl_frm;
+	union hdmi_infoframe frm;
+	u8 buf[31];
+	int ret;
+
+	if (!sii8620_is_mhl3(ctx) || !ctx->use_packed_pixel) {
+		sii8620_write(ctx, REG_TPI_SC,
+			BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI);
+		sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, ctx->avif + 3,
+			ARRAY_SIZE(ctx->avif) - 3);
+		sii8620_write(ctx, REG_PKT_FILTER_0,
+			BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT |
+			BIT_PKT_FILTER_0_DROP_MPEG_PKT |
+			BIT_PKT_FILTER_0_DROP_GCP_PKT,
+			BIT_PKT_FILTER_1_DROP_GEN_PKT);
+		return;
+	}
+
+	ret = hdmi_avi_infoframe_init(&frm.avi);
+	frm.avi.colorspace = HDMI_COLORSPACE_YUV422;
+	frm.avi.active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
+	frm.avi.picture_aspect = HDMI_PICTURE_ASPECT_16_9;
+	frm.avi.colorimetry = HDMI_COLORIMETRY_ITU_709;
+	frm.avi.video_code = ctx->video_code;
+	if (!ret)
+		ret = hdmi_avi_infoframe_pack(&frm.avi, buf, ARRAY_SIZE(buf));
+	if (ret > 0)
+		sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, buf + 3, ret - 3);
+	sii8620_write(ctx, REG_PKT_FILTER_0,
+		BIT_PKT_FILTER_0_DROP_CEA_GAMUT_PKT |
+		BIT_PKT_FILTER_0_DROP_MPEG_PKT |
+		BIT_PKT_FILTER_0_DROP_AVI_PKT |
+		BIT_PKT_FILTER_0_DROP_GCP_PKT,
+		BIT_PKT_FILTER_1_VSI_OVERRIDE_DIS |
+		BIT_PKT_FILTER_1_DROP_GEN_PKT |
+		BIT_PKT_FILTER_1_DROP_VSIF_PKT);
+
+	sii8620_write(ctx, REG_TPI_INFO_FSEL, BIT_TPI_INFO_FSEL_EN
+		| BIT_TPI_INFO_FSEL_RPT | VAL_TPI_INFO_FSEL_VSI);
+	ret = mhl3_infoframe_init(&mhl_frm);
+	if (!ret)
+		ret = mhl3_infoframe_pack(&mhl_frm, buf, ARRAY_SIZE(buf));
+	sii8620_write_buf(ctx, REG_TPI_INFO_B0, buf, ret);
+}
+
 static void sii8620_start_hdmi(struct sii8620 *ctx)
 {
 	sii8620_write_seq_static(ctx,
 		REG_RX_HDMI_CTRL2, VAL_RX_HDMI_CTRL2_DEFVAL
 			| BIT_RX_HDMI_CTRL2_USE_AV_MUTE,
 		REG_VID_OVRRD, BIT_VID_OVRRD_PP_AUTO_DISABLE
-			| BIT_VID_OVRRD_M1080P_OVRRD,
-		REG_VID_MODE, 0,
-		REG_MHL_TOP_CTL, 0x1,
-		REG_MHLTX_CTL6, 0xa0,
-		REG_TPI_INPUT, VAL_TPI_FORMAT(RGB, FULL),
-		REG_TPI_OUTPUT, VAL_TPI_FORMAT(RGB, FULL),
-	);
-
-	sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
-			      MHL_DST_LM_CLK_MODE_NORMAL |
-			      MHL_DST_LM_PATH_ENABLED);
+			| BIT_VID_OVRRD_M1080P_OVRRD);
+	sii8620_set_format(ctx);
 
-	sii8620_set_auto_zone(ctx);
+	if (!sii8620_is_mhl3(ctx)) {
+		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
+			MHL_DST_LM_CLK_MODE_NORMAL | MHL_DST_LM_PATH_ENABLED);
+		sii8620_set_auto_zone(ctx);
+	} else {
+		static const struct {
+			int max_clk;
+			u8 zone;
+			u8 link_rate;
+			u8 rrp_decode;
+		} clk_spec[] = {
+			{ 150000, VAL_TX_ZONE_CTL3_TX_ZONE_1_5GBPS,
+			  MHL_XDS_LINK_RATE_1_5_GBPS, 0x38 },
+			{ 300000, VAL_TX_ZONE_CTL3_TX_ZONE_3GBPS,
+			  MHL_XDS_LINK_RATE_3_0_GBPS, 0x40 },
+			{ 600000, VAL_TX_ZONE_CTL3_TX_ZONE_6GBPS,
+			  MHL_XDS_LINK_RATE_6_0_GBPS, 0x40 },
+		};
+		u8 p0_ctrl = BIT_M3_P0CTRL_MHL3_P0_PORT_EN;
+		int clk = ctx->pixel_clock * (ctx->use_packed_pixel ? 2 : 3);
+		int i;
+
+		for (i = 0; i < ARRAY_SIZE(clk_spec); ++i)
+			if (clk < clk_spec[i].max_clk)
+				break;
 
-	sii8620_write(ctx, REG_TPI_SC, BIT_TPI_SC_TPI_OUTPUT_MODE_0_HDMI);
+		if (100 * clk >= 98 * clk_spec[i].max_clk)
+			p0_ctrl |= BIT_M3_P0CTRL_MHL3_P0_UNLIMIT_EN;
 
-	sii8620_write_buf(ctx, REG_TPI_AVI_CHSUM, ctx->avif,
-			  ARRAY_SIZE(ctx->avif));
+		sii8620_burst_tx_bits_per_pixel_fmt(ctx, ctx->use_packed_pixel);
+		sii8620_burst_send(ctx);
+		sii8620_write_seq(ctx,
+			REG_MHL_DP_CTL0, 0xf0,
+			REG_MHL3_TX_ZONE_CTL, clk_spec[i].zone);
+		sii8620_setbits(ctx, REG_M3_P0CTRL,
+			BIT_M3_P0CTRL_MHL3_P0_PORT_EN
+			| BIT_M3_P0CTRL_MHL3_P0_UNLIMIT_EN, p0_ctrl);
+		sii8620_setbits(ctx, REG_M3_POSTM, MSK_M3_POSTM_RRP_DECODE,
+			clk_spec[i].rrp_decode);
+		sii8620_write_seq_static(ctx,
+			REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE
+				| BIT_M3_CTRL_H2M_SWRST,
+			REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE
+		);
+		sii8620_mt_write_stat(ctx, MHL_XDS_REG(AVLINK_MODE_CONTROL),
+			clk_spec[i].link_rate);
+	}
 
-	sii8620_write(ctx, REG_PKT_FILTER_0, 0xa1, 0x2);
+	sii8620_set_infoframes(ctx);
 }
 
 static void sii8620_start_video(struct sii8620 *ctx)
 {
-	if (ctx->mode < CM_MHL3)
+	if (!sii8620_is_mhl3(ctx))
 		sii8620_stop_video(ctx);
 
 	switch (ctx->sink_type) {
@@ -757,44 +1251,6 @@ static void sii8620_enable_hpd(struct sii8620 *ctx)
 	);
 }
 
-static void sii8620_enable_gen2_write_burst(struct sii8620 *ctx)
-{
-	if (ctx->gen2_write_burst)
-		return;
-
-	sii8620_write_seq_static(ctx,
-		REG_MDT_RCV_TIMEOUT, 100,
-		REG_MDT_RCV_CTRL, BIT_MDT_RCV_CTRL_MDT_RCV_EN
-	);
-	ctx->gen2_write_burst = 1;
-}
-
-static void sii8620_disable_gen2_write_burst(struct sii8620 *ctx)
-{
-	if (!ctx->gen2_write_burst)
-		return;
-
-	sii8620_write_seq_static(ctx,
-		REG_MDT_XMIT_CTRL, 0,
-		REG_MDT_RCV_CTRL, 0
-	);
-	ctx->gen2_write_burst = 0;
-}
-
-static void sii8620_start_gen2_write_burst(struct sii8620 *ctx)
-{
-	sii8620_write_seq_static(ctx,
-		REG_MDT_INT_1_MASK, BIT_MDT_RCV_TIMEOUT
-			| BIT_MDT_RCV_SM_ABORT_PKT_RCVD | BIT_MDT_RCV_SM_ERROR
-			| BIT_MDT_XMIT_TIMEOUT | BIT_MDT_XMIT_SM_ABORT_PKT_RCVD
-			| BIT_MDT_XMIT_SM_ERROR,
-		REG_MDT_INT_0_MASK, BIT_MDT_XFIFO_EMPTY
-			| BIT_MDT_IDLE_AFTER_HAWB_DISABLE
-			| BIT_MDT_RFIFO_DATA_RDY
-	);
-	sii8620_enable_gen2_write_burst(ctx);
-}
-
 static void sii8620_mhl_discover(struct sii8620 *ctx)
 {
 	sii8620_write_seq_static(ctx,
@@ -838,7 +1294,7 @@ static void sii8620_mhl_discover(struct sii8620 *ctx)
 
 static void sii8620_peer_specific_init(struct sii8620 *ctx)
 {
-	if (ctx->mode == CM_MHL3)
+	if (sii8620_is_mhl3(ctx))
 		sii8620_write_seq_static(ctx,
 			REG_SYS_CTRL1, BIT_SYS_CTRL1_BLOCK_DDC_BY_HPD,
 			REG_EMSCINTRMASK1,
@@ -948,21 +1404,51 @@ static void sii8620_mhl_init(struct sii8620 *ctx)
 	);
 	sii8620_disable_gen2_write_burst(ctx);
 
-	/* currently MHL3 is not supported, so we force version to 0 */
-	sii8620_mt_write_stat(ctx, MHL_DST_REG(VERSION), 0);
+	sii8620_mt_write_stat(ctx, MHL_DST_REG(VERSION), SII8620_MHL_VERSION);
 	sii8620_mt_write_stat(ctx, MHL_DST_REG(CONNECTED_RDY),
 			      MHL_DST_CONN_DCAP_RDY | MHL_DST_CONN_XDEVCAPP_SUPP
 			      | MHL_DST_CONN_POW_STAT);
 	sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE), MHL_INT_RC_DCAP_CHG);
 }
 
+static void sii8620_emsc_enable(struct sii8620 *ctx)
+{
+	u8 reg;
+
+	sii8620_setbits(ctx, REG_GENCTL, BIT_GENCTL_EMSC_EN
+					 | BIT_GENCTL_CLR_EMSC_RFIFO
+					 | BIT_GENCTL_CLR_EMSC_XFIFO, ~0);
+	sii8620_setbits(ctx, REG_GENCTL, BIT_GENCTL_CLR_EMSC_RFIFO
+					 | BIT_GENCTL_CLR_EMSC_XFIFO, 0);
+	sii8620_setbits(ctx, REG_COMMECNT, BIT_COMMECNT_I2C_TO_EMSC_EN, ~0);
+	reg = sii8620_readb(ctx, REG_EMSCINTR);
+	sii8620_write(ctx, REG_EMSCINTR, reg);
+	sii8620_write(ctx, REG_EMSCINTRMASK, BIT_EMSCINTR_SPI_DVLD);
+}
+
+static int sii8620_wait_for_fsm_state(struct sii8620 *ctx, u8 state)
+{
+	int i;
+
+	for (i = 0; i < 10; ++i) {
+		u8 s = sii8620_readb(ctx, REG_COC_STAT_0);
+
+		if ((s & MSK_COC_STAT_0_FSM_STATE) == state)
+			return 0;
+		if (!(s & BIT_COC_STAT_0_PLL_LOCKED))
+			return -EBUSY;
+		usleep_range(4000, 6000);
+	}
+	return -ETIMEDOUT;
+}
+
 static void sii8620_set_mode(struct sii8620 *ctx, enum sii8620_mode mode)
 {
+	int ret;
+
 	if (ctx->mode == mode)
 		return;
 
-	ctx->mode = mode;
-
 	switch (mode) {
 	case CM_MHL1:
 		sii8620_write_seq_static(ctx,
@@ -972,15 +1458,46 @@ static void sii8620_set_mode(struct sii8620 *ctx, enum sii8620_mode mode)
 				| BIT_DPD_OSC_EN,
 			REG_COC_INTR_MASK, 0
 		);
+		ctx->mode = mode;
 		break;
 	case CM_MHL3:
+		sii8620_write(ctx, REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE);
+		ctx->mode = mode;
+		return;
+	case CM_ECBUS_S:
+		sii8620_emsc_enable(ctx);
 		sii8620_write_seq_static(ctx,
-			REG_M3_CTRL, VAL_M3_CTRL_MHL3_VALUE,
-			REG_COC_CTL0, 0x40,
-			REG_MHL_COC_CTL1, 0x07
+			REG_TTXSPINUMS, 4,
+			REG_TRXSPINUMS, 4,
+			REG_TTXHSICNUMS, 0x14,
+			REG_TRXHSICNUMS, 0x14,
+			REG_TTXTOTNUMS, 0x18,
+			REG_TRXTOTNUMS, 0x18,
+			REG_PWD_SRST, BIT_PWD_SRST_COC_DOC_RST
+				      | BIT_PWD_SRST_CBUS_RST_SW_EN,
+			REG_MHL_COC_CTL1, 0xbd,
+			REG_PWD_SRST, BIT_PWD_SRST_CBUS_RST_SW_EN,
+			REG_COC_CTLB, 0x01,
+			REG_COC_CTL0, 0x5c,
+			REG_COC_CTL14, 0x03,
+			REG_COC_CTL15, 0x80,
+			REG_MHL_DP_CTL6, BIT_MHL_DP_CTL6_DP_TAP1_SGN
+					 | BIT_MHL_DP_CTL6_DP_TAP1_EN
+					 | BIT_MHL_DP_CTL6_DT_PREDRV_FEEDCAP_EN,
+			REG_MHL_DP_CTL8, 0x03
 		);
-		break;
+		ret = sii8620_wait_for_fsm_state(ctx, 0x03);
+		sii8620_write_seq_static(ctx,
+			REG_COC_CTL14, 0x00,
+			REG_COC_CTL15, 0x80
+		);
+		if (!ret)
+			sii8620_write(ctx, REG_CBUS3_CNVT, 0x85);
+		else
+			sii8620_disconnect(ctx);
+		return;
 	case CM_DISCONNECTED:
+		ctx->mode = mode;
 		break;
 	default:
 		dev_err(ctx->dev, "%s mode %d not supported\n", __func__, mode);
@@ -1007,10 +1524,12 @@ static void sii8620_disconnect(struct sii8620 *ctx)
 {
 	sii8620_disable_gen2_write_burst(ctx);
 	sii8620_stop_video(ctx);
-	msleep(50);
+	msleep(100);
 	sii8620_cbus_reset(ctx);
 	sii8620_set_mode(ctx, CM_DISCONNECTED);
 	sii8620_write_seq_static(ctx,
+		REG_TX_ZONE_CTL1, 0,
+		REG_MHL_PLL_CTL0, 0x07,
 		REG_COC_CTL0, 0x40,
 		REG_CBUS3_CNVT, 0x84,
 		REG_COC_CTL14, 0x00,
@@ -1123,24 +1642,45 @@ static void sii8620_irq_disc(struct sii8620 *ctx)
 	sii8620_write(ctx, REG_CBUS_DISC_INTR0, stat);
 }
 
+static void sii8620_read_burst(struct sii8620 *ctx)
+{
+	u8 buf[17];
+
+	sii8620_read_buf(ctx, REG_MDT_RCV_READ_PORT, buf, ARRAY_SIZE(buf));
+	sii8620_write(ctx, REG_MDT_RCV_CTRL, BIT_MDT_RCV_CTRL_MDT_RCV_EN |
+		      BIT_MDT_RCV_CTRL_MDT_DELAY_RCV_EN |
+		      BIT_MDT_RCV_CTRL_MDT_RFIFO_CLR_CUR);
+	sii8620_readb(ctx, REG_MDT_RFIFO_STAT);
+}
+
 static void sii8620_irq_g2wb(struct sii8620 *ctx)
 {
 	u8 stat = sii8620_readb(ctx, REG_MDT_INT_0);
 
 	if (stat & BIT_MDT_IDLE_AFTER_HAWB_DISABLE)
-		dev_dbg(ctx->dev, "HAWB idle\n");
+		if (sii8620_is_mhl3(ctx))
+			sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE),
+				MHL_INT_RC_FEAT_COMPLETE);
+
+	if (stat & BIT_MDT_RFIFO_DATA_RDY)
+		sii8620_read_burst(ctx);
+
+	if (stat & BIT_MDT_XFIFO_EMPTY)
+		sii8620_write(ctx, REG_MDT_XMIT_CTRL, 0);
 
 	sii8620_write(ctx, REG_MDT_INT_0, stat);
 }
 
-static void sii8620_status_changed_dcap(struct sii8620 *ctx)
+static void sii8620_status_dcap_ready(struct sii8620 *ctx)
 {
-	if (ctx->stat[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY) {
-		sii8620_set_mode(ctx, CM_MHL1);
-		sii8620_peer_specific_init(ctx);
-		sii8620_write(ctx, REG_INTR9_MASK, BIT_INTR9_DEVCAP_DONE
-			       | BIT_INTR9_EDID_DONE | BIT_INTR9_EDID_ERROR);
-	}
+	enum sii8620_mode mode;
+
+	mode = ctx->stat[MHL_DST_VERSION] >= 0x30 ? CM_MHL3 : CM_MHL1;
+	if (mode > ctx->mode)
+		sii8620_set_mode(ctx, mode);
+	sii8620_peer_specific_init(ctx);
+	sii8620_write(ctx, REG_INTR9_MASK, BIT_INTR9_DEVCAP_DONE
+		      | BIT_INTR9_EDID_DONE | BIT_INTR9_EDID_ERROR);
 }
 
 static void sii8620_status_changed_path(struct sii8620 *ctx)
@@ -1149,7 +1689,9 @@ static void sii8620_status_changed_path(struct sii8620 *ctx)
 		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
 				      MHL_DST_LM_CLK_MODE_NORMAL
 				      | MHL_DST_LM_PATH_ENABLED);
-		sii8620_mt_read_devcap(ctx, false);
+		if (!sii8620_is_mhl3(ctx))
+			sii8620_mt_read_devcap(ctx, false);
+		sii8620_mt_set_cont(ctx, sii8620_sink_detected);
 	} else {
 		sii8620_mt_write_stat(ctx, MHL_DST_REG(LINK_MODE),
 				      MHL_DST_LM_CLK_MODE_NORMAL);
@@ -1166,19 +1708,75 @@ static void sii8620_msc_mr_write_stat(struct sii8620 *ctx)
 	sii8620_update_array(ctx->stat, st, MHL_DST_SIZE);
 	sii8620_update_array(ctx->xstat, xst, MHL_XDS_SIZE);
 
-	if (st[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY)
-		sii8620_status_changed_dcap(ctx);
+	if (ctx->stat[MHL_DST_CONNECTED_RDY] & MHL_DST_CONN_DCAP_RDY)
+		sii8620_status_dcap_ready(ctx);
 
 	if (st[MHL_DST_LINK_MODE] & MHL_DST_LM_PATH_ENABLED)
 		sii8620_status_changed_path(ctx);
 }
 
+static void sii8620_ecbus_up(struct sii8620 *ctx, int ret)
+{
+	if (ret < 0)
+		return;
+
+	sii8620_set_mode(ctx, CM_ECBUS_S);
+}
+
+static void sii8620_got_ecbus_speed(struct sii8620 *ctx, int ret)
+{
+	if (ret < 0)
+		return;
+
+	sii8620_mt_write_stat(ctx, MHL_XDS_REG(CURR_ECBUS_MODE),
+			      MHL_XDS_ECBUS_S | MHL_XDS_SLOT_MODE_8BIT);
+	sii8620_mt_rap(ctx, MHL_RAP_CBUS_MODE_UP);
+	sii8620_mt_set_cont(ctx, sii8620_ecbus_up);
+}
+
+static void sii8620_mhl_burst_emsc_support_set(struct mhl_burst_emsc_support *d,
+	enum mhl_burst_id id)
+{
+	sii8620_mhl_burst_hdr_set(&d->hdr, MHL_BURST_ID_EMSC_SUPPORT);
+	d->num_entries = 1;
+	d->burst_id[0] = cpu_to_be16(id);
+}
+
+static void sii8620_send_features(struct sii8620 *ctx)
+{
+	u8 buf[16];
+
+	sii8620_write(ctx, REG_MDT_XMIT_CTRL, BIT_MDT_XMIT_CTRL_EN
+		| BIT_MDT_XMIT_CTRL_FIXED_BURST_LEN);
+	sii8620_mhl_burst_emsc_support_set((void *)buf,
+		MHL_BURST_ID_HID_PAYLOAD);
+	sii8620_write_buf(ctx, REG_MDT_XMIT_WRITE_PORT, buf, ARRAY_SIZE(buf));
+}
+
 static void sii8620_msc_mr_set_int(struct sii8620 *ctx)
 {
 	u8 ints[MHL_INT_SIZE];
 
 	sii8620_read_buf(ctx, REG_MHL_INT_0, ints, MHL_INT_SIZE);
 	sii8620_write_buf(ctx, REG_MHL_INT_0, ints, MHL_INT_SIZE);
+
+	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_DCAP_CHG) {
+		switch (ctx->mode) {
+		case CM_MHL3:
+			sii8620_mt_read_xdevcap_reg(ctx, MHL_XDC_ECBUS_SPEEDS);
+			sii8620_mt_set_cont(ctx, sii8620_got_ecbus_speed);
+			break;
+		case CM_ECBUS_S:
+			sii8620_mt_read_devcap(ctx, true);
+			break;
+		default:
+			break;
+		}
+	}
+	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_REQ)
+		sii8620_send_features(ctx);
+	if (ints[MHL_INT_RCHANGE] & MHL_INT_RC_FEAT_COMPLETE)
+		sii8620_edid_read(ctx, 0);
 }
 
 static struct sii8620_mt_msg *sii8620_msc_msg_first(struct sii8620 *ctx)
@@ -1261,6 +1859,19 @@ static void sii8620_irq_coc(struct sii8620 *ctx)
 {
 	u8 stat = sii8620_readb(ctx, REG_COC_INTR);
 
+	if (stat & BIT_COC_CALIBRATION_DONE) {
+		u8 cstat = sii8620_readb(ctx, REG_COC_STAT_0);
+
+		cstat &= BIT_COC_STAT_0_PLL_LOCKED | MSK_COC_STAT_0_FSM_STATE;
+		if (cstat == (BIT_COC_STAT_0_PLL_LOCKED | 0x02)) {
+			sii8620_write_seq_static(ctx,
+				REG_COC_CTLB, 0,
+				REG_TRXINTMH, BIT_TDM_INTR_SYNC_DATA
+					      | BIT_TDM_INTR_SYNC_WAIT
+			);
+		}
+	}
+
 	sii8620_write(ctx, REG_COC_INTR, stat);
 }
 
@@ -1289,17 +1900,6 @@ static void sii8620_scdt_high(struct sii8620 *ctx)
 	);
 }
 
-static void sii8620_scdt_low(struct sii8620 *ctx)
-{
-	sii8620_write(ctx, REG_TMDS_CSTAT_P3,
-		      BIT_TMDS_CSTAT_P3_SCDT_CLR_AVI_DIS |
-		      BIT_TMDS_CSTAT_P3_CLR_AVI);
-
-	sii8620_stop_video(ctx);
-
-	sii8620_write(ctx, REG_INTR8_MASK, 0);
-}
-
 static void sii8620_irq_scdt(struct sii8620 *ctx)
 {
 	u8 stat = sii8620_readb(ctx, REG_INTR5);
@@ -1309,8 +1909,6 @@ static void sii8620_irq_scdt(struct sii8620 *ctx)
 
 		if (cstat & BIT_TMDS_CSTAT_P3_SCDT)
 			sii8620_scdt_high(ctx);
-		else
-			sii8620_scdt_low(ctx);
 	}
 
 	sii8620_write(ctx, REG_INTR5, stat);
@@ -1351,6 +1949,65 @@ static void sii8620_irq_infr(struct sii8620 *ctx)
 		sii8620_start_video(ctx);
 }
 
+static void sii8620_got_xdevcap(struct sii8620 *ctx, int ret)
+{
+	if (ret < 0)
+		return;
+
+	sii8620_mt_read_devcap(ctx, false);
+}
+
+static void sii8620_irq_tdm(struct sii8620 *ctx)
+{
+	u8 stat = sii8620_readb(ctx, REG_TRXINTH);
+	u8 tdm = sii8620_readb(ctx, REG_TRXSTA2);
+
+	if ((tdm & MSK_TDM_SYNCHRONIZED) == VAL_TDM_SYNCHRONIZED) {
+		ctx->mode = CM_ECBUS_S;
+		ctx->burst.rx_ack = 0;
+		ctx->burst.r_size = SII8620_BURST_BUF_LEN;
+		sii8620_burst_tx_rbuf_info(ctx, SII8620_BURST_BUF_LEN);
+		sii8620_mt_read_devcap(ctx, true);
+		sii8620_mt_set_cont(ctx, sii8620_got_xdevcap);
+	} else {
+		sii8620_write_seq_static(ctx,
+			REG_MHL_PLL_CTL2, 0,
+			REG_MHL_PLL_CTL2, BIT_MHL_PLL_CTL2_CLKDETECT_EN
+		);
+	}
+
+	sii8620_write(ctx, REG_TRXINTH, stat);
+}
+
+static void sii8620_irq_block(struct sii8620 *ctx)
+{
+	u8 stat = sii8620_readb(ctx, REG_EMSCINTR);
+
+	if (stat & BIT_EMSCINTR_SPI_DVLD) {
+		u8 bstat = sii8620_readb(ctx, REG_SPIBURSTSTAT);
+
+		if (bstat & BIT_SPIBURSTSTAT_EMSC_NORMAL_MODE)
+			sii8620_burst_receive(ctx);
+	}
+
+	sii8620_write(ctx, REG_EMSCINTR, stat);
+}
+
+static void sii8620_irq_ddc(struct sii8620 *ctx)
+{
+	u8 stat = sii8620_readb(ctx, REG_INTR3);
+
+	if (stat & BIT_DDC_CMD_DONE) {
+		sii8620_write(ctx, REG_INTR3_MASK, 0);
+		if (sii8620_is_mhl3(ctx))
+			sii8620_mt_set_int(ctx, MHL_INT_REG(RCHANGE),
+					   MHL_INT_RC_FEAT_REQ);
+		else
+			sii8620_edid_read(ctx, 0);
+	}
+	sii8620_write(ctx, REG_INTR3, stat);
+}
+
 /* endian agnostic, non-volatile version of test_bit */
 static bool sii8620_test_bit(unsigned int nr, const u8 *addr)
 {
@@ -1366,9 +2023,12 @@ static irqreturn_t sii8620_irq_thread(int irq, void *data)
 		{ BIT_FAST_INTR_STAT_DISC, sii8620_irq_disc },
 		{ BIT_FAST_INTR_STAT_G2WB, sii8620_irq_g2wb },
 		{ BIT_FAST_INTR_STAT_COC, sii8620_irq_coc },
+		{ BIT_FAST_INTR_STAT_TDM, sii8620_irq_tdm },
 		{ BIT_FAST_INTR_STAT_MSC, sii8620_irq_msc },
 		{ BIT_FAST_INTR_STAT_MERR, sii8620_irq_merr },
+		{ BIT_FAST_INTR_STAT_BLOCK, sii8620_irq_block },
 		{ BIT_FAST_INTR_STAT_EDID, sii8620_irq_edid },
+		{ BIT_FAST_INTR_STAT_DDC, sii8620_irq_ddc },
 		{ BIT_FAST_INTR_STAT_SCDT, sii8620_irq_scdt },
 		{ BIT_FAST_INTR_STAT_INFR, sii8620_irq_infr },
 	};
@@ -1383,7 +2043,9 @@ static irqreturn_t sii8620_irq_thread(int irq, void *data)
 		if (sii8620_test_bit(irq_vec[i].bit, stats))
 			irq_vec[i].handler(ctx);
 
+	sii8620_burst_rx_all(ctx);
 	sii8620_mt_work(ctx);
+	sii8620_burst_send(ctx);
 
 	ret = sii8620_clear_error(ctx);
 	if (ret) {
@@ -1450,22 +2112,41 @@ static bool sii8620_mode_fixup(struct drm_bridge *bridge,
 			       struct drm_display_mode *adjusted_mode)
 {
 	struct sii8620 *ctx = bridge_to_sii8620(bridge);
-	bool ret = false;
-	int max_clock = 74250;
+	int max_lclk;
+	bool ret = true;
 
 	mutex_lock(&ctx->lock);
 
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-		goto out;
-
-	if (ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL)
-		max_clock = 300000;
-
-	ret = mode->clock <= max_clock;
-
-out:
+	max_lclk = sii8620_is_mhl3(ctx) ? MHL3_MAX_LCLK : MHL1_MAX_LCLK;
+	if (max_lclk > 3 * adjusted_mode->clock) {
+		ctx->use_packed_pixel = 0;
+		goto end;
+	}
+	if ((ctx->devcap[MHL_DCAP_VID_LINK_MODE] & MHL_DCAP_VID_LINK_PPIXEL) &&
+	    max_lclk > 2 * adjusted_mode->clock) {
+		ctx->use_packed_pixel = 1;
+		goto end;
+	}
+	ret = false;
+end:
+	if (ret) {
+		u8 vic = drm_match_cea_mode(adjusted_mode);
+
+		if (!vic) {
+			union hdmi_infoframe frm;
+			u8 mhl_vic[] = { 0, 95, 94, 93, 98 };
+
+			drm_hdmi_vendor_infoframe_from_display_mode(
+				&frm.vendor.hdmi, adjusted_mode);
+			vic = frm.vendor.hdmi.vic;
+			if (vic >= ARRAY_SIZE(mhl_vic))
+				vic = 0;
+			vic = mhl_vic[vic];
+		}
+		ctx->video_code = vic;
+		ctx->pixel_clock = adjusted_mode->clock;
+	}
 	mutex_unlock(&ctx->lock);
-
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/bridge/sil-sii8620.h b/drivers/gpu/drm/bridge/sil-sii8620.h
index 6ff616a4f6ce..51ab540cf092 100644
--- a/drivers/gpu/drm/bridge/sil-sii8620.h
+++ b/drivers/gpu/drm/bridge/sil-sii8620.h
@@ -353,7 +353,7 @@
 #define REG_TTXNUMB				0x0116
 #define MSK_TTXNUMB_TTX_AFFCTRL_3_0		0xf0
 #define BIT_TTXNUMB_TTX_COM1_AT_SYNC_WAIT	BIT(3)
-#define MSK_TTXNUMB_TTX_NUMBPS_2_0		0x07
+#define MSK_TTXNUMB_TTX_NUMBPS			0x07
 
 /* TDM TX NUMSPISYM, default value: 0x04 */
 #define REG_TTXSPINUMS				0x0117
@@ -403,12 +403,16 @@
 
 /* TDM RX Status 2nd, default value: 0x00 */
 #define REG_TRXSTA2				0x015c
+#define MSK_TDM_SYNCHRONIZED			0xc0
+#define VAL_TDM_SYNCHRONIZED			0x80
 
 /* TDM RX INT Low, default value: 0x00 */
 #define REG_TRXINTL				0x0163
 
 /* TDM RX INT High, default value: 0x00 */
 #define REG_TRXINTH				0x0164
+#define BIT_TDM_INTR_SYNC_DATA			BIT(0)
+#define BIT_TDM_INTR_SYNC_WAIT			BIT(1)
 
 /* TDM RX INTMASK High, default value: 0x00 */
 #define REG_TRXINTMH				0x0166
@@ -429,12 +433,14 @@
 
 /* HSIC Keeper, default value: 0x00 */
 #define REG_KEEPER				0x0181
-#define MSK_KEEPER_KEEPER_MODE_1_0		0x03
+#define MSK_KEEPER_MODE				0x03
+#define VAL_KEEPER_MODE_HOST			0
+#define VAL_KEEPER_MODE_DEVICE			2
 
 /* HSIC Flow Control General, default value: 0x02 */
 #define REG_FCGC				0x0183
-#define BIT_FCGC_HSIC_FC_HOSTMODE		BIT(1)
-#define BIT_FCGC_HSIC_FC_ENABLE			BIT(0)
+#define BIT_FCGC_HSIC_HOSTMODE			BIT(1)
+#define BIT_FCGC_HSIC_ENABLE			BIT(0)
 
 /* HSIC Flow Control CTR13, default value: 0xfc */
 #define REG_FCCTR13				0x0191
@@ -841,6 +847,8 @@
 #define MSK_MHL_DP_CTL7_DT_DRV_VBIAS_CASCTL	0xf0
 #define MSK_MHL_DP_CTL7_DT_DRV_IREF_CTL		0x0f
 
+#define REG_MHL_DP_CTL8				0x0352
+
 /* Tx Zone Ctl1, default value: 0x00 */
 #define REG_TX_ZONE_CTL1			0x0361
 #define VAL_TX_ZONE_CTL1_TX_ZONE_CTRL_MODE	0x08
@@ -1078,16 +1086,26 @@
 
 /* TPI Info Frame Select, default value: 0x00 */
 #define REG_TPI_INFO_FSEL			0x06bf
-#define BIT_TPI_INFO_FSEL_TPI_INFO_EN		BIT(7)
-#define BIT_TPI_INFO_FSEL_TPI_INFO_RPT		BIT(6)
-#define BIT_TPI_INFO_FSEL_TPI_INFO_READ_FLAG	BIT(5)
-#define MSK_TPI_INFO_FSEL_TPI_INFO_SEL		0x07
+#define BIT_TPI_INFO_FSEL_EN			BIT(7)
+#define BIT_TPI_INFO_FSEL_RPT			BIT(6)
+#define BIT_TPI_INFO_FSEL_READ_FLAG		BIT(5)
+#define MSK_TPI_INFO_FSEL_PKT			0x07
+#define VAL_TPI_INFO_FSEL_AVI			0x00
+#define VAL_TPI_INFO_FSEL_SPD			0x01
+#define VAL_TPI_INFO_FSEL_AUD			0x02
+#define VAL_TPI_INFO_FSEL_MPG			0x03
+#define VAL_TPI_INFO_FSEL_GEN			0x04
+#define VAL_TPI_INFO_FSEL_GEN2			0x05
+#define VAL_TPI_INFO_FSEL_VSI			0x06
 
 /* TPI Info Byte #0, default value: 0x00 */
 #define REG_TPI_INFO_B0				0x06c0
 
 /* CoC Status, default value: 0x00 */
 #define REG_COC_STAT_0				0x0700
+#define BIT_COC_STAT_0_PLL_LOCKED		BIT(7)
+#define MSK_COC_STAT_0_FSM_STATE		0x0f
+
 #define REG_COC_STAT_1				0x0701
 #define REG_COC_STAT_2				0x0702
 #define REG_COC_STAT_3				0x0703
@@ -1282,14 +1300,14 @@
 
 /* MDT Transmit Control, default value: 0x70 */
 #define REG_MDT_XMIT_CTRL			0x0588
-#define BIT_MDT_XMIT_CTRL_MDT_XMIT_EN		BIT(7)
-#define BIT_MDT_XMIT_CTRL_MDT_XMIT_CMD_MERGE_EN	BIT(6)
-#define BIT_MDT_XMIT_CTRL_MDT_XMIT_FIXED_BURST_LEN BIT(5)
-#define BIT_MDT_XMIT_CTRL_MDT_XMIT_FIXED_AID	BIT(4)
-#define BIT_MDT_XMIT_CTRL_MDT_XMIT_SINGLE_RUN_EN BIT(3)
-#define BIT_MDT_XMIT_CTRL_MDT_CLR_ABORT_WAIT	BIT(2)
-#define BIT_MDT_XMIT_CTRL_MDT_XFIFO_CLR_ALL	BIT(1)
-#define BIT_MDT_XMIT_CTRL_MDT_XFIFO_CLR_CUR	BIT(0)
+#define BIT_MDT_XMIT_CTRL_EN			BIT(7)
+#define BIT_MDT_XMIT_CTRL_CMD_MERGE_EN		BIT(6)
+#define BIT_MDT_XMIT_CTRL_FIXED_BURST_LEN	BIT(5)
+#define BIT_MDT_XMIT_CTRL_FIXED_AID		BIT(4)
+#define BIT_MDT_XMIT_CTRL_SINGLE_RUN_EN		BIT(3)
+#define BIT_MDT_XMIT_CTRL_CLR_ABORT_WAIT	BIT(2)
+#define BIT_MDT_XMIT_CTRL_XFIFO_CLR_ALL		BIT(1)
+#define BIT_MDT_XMIT_CTRL_XFIFO_CLR_CUR		BIT(0)
 
 /* MDT Receive WRITE Port, default value: 0x00 */
 #define REG_MDT_XMIT_WRITE_PORT			0x0589
diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
index 79a5cd108245..4cc679278182 100644
--- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c
+++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
@@ -289,7 +289,7 @@ int cirrus_fbdev_init(struct cirrus_device *cdev)
 			      &cirrus_fb_helper_funcs);
 
 	ret = drm_fb_helper_init(cdev->dev, &gfbdev->helper,
-				 cdev->num_crtc, CIRRUSFB_CONN_LIMIT);
+				 CIRRUSFB_CONN_LIMIT);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index e5b738660d66..a5673107db26 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -286,15 +286,15 @@ drm_atomic_get_crtc_state(struct drm_atomic_state *state,
 EXPORT_SYMBOL(drm_atomic_get_crtc_state);
 
 static void set_out_fence_for_crtc(struct drm_atomic_state *state,
-				   struct drm_crtc *crtc, s64 __user *fence_ptr)
+				   struct drm_crtc *crtc, s32 __user *fence_ptr)
 {
 	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = fence_ptr;
 }
 
-static s64 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
+static s32 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
 					  struct drm_crtc *crtc)
 {
-	s64 __user *fence_ptr;
+	s32 __user *fence_ptr;
 
 	fence_ptr = state->crtcs[drm_crtc_index(crtc)].out_fence_ptr;
 	state->crtcs[drm_crtc_index(crtc)].out_fence_ptr = NULL;
@@ -307,9 +307,8 @@ static s64 __user *get_out_fence_for_crtc(struct drm_atomic_state *state,
  * @state: the CRTC whose incoming state to update
  * @mode: kernel-internal mode to use for the CRTC, or NULL to disable
  *
- * Set a mode (originating from the kernel) on the desired CRTC state. Does
- * not change any other state properties, including enable, active, or
- * mode_changed.
+ * Set a mode (originating from the kernel) on the desired CRTC state and update
+ * the enable property.
  *
  * RETURNS:
  * Zero on success, error code on failure. Cannot return -EDEADLK.
@@ -506,7 +505,7 @@ int drm_atomic_crtc_set_property(struct drm_crtc *crtc,
 		state->color_mgmt_changed |= replaced;
 		return ret;
 	} else if (property == config->prop_out_fence_ptr) {
-		s64 __user *fence_ptr = u64_to_user_ptr(val);
+		s32 __user *fence_ptr = u64_to_user_ptr(val);
 
 		if (!fence_ptr)
 			return 0;
@@ -1903,7 +1902,7 @@ EXPORT_SYMBOL(drm_atomic_clean_old_fb);
  */
 
 struct drm_out_fence_state {
-	s64 __user *out_fence_ptr;
+	s32 __user *out_fence_ptr;
 	struct sync_file *sync_file;
 	int fd;
 };
@@ -1940,7 +1939,7 @@ static int prepare_crtc_signaling(struct drm_device *dev,
 		return 0;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
-		u64 __user *fence_ptr;
+		s32 __user *fence_ptr;
 
 		fence_ptr = get_out_fence_for_crtc(crtc_state->state, crtc);
 
@@ -2020,13 +2019,16 @@ static void complete_crtc_signaling(struct drm_device *dev,
 	}
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct drm_pending_vblank_event *event = crtc_state->event;
 		/*
-		 * TEST_ONLY and PAGE_FLIP_EVENT are mutually
-		 * exclusive, if they weren't, this code should be
-		 * called on success for TEST_ONLY too.
+		 * Free the allocated event. drm_atomic_helper_setup_commit
+		 * can allocate an event too, so only free it if it's ours
+		 * to prevent a double free in drm_atomic_state_clear.
 		 */
-		if (crtc_state->event)
-			drm_event_cancel_free(dev, &crtc_state->event->base);
+		if (event && (event->base.fence || event->base.file_priv)) {
+			drm_event_cancel_free(dev, &event->base);
+			crtc_state->event = NULL;
+		}
 	}
 
 	if (!fence_state)
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index 9a08445a7a7a..01d936b7be43 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -369,7 +369,7 @@ mode_fixup(struct drm_atomic_state *state)
 	struct drm_connector *connector;
 	struct drm_connector_state *conn_state;
 	int i;
-	bool ret;
+	int ret;
 
 	for_each_crtc_in_state(state, crtc, crtc_state, i) {
 		if (!crtc_state->mode_changed &&
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c
index 789b4c65cd69..cc23b9a505c0 100644
--- a/drivers/gpu/drm/drm_color_mgmt.c
+++ b/drivers/gpu/drm/drm_color_mgmt.c
@@ -88,6 +88,30 @@
  */
 
 /**
+ * drm_color_lut_extract - clamp and round LUT entries
+ * @user_input: input value
+ * @bit_precision: number of bits the hw LUT supports
+ *
+ * Extract a degamma/gamma LUT value provided by user (in the form of
+ * &drm_color_lut entries) and round it to the precision supported by the
+ * hardware.
+ */
+uint32_t drm_color_lut_extract(uint32_t user_input, uint32_t bit_precision)
+{
+	uint32_t val = user_input;
+	uint32_t max = 0xffff >> (16 - bit_precision);
+
+	/* Round only if we're not using full precision. */
+	if (bit_precision < 16) {
+		val += 1UL << (16 - bit_precision - 1);
+		val >>= 16 - bit_precision;
+	}
+
+	return clamp_val(val, 0, max);
+}
+EXPORT_SYMBOL(drm_color_lut_extract);
+
+/**
  * drm_crtc_enable_color_mgmt - enable color management properties
  * @crtc: DRM CRTC
  * @degamma_lut_size: the size of the degamma lut (before CSC)
diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c
index e4d2c8a49076..45464c8b797d 100644
--- a/drivers/gpu/drm/drm_connector.c
+++ b/drivers/gpu/drm/drm_connector.c
@@ -378,6 +378,9 @@ int drm_connector_register(struct drm_connector *connector)
 {
 	int ret = 0;
 
+	if (!connector->dev->registered)
+		return 0;
+
 	mutex_lock(&connector->mutex);
 	if (connector->registered)
 		goto unlock;
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 1bdcfd566695..955c5690bf64 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -207,3 +207,6 @@ int drm_mode_cursor2_ioctl(struct drm_device *dev,
 			   void *data, struct drm_file *file_priv);
 int drm_mode_page_flip_ioctl(struct drm_device *dev,
 			     void *data, struct drm_file *file_priv);
+
+/* drm_edid.c */
+void drm_mode_fixup_1366x768(struct drm_display_mode *mode);
diff --git a/drivers/gpu/drm/drm_dp_mst_topology.c b/drivers/gpu/drm/drm_dp_mst_topology.c
index 122a1b04bebc..f2cc375907d0 100644
--- a/drivers/gpu/drm/drm_dp_mst_topology.c
+++ b/drivers/gpu/drm/drm_dp_mst_topology.c
@@ -1817,7 +1817,7 @@ int drm_dp_update_payload_part1(struct drm_dp_mst_topology_mgr *mgr)
 				mgr->payloads[i].vcpi = req_payload.vcpi;
 			} else if (mgr->payloads[i].num_slots) {
 				mgr->payloads[i].num_slots = 0;
-				drm_dp_destroy_payload_step1(mgr, port, port->vcpi.vcpi, &mgr->payloads[i]);
+				drm_dp_destroy_payload_step1(mgr, port, mgr->payloads[i].vcpi, &mgr->payloads[i]);
 				req_payload.payload_state = mgr->payloads[i].payload_state;
 				mgr->payloads[i].start_slot = 0;
 			}
diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c
index 6cbd67f4fbc5..b5c6bb46a425 100644
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@@ -465,7 +465,10 @@ static void drm_fs_inode_free(struct inode *inode)
  * that do embed &struct drm_device it must be placed first in the overall
  * structure, and the overall structure must be allocated using kmalloc(): The
  * drm core's release function unconditionally calls kfree() on the @dev pointer
- * when the final reference is released.
+ * when the final reference is released. To override this behaviour, and so
+ * allow embedding of the drm_device inside the driver's device struct at an
+ * arbitrary offset, you must supply a &drm_driver.release callback and control
+ * the finalization explicitly.
  *
  * RETURNS:
  * 0 on success, or error code on failure.
@@ -553,6 +556,41 @@ err_free:
 EXPORT_SYMBOL(drm_dev_init);
 
 /**
+ * drm_dev_fini - Finalize a dead DRM device
+ * @dev: DRM device
+ *
+ * Finalize a dead DRM device. This is the converse to drm_dev_init() and
+ * frees up all data allocated by it. All driver private data should be
+ * finalized first. Note that this function does not free the @dev, that is
+ * left to the caller.
+ *
+ * The ref-count of @dev must be zero, and drm_dev_fini() should only be called
+ * from a &drm_driver.release callback.
+ */
+void drm_dev_fini(struct drm_device *dev)
+{
+	drm_vblank_cleanup(dev);
+
+	if (drm_core_check_feature(dev, DRIVER_GEM))
+		drm_gem_destroy(dev);
+
+	drm_legacy_ctxbitmap_cleanup(dev);
+	drm_ht_remove(&dev->map_hash);
+	drm_fs_inode_free(dev->anon_inode);
+
+	drm_minor_free(dev, DRM_MINOR_PRIMARY);
+	drm_minor_free(dev, DRM_MINOR_RENDER);
+	drm_minor_free(dev, DRM_MINOR_CONTROL);
+
+	mutex_destroy(&dev->master_mutex);
+	mutex_destroy(&dev->ctxlist_mutex);
+	mutex_destroy(&dev->filelist_mutex);
+	mutex_destroy(&dev->struct_mutex);
+	kfree(dev->unique);
+}
+EXPORT_SYMBOL(drm_dev_fini);
+
+/**
  * drm_dev_alloc - Allocate new DRM device
  * @driver: DRM driver to allocate device for
  * @parent: Parent device object
@@ -598,25 +636,12 @@ static void drm_dev_release(struct kref *ref)
 {
 	struct drm_device *dev = container_of(ref, struct drm_device, ref);
 
-	drm_vblank_cleanup(dev);
-
-	if (drm_core_check_feature(dev, DRIVER_GEM))
-		drm_gem_destroy(dev);
-
-	drm_legacy_ctxbitmap_cleanup(dev);
-	drm_ht_remove(&dev->map_hash);
-	drm_fs_inode_free(dev->anon_inode);
-
-	drm_minor_free(dev, DRM_MINOR_PRIMARY);
-	drm_minor_free(dev, DRM_MINOR_RENDER);
-	drm_minor_free(dev, DRM_MINOR_CONTROL);
-
-	mutex_destroy(&dev->master_mutex);
-	mutex_destroy(&dev->ctxlist_mutex);
-	mutex_destroy(&dev->filelist_mutex);
-	mutex_destroy(&dev->struct_mutex);
-	kfree(dev->unique);
-	kfree(dev);
+	if (dev->driver->release) {
+		dev->driver->release(dev);
+	} else {
+		drm_dev_fini(dev);
+		kfree(dev);
+	}
 }
 
 /**
@@ -751,6 +776,8 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 	if (ret)
 		goto err_minors;
 
+	dev->registered = true;
+
 	if (dev->driver->load) {
 		ret = dev->driver->load(dev, flags);
 		if (ret)
@@ -798,6 +825,8 @@ void drm_dev_unregister(struct drm_device *dev)
 
 	drm_lastclose(dev);
 
+	dev->registered = false;
+
 	if (drm_core_check_feature(dev, DRIVER_MODESET))
 		drm_modeset_unregister_all(dev);
 
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index baa6ccb3e18b..c8baab9bee0d 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -38,6 +38,8 @@
 #include <drm/drm_encoder.h>
 #include <drm/drm_displayid.h>
 
+#include "drm_crtc_internal.h"
+
 #define version_greater(edid, maj, min) \
 	(((edid)->version > (maj)) || \
 	 ((edid)->version == (maj) && (edid)->revision > (min)))
@@ -2153,7 +2155,7 @@ drm_dmt_modes_for_range(struct drm_connector *connector, struct edid *edid,
 /* fix up 1366x768 mode from 1368x768;
  * GFT/CVT can't express 1366 width which isn't dividable by 8
  */
-static void fixup_mode_1366x768(struct drm_display_mode *mode)
+void drm_mode_fixup_1366x768(struct drm_display_mode *mode)
 {
 	if (mode->hdisplay == 1368 && mode->vdisplay == 768) {
 		mode->hdisplay = 1366;
@@ -2177,7 +2179,7 @@ drm_gtf_modes_for_range(struct drm_connector *connector, struct edid *edid,
 		if (!newmode)
 			return modes;
 
-		fixup_mode_1366x768(newmode);
+		drm_mode_fixup_1366x768(newmode);
 		if (!mode_in_range(newmode, edid, timing) ||
 		    !valid_inferred_mode(connector, newmode)) {
 			drm_mode_destroy(dev, newmode);
@@ -2206,7 +2208,7 @@ drm_cvt_modes_for_range(struct drm_connector *connector, struct edid *edid,
 		if (!newmode)
 			return modes;
 
-		fixup_mode_1366x768(newmode);
+		drm_mode_fixup_1366x768(newmode);
 		if (!mode_in_range(newmode, edid, timing) ||
 		    !valid_inferred_mode(connector, newmode)) {
 			drm_mode_destroy(dev, newmode);
diff --git a/drivers/gpu/drm/drm_fb_cma_helper.c b/drivers/gpu/drm/drm_fb_cma_helper.c
index 0ef8b284a4b8..596fabf18c3e 100644
--- a/drivers/gpu/drm/drm_fb_cma_helper.c
+++ b/drivers/gpu/drm/drm_fb_cma_helper.c
@@ -489,15 +489,14 @@ static const struct drm_fb_helper_funcs drm_fb_cma_helper_funcs = {
  * drm_fbdev_cma_init_with_funcs() - Allocate and initializes a drm_fbdev_cma struct
  * @dev: DRM device
  * @preferred_bpp: Preferred bits per pixel for the device
- * @num_crtc: Number of CRTCs
  * @max_conn_count: Maximum number of connectors
  * @funcs: fb helper functions, in particular a custom dirty() callback
  *
  * Returns a newly allocated drm_fbdev_cma struct or a ERR_PTR.
  */
 struct drm_fbdev_cma *drm_fbdev_cma_init_with_funcs(struct drm_device *dev,
-	unsigned int preferred_bpp, unsigned int num_crtc,
-	unsigned int max_conn_count, const struct drm_framebuffer_funcs *funcs)
+	unsigned int preferred_bpp, unsigned int max_conn_count,
+	const struct drm_framebuffer_funcs *funcs)
 {
 	struct drm_fbdev_cma *fbdev_cma;
 	struct drm_fb_helper *helper;
@@ -514,7 +513,7 @@ struct drm_fbdev_cma *drm_fbdev_cma_init_with_funcs(struct drm_device *dev,
 
 	drm_fb_helper_prepare(dev, helper, &drm_fb_cma_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, helper, num_crtc, max_conn_count);
+	ret = drm_fb_helper_init(dev, helper, max_conn_count);
 	if (ret < 0) {
 		dev_err(dev->dev, "Failed to initialize drm fb helper.\n");
 		goto err_free;
@@ -554,11 +553,11 @@ EXPORT_SYMBOL_GPL(drm_fbdev_cma_init_with_funcs);
  * Returns a newly allocated drm_fbdev_cma struct or a ERR_PTR.
  */
 struct drm_fbdev_cma *drm_fbdev_cma_init(struct drm_device *dev,
-	unsigned int preferred_bpp, unsigned int num_crtc,
-	unsigned int max_conn_count)
+	unsigned int preferred_bpp, unsigned int max_conn_count)
 {
-	return drm_fbdev_cma_init_with_funcs(dev, preferred_bpp, num_crtc,
-				max_conn_count, &drm_fb_cma_funcs);
+	return drm_fbdev_cma_init_with_funcs(dev, preferred_bpp,
+					     max_conn_count,
+					     &drm_fb_cma_funcs);
 }
 EXPORT_SYMBOL_GPL(drm_fbdev_cma_init);
 
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index c7fafa175755..f6d4d9700734 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -712,7 +712,6 @@ EXPORT_SYMBOL(drm_fb_helper_prepare);
  * drm_fb_helper_init - initialize a drm_fb_helper structure
  * @dev: drm device
  * @fb_helper: driver-allocated fbdev helper structure to initialize
- * @crtc_count: maximum number of crtcs to support in this fbdev emulation
  * @max_conn_count: max connector count
  *
  * This allocates the structures for the fbdev helper with the given limits.
@@ -727,9 +726,10 @@ EXPORT_SYMBOL(drm_fb_helper_prepare);
  */
 int drm_fb_helper_init(struct drm_device *dev,
 		       struct drm_fb_helper *fb_helper,
-		       int crtc_count, int max_conn_count)
+		       int max_conn_count)
 {
 	struct drm_crtc *crtc;
+	struct drm_mode_config *config = &dev->mode_config;
 	int i;
 
 	if (!drm_fbdev_emulation)
@@ -738,11 +738,11 @@ int drm_fb_helper_init(struct drm_device *dev,
 	if (!max_conn_count)
 		return -EINVAL;
 
-	fb_helper->crtc_info = kcalloc(crtc_count, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL);
+	fb_helper->crtc_info = kcalloc(config->num_crtc, sizeof(struct drm_fb_helper_crtc), GFP_KERNEL);
 	if (!fb_helper->crtc_info)
 		return -ENOMEM;
 
-	fb_helper->crtc_count = crtc_count;
+	fb_helper->crtc_count = config->num_crtc;
 	fb_helper->connector_info = kcalloc(dev->mode_config.num_connector, sizeof(struct drm_fb_helper_connector *), GFP_KERNEL);
 	if (!fb_helper->connector_info) {
 		kfree(fb_helper->crtc_info);
@@ -751,7 +751,7 @@ int drm_fb_helper_init(struct drm_device *dev,
 	fb_helper->connector_info_alloc_count = dev->mode_config.num_connector;
 	fb_helper->connector_count = 0;
 
-	for (i = 0; i < crtc_count; i++) {
+	for (i = 0; i < fb_helper->crtc_count; i++) {
 		fb_helper->crtc_info[i].mode_set.connectors =
 			kcalloc(max_conn_count,
 				sizeof(struct drm_connector *),
@@ -860,6 +860,9 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 	if (!drm_fbdev_emulation)
 		return;
 
+	cancel_work_sync(&fb_helper->resume_work);
+	cancel_work_sync(&fb_helper->dirty_work);
+
 	mutex_lock(&kernel_fb_helper_lock);
 	if (!list_empty(&fb_helper->kernel_fb_list)) {
 		list_del(&fb_helper->kernel_fb_list);
diff --git a/drivers/gpu/drm/drm_gem_cma_helper.c b/drivers/gpu/drm/drm_gem_cma_helper.c
index 5cf38a474845..f7ba32bfe39b 100644
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@@ -447,7 +447,7 @@ void drm_gem_cma_describe(struct drm_gem_cma_object *cma_obj,
 	off = drm_vma_node_start(&obj->vma_node);
 
 	seq_printf(m, "%2d (%2d) %08llx %pad %p %zu",
-			obj->name, obj->refcount.refcount.counter,
+			obj->name, kref_read(&obj->refcount),
 			off, &cma_obj->paddr, cma_obj->vaddr, obj->size);
 
 	seq_printf(m, "\n");
diff --git a/drivers/gpu/drm/drm_info.c b/drivers/gpu/drm/drm_info.c
index ffb2ab389d1d..6b68e9088436 100644
--- a/drivers/gpu/drm/drm_info.c
+++ b/drivers/gpu/drm/drm_info.c
@@ -118,7 +118,7 @@ static int drm_gem_one_name_info(int id, void *ptr, void *data)
 	seq_printf(m, "%6d %8zd %7d %8d\n",
 		   obj->name, obj->size,
 		   obj->handle_count,
-		   atomic_read(&obj->refcount.refcount));
+		   kref_read(&obj->refcount));
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c
index 32d43f86a8f2..96bb6badb818 100644
--- a/drivers/gpu/drm/drm_lock.c
+++ b/drivers/gpu/drm/drm_lock.c
@@ -34,6 +34,8 @@
  */
 
 #include <linux/export.h>
+#include <linux/sched/signal.h>
+
 #include <drm/drmP.h>
 #include "drm_legacy.h"
 #include "drm_internal.h"
diff --git a/drivers/gpu/drm/drm_mm.c b/drivers/gpu/drm/drm_mm.c
index e51876e588d6..8bfb0b327267 100644
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@@ -97,14 +97,6 @@
  * locking would be fully redundant.
  */
 
-static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
-						u64 size,
-						u64 alignment,
-						unsigned long color,
-						u64 start,
-						u64 end,
-						enum drm_mm_search_flags flags);
-
 #ifdef CONFIG_DRM_DEBUG_MM
 #include <linux/stackdepot.h>
 
@@ -226,69 +218,151 @@ static void drm_mm_interval_tree_add_node(struct drm_mm_node *hole_node,
 			    &drm_mm_interval_tree_augment);
 }
 
-static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
-				 struct drm_mm_node *node,
-				 u64 size, u64 alignment,
-				 unsigned long color,
-				 u64 range_start, u64 range_end,
-				 enum drm_mm_allocator_flags flags)
+#define RB_INSERT(root, member, expr) do { \
+	struct rb_node **link = &root.rb_node, *rb = NULL; \
+	u64 x = expr(node); \
+	while (*link) { \
+		rb = *link; \
+		if (x < expr(rb_entry(rb, struct drm_mm_node, member))) \
+			link = &rb->rb_left; \
+		else \
+			link = &rb->rb_right; \
+	} \
+	rb_link_node(&node->member, rb, link); \
+	rb_insert_color(&node->member, &root); \
+} while (0)
+
+#define HOLE_SIZE(NODE) ((NODE)->hole_size)
+#define HOLE_ADDR(NODE) (__drm_mm_hole_node_start(NODE))
+
+static void add_hole(struct drm_mm_node *node)
 {
-	struct drm_mm *mm = hole_node->mm;
-	u64 hole_start = drm_mm_hole_node_start(hole_node);
-	u64 hole_end = drm_mm_hole_node_end(hole_node);
-	u64 adj_start = hole_start;
-	u64 adj_end = hole_end;
+	struct drm_mm *mm = node->mm;
 
-	DRM_MM_BUG_ON(!drm_mm_hole_follows(hole_node) || node->allocated);
+	node->hole_size =
+		__drm_mm_hole_node_end(node) - __drm_mm_hole_node_start(node);
+	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
-	if (mm->color_adjust)
-		mm->color_adjust(hole_node, color, &adj_start, &adj_end);
+	RB_INSERT(mm->holes_size, rb_hole_size, HOLE_SIZE);
+	RB_INSERT(mm->holes_addr, rb_hole_addr, HOLE_ADDR);
 
-	adj_start = max(adj_start, range_start);
-	adj_end = min(adj_end, range_end);
+	list_add(&node->hole_stack, &mm->hole_stack);
+}
 
-	if (flags & DRM_MM_CREATE_TOP)
-		adj_start = adj_end - size;
+static void rm_hole(struct drm_mm_node *node)
+{
+	DRM_MM_BUG_ON(!drm_mm_hole_follows(node));
 
-	if (alignment) {
-		u64 rem;
+	list_del(&node->hole_stack);
+	rb_erase(&node->rb_hole_size, &node->mm->holes_size);
+	rb_erase(&node->rb_hole_addr, &node->mm->holes_addr);
+	node->hole_size = 0;
 
-		div64_u64_rem(adj_start, alignment, &rem);
-		if (rem) {
-			if (flags & DRM_MM_CREATE_TOP)
-				adj_start -= rem;
-			else
-				adj_start += alignment - rem;
+	DRM_MM_BUG_ON(drm_mm_hole_follows(node));
+}
+
+static inline struct drm_mm_node *rb_hole_size_to_node(struct rb_node *rb)
+{
+	return rb_entry_safe(rb, struct drm_mm_node, rb_hole_size);
+}
+
+static inline struct drm_mm_node *rb_hole_addr_to_node(struct rb_node *rb)
+{
+	return rb_entry_safe(rb, struct drm_mm_node, rb_hole_addr);
+}
+
+static inline u64 rb_hole_size(struct rb_node *rb)
+{
+	return rb_entry(rb, struct drm_mm_node, rb_hole_size)->hole_size;
+}
+
+static struct drm_mm_node *best_hole(struct drm_mm *mm, u64 size)
+{
+	struct rb_node *best = NULL;
+	struct rb_node **link = &mm->holes_size.rb_node;
+
+	while (*link) {
+		struct rb_node *rb = *link;
+
+		if (size <= rb_hole_size(rb)) {
+			link = &rb->rb_left;
+			best = rb;
+		} else {
+			link = &rb->rb_right;
 		}
 	}
 
-	if (adj_start == hole_start) {
-		hole_node->hole_follows = 0;
-		list_del(&hole_node->hole_stack);
+	return rb_hole_size_to_node(best);
+}
+
+static struct drm_mm_node *find_hole(struct drm_mm *mm, u64 addr)
+{
+	struct drm_mm_node *node = NULL;
+	struct rb_node **link = &mm->holes_addr.rb_node;
+
+	while (*link) {
+		u64 hole_start;
+
+		node = rb_hole_addr_to_node(*link);
+		hole_start = __drm_mm_hole_node_start(node);
+
+		if (addr < hole_start)
+			link = &node->rb_hole_addr.rb_left;
+		else if (addr > hole_start + node->hole_size)
+			link = &node->rb_hole_addr.rb_right;
+		else
+			break;
 	}
 
-	node->start = adj_start;
-	node->size = size;
-	node->mm = mm;
-	node->color = color;
-	node->allocated = 1;
+	return node;
+}
 
-	list_add(&node->node_list, &hole_node->node_list);
+static struct drm_mm_node *
+first_hole(struct drm_mm *mm,
+	   u64 start, u64 end, u64 size,
+	   enum drm_mm_insert_mode mode)
+{
+	if (RB_EMPTY_ROOT(&mm->holes_size))
+		return NULL;
 
-	drm_mm_interval_tree_add_node(hole_node, node);
+	switch (mode) {
+	default:
+	case DRM_MM_INSERT_BEST:
+		return best_hole(mm, size);
 
-	DRM_MM_BUG_ON(node->start < range_start);
-	DRM_MM_BUG_ON(node->start < adj_start);
-	DRM_MM_BUG_ON(node->start + node->size > adj_end);
-	DRM_MM_BUG_ON(node->start + node->size > range_end);
+	case DRM_MM_INSERT_LOW:
+		return find_hole(mm, start);
 
-	node->hole_follows = 0;
-	if (__drm_mm_hole_node_start(node) < hole_end) {
-		list_add(&node->hole_stack, &mm->hole_stack);
-		node->hole_follows = 1;
+	case DRM_MM_INSERT_HIGH:
+		return find_hole(mm, end);
+
+	case DRM_MM_INSERT_EVICT:
+		return list_first_entry_or_null(&mm->hole_stack,
+						struct drm_mm_node,
+						hole_stack);
 	}
+}
 
-	save_stack(node);
+static struct drm_mm_node *
+next_hole(struct drm_mm *mm,
+	  struct drm_mm_node *node,
+	  enum drm_mm_insert_mode mode)
+{
+	switch (mode) {
+	default:
+	case DRM_MM_INSERT_BEST:
+		return rb_hole_size_to_node(rb_next(&node->rb_hole_size));
+
+	case DRM_MM_INSERT_LOW:
+		return rb_hole_addr_to_node(rb_next(&node->rb_hole_addr));
+
+	case DRM_MM_INSERT_HIGH:
+		return rb_hole_addr_to_node(rb_prev(&node->rb_hole_addr));
+
+	case DRM_MM_INSERT_EVICT:
+		node = list_next_entry(node, hole_stack);
+		return &node->hole_stack == &mm->hole_stack ? NULL : node;
+	}
 }
 
 /**
@@ -317,21 +391,12 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 		return -ENOSPC;
 
 	/* Find the relevant hole to add our node to */
-	hole = drm_mm_interval_tree_iter_first(&mm->interval_tree,
-					       node->start, ~(u64)0);
-	if (hole) {
-		if (hole->start < end)
-			return -ENOSPC;
-	} else {
-		hole = list_entry(drm_mm_nodes(mm), typeof(*hole), node_list);
-	}
-
-	hole = list_last_entry(&hole->node_list, typeof(*hole), node_list);
-	if (!drm_mm_hole_follows(hole))
+	hole = find_hole(mm, node->start);
+	if (!hole)
 		return -ENOSPC;
 
 	adj_start = hole_start = __drm_mm_hole_node_start(hole);
-	adj_end = hole_end = __drm_mm_hole_node_end(hole);
+	adj_end = hole_end = hole_start + hole->hole_size;
 
 	if (mm->color_adjust)
 		mm->color_adjust(hole, node->color, &adj_start, &adj_end);
@@ -340,70 +405,130 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 		return -ENOSPC;
 
 	node->mm = mm;
-	node->allocated = 1;
 
 	list_add(&node->node_list, &hole->node_list);
-
 	drm_mm_interval_tree_add_node(hole, node);
+	node->allocated = true;
+	node->hole_size = 0;
 
-	if (node->start == hole_start) {
-		hole->hole_follows = 0;
-		list_del(&hole->hole_stack);
-	}
-
-	node->hole_follows = 0;
-	if (end != hole_end) {
-		list_add(&node->hole_stack, &mm->hole_stack);
-		node->hole_follows = 1;
-	}
+	rm_hole(hole);
+	if (node->start > hole_start)
+		add_hole(hole);
+	if (end < hole_end)
+		add_hole(node);
 
 	save_stack(node);
-
 	return 0;
 }
 EXPORT_SYMBOL(drm_mm_reserve_node);
 
 /**
- * drm_mm_insert_node_in_range_generic - ranged search for space and insert @node
+ * drm_mm_insert_node_in_range - ranged search for space and insert @node
  * @mm: drm_mm to allocate from
  * @node: preallocate node to insert
  * @size: size of the allocation
  * @alignment: alignment of the allocation
  * @color: opaque tag value to use for this node
- * @start: start of the allowed range for this node
- * @end: end of the allowed range for this node
- * @sflags: flags to fine-tune the allocation search
- * @aflags: flags to fine-tune the allocation behavior
+ * @range_start: start of the allowed range for this node
+ * @range_end: end of the allowed range for this node
+ * @mode: fine-tune the allocation search and placement
  *
  * The preallocated @node must be cleared to 0.
  *
  * Returns:
  * 0 on success, -ENOSPC if there's no suitable hole.
  */
-int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
-					u64 size, u64 alignment,
-					unsigned long color,
-					u64 start, u64 end,
-					enum drm_mm_search_flags sflags,
-					enum drm_mm_allocator_flags aflags)
+int drm_mm_insert_node_in_range(struct drm_mm * const mm,
+				struct drm_mm_node * const node,
+				u64 size, u64 alignment,
+				unsigned long color,
+				u64 range_start, u64 range_end,
+				enum drm_mm_insert_mode mode)
 {
-	struct drm_mm_node *hole_node;
+	struct drm_mm_node *hole;
+	u64 remainder_mask;
 
-	if (WARN_ON(size == 0))
-		return -EINVAL;
+	DRM_MM_BUG_ON(range_start >= range_end);
 
-	hole_node = drm_mm_search_free_in_range_generic(mm,
-							size, alignment, color,
-							start, end, sflags);
-	if (!hole_node)
+	if (unlikely(size == 0 || range_end - range_start < size))
 		return -ENOSPC;
 
-	drm_mm_insert_helper(hole_node, node,
-			     size, alignment, color,
-			     start, end, aflags);
-	return 0;
+	if (alignment <= 1)
+		alignment = 0;
+
+	remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
+	for (hole = first_hole(mm, range_start, range_end, size, mode); hole;
+	     hole = next_hole(mm, hole, mode)) {
+		u64 hole_start = __drm_mm_hole_node_start(hole);
+		u64 hole_end = hole_start + hole->hole_size;
+		u64 adj_start, adj_end;
+		u64 col_start, col_end;
+
+		if (mode == DRM_MM_INSERT_LOW && hole_start >= range_end)
+			break;
+
+		if (mode == DRM_MM_INSERT_HIGH && hole_end <= range_start)
+			break;
+
+		col_start = hole_start;
+		col_end = hole_end;
+		if (mm->color_adjust)
+			mm->color_adjust(hole, color, &col_start, &col_end);
+
+		adj_start = max(col_start, range_start);
+		adj_end = min(col_end, range_end);
+
+		if (adj_end <= adj_start || adj_end - adj_start < size)
+			continue;
+
+		if (mode == DRM_MM_INSERT_HIGH)
+			adj_start = adj_end - size;
+
+		if (alignment) {
+			u64 rem;
+
+			if (likely(remainder_mask))
+				rem = adj_start & remainder_mask;
+			else
+				div64_u64_rem(adj_start, alignment, &rem);
+			if (rem) {
+				adj_start -= rem;
+				if (mode != DRM_MM_INSERT_HIGH)
+					adj_start += alignment;
+
+				if (adj_start < max(col_start, range_start) ||
+				    min(col_end, range_end) - adj_start < size)
+					continue;
+
+				if (adj_end <= adj_start ||
+				    adj_end - adj_start < size)
+					continue;
+			}
+		}
+
+		node->mm = mm;
+		node->size = size;
+		node->start = adj_start;
+		node->color = color;
+		node->hole_size = 0;
+
+		list_add(&node->node_list, &hole->node_list);
+		drm_mm_interval_tree_add_node(hole, node);
+		node->allocated = true;
+
+		rm_hole(hole);
+		if (adj_start > hole_start)
+			add_hole(hole);
+		if (adj_start + size < hole_end)
+			add_hole(node);
+
+		save_stack(node);
+		return 0;
+	}
+
+	return -ENOSPC;
 }
-EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);
+EXPORT_SYMBOL(drm_mm_insert_node_in_range);
 
 /**
  * drm_mm_remove_node - Remove a memory node from the allocator.
@@ -421,92 +546,20 @@ void drm_mm_remove_node(struct drm_mm_node *node)
 	DRM_MM_BUG_ON(!node->allocated);
 	DRM_MM_BUG_ON(node->scanned_block);
 
-	prev_node =
-	    list_entry(node->node_list.prev, struct drm_mm_node, node_list);
-
-	if (drm_mm_hole_follows(node)) {
-		DRM_MM_BUG_ON(__drm_mm_hole_node_start(node) ==
-			      __drm_mm_hole_node_end(node));
-		list_del(&node->hole_stack);
-	} else {
-		DRM_MM_BUG_ON(__drm_mm_hole_node_start(node) !=
-			      __drm_mm_hole_node_end(node));
-	}
+	prev_node = list_prev_entry(node, node_list);
 
-	if (!drm_mm_hole_follows(prev_node)) {
-		prev_node->hole_follows = 1;
-		list_add(&prev_node->hole_stack, &mm->hole_stack);
-	} else
-		list_move(&prev_node->hole_stack, &mm->hole_stack);
+	if (drm_mm_hole_follows(node))
+		rm_hole(node);
 
 	drm_mm_interval_tree_remove(node, &mm->interval_tree);
 	list_del(&node->node_list);
-	node->allocated = 0;
-}
-EXPORT_SYMBOL(drm_mm_remove_node);
-
-static int check_free_hole(u64 start, u64 end, u64 size, u64 alignment)
-{
-	if (end - start < size)
-		return 0;
-
-	if (alignment) {
-		u64 rem;
-
-		div64_u64_rem(start, alignment, &rem);
-		if (rem)
-			start += alignment - rem;
-	}
+	node->allocated = false;
 
-	return end >= start + size;
-}
-
-static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_mm *mm,
-							u64 size,
-							u64 alignment,
-							unsigned long color,
-							u64 start,
-							u64 end,
-							enum drm_mm_search_flags flags)
-{
-	struct drm_mm_node *entry;
-	struct drm_mm_node *best;
-	u64 adj_start;
-	u64 adj_end;
-	u64 best_size;
-
-	DRM_MM_BUG_ON(mm->scan_active);
-
-	best = NULL;
-	best_size = ~0UL;
-
-	__drm_mm_for_each_hole(entry, mm, adj_start, adj_end,
-			       flags & DRM_MM_SEARCH_BELOW) {
-		u64 hole_size = adj_end - adj_start;
-
-		if (mm->color_adjust) {
-			mm->color_adjust(entry, color, &adj_start, &adj_end);
-			if (adj_end <= adj_start)
-				continue;
-		}
-
-		adj_start = max(adj_start, start);
-		adj_end = min(adj_end, end);
-
-		if (!check_free_hole(adj_start, adj_end, size, alignment))
-			continue;
-
-		if (!(flags & DRM_MM_SEARCH_BEST))
-			return entry;
-
-		if (hole_size < best_size) {
-			best = entry;
-			best_size = hole_size;
-		}
-	}
-
-	return best;
+	if (drm_mm_hole_follows(prev_node))
+		rm_hole(prev_node);
+	add_hole(prev_node);
 }
+EXPORT_SYMBOL(drm_mm_remove_node);
 
 /**
  * drm_mm_replace_node - move an allocation from @old to @new
@@ -521,18 +574,23 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
 	DRM_MM_BUG_ON(!old->allocated);
 
+	*new = *old;
+
 	list_replace(&old->node_list, &new->node_list);
-	list_replace(&old->hole_stack, &new->hole_stack);
 	rb_replace_node(&old->rb, &new->rb, &old->mm->interval_tree);
-	new->hole_follows = old->hole_follows;
-	new->mm = old->mm;
-	new->start = old->start;
-	new->size = old->size;
-	new->color = old->color;
-	new->__subtree_last = old->__subtree_last;
-
-	old->allocated = 0;
-	new->allocated = 1;
+
+	if (drm_mm_hole_follows(old)) {
+		list_replace(&old->hole_stack, &new->hole_stack);
+		rb_replace_node(&old->rb_hole_size,
+				&new->rb_hole_size,
+				&old->mm->holes_size);
+		rb_replace_node(&old->rb_hole_addr,
+				&new->rb_hole_addr,
+				&old->mm->holes_addr);
+	}
+
+	old->allocated = false;
+	new->allocated = true;
 }
 EXPORT_SYMBOL(drm_mm_replace_node);
 
@@ -577,7 +635,7 @@ EXPORT_SYMBOL(drm_mm_replace_node);
  * @color: opaque tag value to use for the allocation
  * @start: start of the allowed range for the allocation
  * @end: end of the allowed range for the allocation
- * @flags: flags to specify how the allocation will be performed afterwards
+ * @mode: fine-tune the allocation search and placement
  *
  * This simply sets up the scanning routines with the parameters for the desired
  * hole.
@@ -593,7 +651,7 @@ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
 				 unsigned long color,
 				 u64 start,
 				 u64 end,
-				 unsigned int flags)
+				 enum drm_mm_insert_mode mode)
 {
 	DRM_MM_BUG_ON(start >= end);
 	DRM_MM_BUG_ON(!size || size > end - start);
@@ -608,7 +666,7 @@ void drm_mm_scan_init_with_range(struct drm_mm_scan *scan,
 	scan->alignment = alignment;
 	scan->remainder_mask = is_power_of_2(alignment) ? alignment - 1 : 0;
 	scan->size = size;
-	scan->flags = flags;
+	scan->mode = mode;
 
 	DRM_MM_BUG_ON(end <= start);
 	scan->range_start = start;
@@ -667,7 +725,7 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 	if (adj_end <= adj_start || adj_end - adj_start < scan->size)
 		return false;
 
-	if (scan->flags == DRM_MM_CREATE_TOP)
+	if (scan->mode == DRM_MM_INSERT_HIGH)
 		adj_start = adj_end - scan->size;
 
 	if (scan->alignment) {
@@ -679,7 +737,7 @@ bool drm_mm_scan_add_block(struct drm_mm_scan *scan,
 			div64_u64_rem(adj_start, scan->alignment, &rem);
 		if (rem) {
 			adj_start -= rem;
-			if (scan->flags != DRM_MM_CREATE_TOP)
+			if (scan->mode != DRM_MM_INSERT_HIGH)
 				adj_start += scan->alignment;
 			if (adj_start < max(col_start, scan->range_start) ||
 			    min(col_end, scan->range_end) - adj_start < scan->size)
@@ -775,7 +833,7 @@ struct drm_mm_node *drm_mm_scan_color_evict(struct drm_mm_scan *scan)
 
 	hole = list_first_entry(&mm->hole_stack, typeof(*hole), hole_stack);
 	hole_start = __drm_mm_hole_node_start(hole);
-	hole_end = __drm_mm_hole_node_end(hole);
+	hole_end = hole_start + hole->hole_size;
 
 	DRM_MM_BUG_ON(hole_start > scan->hit_start);
 	DRM_MM_BUG_ON(hole_end < scan->hit_end);
@@ -802,21 +860,22 @@ void drm_mm_init(struct drm_mm *mm, u64 start, u64 size)
 {
 	DRM_MM_BUG_ON(start + size <= start);
 
+	mm->color_adjust = NULL;
+
 	INIT_LIST_HEAD(&mm->hole_stack);
-	mm->scan_active = 0;
+	mm->interval_tree = RB_ROOT;
+	mm->holes_size = RB_ROOT;
+	mm->holes_addr = RB_ROOT;
 
 	/* Clever trick to avoid a special case in the free hole tracking. */
 	INIT_LIST_HEAD(&mm->head_node.node_list);
-	mm->head_node.allocated = 0;
-	mm->head_node.hole_follows = 1;
+	mm->head_node.allocated = false;
 	mm->head_node.mm = mm;
 	mm->head_node.start = start + size;
-	mm->head_node.size = start - mm->head_node.start;
-	list_add_tail(&mm->head_node.hole_stack, &mm->hole_stack);
+	mm->head_node.size = -size;
+	add_hole(&mm->head_node);
 
-	mm->interval_tree = RB_ROOT;
-
-	mm->color_adjust = NULL;
+	mm->scan_active = 0;
 }
 EXPORT_SYMBOL(drm_mm_init);
 
@@ -837,20 +896,17 @@ EXPORT_SYMBOL(drm_mm_takedown);
 
 static u64 drm_mm_dump_hole(struct drm_printer *p, const struct drm_mm_node *entry)
 {
-	u64 hole_start, hole_end, hole_size;
-
-	if (entry->hole_follows) {
-		hole_start = drm_mm_hole_node_start(entry);
-		hole_end = drm_mm_hole_node_end(entry);
-		hole_size = hole_end - hole_start;
-		drm_printf(p, "%#018llx-%#018llx: %llu: free\n", hole_start,
-			   hole_end, hole_size);
-		return hole_size;
+	u64 start, size;
+
+	size = entry->hole_size;
+	if (size) {
+		start = drm_mm_hole_node_start(entry);
+		drm_printf(p, "%#018llx-%#018llx: %llu: free\n",
+			   start, start + size, size);
 	}
 
-	return 0;
+	return size;
 }
-
 /**
  * drm_mm_print - print allocator state
  * @mm: drm_mm allocator to print
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index 14543ff08c51..220a6c1f4ab9 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -160,7 +160,7 @@ EXPORT_SYMBOL(drm_mode_object_find);
 void drm_mode_object_unreference(struct drm_mode_object *obj)
 {
 	if (obj->free_cb) {
-		DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount));
+		DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount));
 		kref_put(&obj->refcount, obj->free_cb);
 	}
 }
@@ -177,7 +177,7 @@ EXPORT_SYMBOL(drm_mode_object_unreference);
 void drm_mode_object_reference(struct drm_mode_object *obj)
 {
 	if (obj->free_cb) {
-		DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, atomic_read(&obj->refcount.refcount));
+		DRM_DEBUG("OBJ ID: %d (%d)\n", obj->id, kref_read(&obj->refcount));
 		kref_get(&obj->refcount);
 	}
 }
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index a8616b1a8d22..fd22c1c891bf 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -1481,12 +1481,8 @@ drm_mode_create_from_cmdline_mode(struct drm_device *dev,
 
 	mode->type |= DRM_MODE_TYPE_USERDEF;
 	/* fix up 1368x768: GFT/CVT can't express 1366 width due to alignment */
-	if (cmd->xres == 1366 && mode->hdisplay == 1368) {
-		mode->hdisplay = 1366;
-		mode->hsync_start--;
-		mode->hsync_end--;
-		drm_mode_set_name(mode);
-	}
+	if (cmd->xres == 1366)
+		drm_mode_fixup_1366x768(mode);
 	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
 	return mode;
 }
diff --git a/drivers/gpu/drm/drm_panel.c b/drivers/gpu/drm/drm_panel.c
index 3dfe3c886502..308d442a531b 100644
--- a/drivers/gpu/drm/drm_panel.c
+++ b/drivers/gpu/drm/drm_panel.c
@@ -137,7 +137,7 @@ EXPORT_SYMBOL(drm_panel_detach);
  * Return: A pointer to the panel registered for the specified device tree
  * node or NULL if no panel matching the device tree node can be found.
  */
-struct drm_panel *of_drm_find_panel(struct device_node *np)
+struct drm_panel *of_drm_find_panel(const struct device_node *np)
 {
 	struct drm_panel *panel;
 
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 93381454bdf7..dc4419ada12c 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -220,8 +220,8 @@ drm_connector_detect(struct drm_connector *connector, bool force)
  *    - drm_mode_validate_basic() performs basic sanity checks
  *    - drm_mode_validate_size() filters out modes larger than @maxX and @maxY
  *      (if specified)
- *    - drm_mode_validate_flag() checks the modes againt basic connector
- *      capabilites (interlace_allowed,doublescan_allowed,stereo_allowed)
+ *    - drm_mode_validate_flag() checks the modes against basic connector
+ *      capabilities (interlace_allowed,doublescan_allowed,stereo_allowed)
  *    - the optional &drm_connector_helper_funcs.mode_valid helper can perform
  *      driver and/or hardware specific checks
  *
diff --git a/drivers/gpu/drm/drm_vm.c b/drivers/gpu/drm/drm_vm.c
index bd311c77c254..1170b3209a12 100644
--- a/drivers/gpu/drm/drm_vm.c
+++ b/drivers/gpu/drm/drm_vm.c
@@ -96,8 +96,9 @@ static pgprot_t drm_dma_prot(uint32_t map_type, struct vm_area_struct *vma)
  * map, get the page, increment the use count and return it.
  */
 #if IS_ENABLED(CONFIG_AGP)
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
 	struct drm_local_map *map = NULL;
@@ -168,7 +169,7 @@ vm_fault_error:
 	return VM_FAULT_SIGBUS;	/* Disallow mremap */
 }
 #else
-static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_fault(struct vm_fault *vmf)
 {
 	return VM_FAULT_SIGBUS;
 }
@@ -184,8 +185,9 @@ static int drm_do_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
  * Get the mapping, find the real physical page to map, get the page, and
  * return it.
  */
-static int drm_do_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_shm_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_local_map *map = vma->vm_private_data;
 	unsigned long offset;
 	unsigned long i;
@@ -280,14 +282,14 @@ static void drm_vm_shm_close(struct vm_area_struct *vma)
 /**
  * \c fault method for DMA virtual memory.
  *
- * \param vma virtual memory area.
  * \param address access address.
  * \return pointer to the page structure.
  *
  * Determine the page number from the page offset and get it from drm_device_dma::pagelist.
  */
-static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_dma_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
 	struct drm_device_dma *dma = dev->dma;
@@ -315,14 +317,14 @@ static int drm_do_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 /**
  * \c fault method for scatter-gather virtual memory.
  *
- * \param vma virtual memory area.
  * \param address access address.
  * \return pointer to the page structure.
  *
  * Determine the map offset from the page offset and get it from drm_sg_mem::pagelist.
  */
-static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int drm_vm_sg_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_local_map *map = vma->vm_private_data;
 	struct drm_file *priv = vma->vm_file->private_data;
 	struct drm_device *dev = priv->minor->dev;
@@ -347,26 +349,6 @@ static int drm_do_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	return 0;
 }
 
-static int drm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return drm_do_vm_fault(vma, vmf);
-}
-
-static int drm_vm_shm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return drm_do_vm_shm_fault(vma, vmf);
-}
-
-static int drm_vm_dma_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return drm_do_vm_dma_fault(vma, vmf);
-}
-
-static int drm_vm_sg_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	return drm_do_vm_sg_fault(vma, vmf);
-}
-
 /** AGP virtual memory operations */
 static const struct vm_operations_struct drm_vm_ops = {
 	.fault = drm_vm_fault,
diff --git a/drivers/gpu/drm/drm_vma_manager.c b/drivers/gpu/drm/drm_vma_manager.c
index 20cc33d1bfc1..d9100b565198 100644
--- a/drivers/gpu/drm/drm_vma_manager.c
+++ b/drivers/gpu/drm/drm_vma_manager.c
@@ -212,8 +212,7 @@ int drm_vma_offset_add(struct drm_vma_offset_manager *mgr,
 		goto out_unlock;
 	}
 
-	ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node,
-				 pages, 0, DRM_MM_SEARCH_DEFAULT);
+	ret = drm_mm_insert_node(&mgr->vm_addr_space_mm, &node->vm_node, pages);
 	if (ret)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/etnaviv/Makefile b/drivers/gpu/drm/etnaviv/Makefile
index 1086e9876f91..4f76c992043f 100644
--- a/drivers/gpu/drm/etnaviv/Makefile
+++ b/drivers/gpu/drm/etnaviv/Makefile
@@ -1,6 +1,7 @@
 etnaviv-y := \
 	etnaviv_buffer.o \
 	etnaviv_cmd_parser.o \
+	etnaviv_cmdbuf.o \
 	etnaviv_drv.o \
 	etnaviv_dump.o \
 	etnaviv_gem_prime.o \
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index d9230132dfbc..ed9588f36bc9 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -15,6 +15,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
@@ -125,7 +126,7 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 	u32 *ptr = buf->vaddr + off;
 
 	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
-			ptr, etnaviv_iommu_get_cmdbuf_va(gpu, buf) + off, size - len * 4 - off);
+			ptr, etnaviv_cmdbuf_get_va(buf) + off, size - len * 4 - off);
 
 	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
 			ptr, len * 4, 0);
@@ -158,7 +159,7 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
 	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
 		buffer->user_size = 0;
 
-	return etnaviv_iommu_get_cmdbuf_va(gpu, buffer) + buffer->user_size;
+	return etnaviv_cmdbuf_get_va(buffer) + buffer->user_size;
 }
 
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
@@ -169,7 +170,7 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;
 
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) +
+	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
 		 buffer->user_size - 4);
 
 	return buffer->user_size / 8;
@@ -261,7 +262,7 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
 
-	link_target = etnaviv_iommu_get_cmdbuf_va(gpu, cmdbuf);
+	link_target = etnaviv_cmdbuf_get_va(cmdbuf);
 	link_dwords = cmdbuf->size / 8;
 
 	/*
@@ -355,12 +356,13 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, unsigned int event,
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_iommu_get_cmdbuf_va(gpu, buffer) +
+	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
 			    buffer->user_size - 4);
 
 	if (drm_debug & DRM_UT_DRIVER)
 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			return_target, etnaviv_iommu_get_cmdbuf_va(gpu, cmdbuf), cmdbuf->vaddr);
+			return_target, etnaviv_cmdbuf_get_va(cmdbuf),
+			cmdbuf->vaddr);
 
 	if (drm_debug & DRM_UT_DRIVER) {
 		print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
index 2a2e5e366ab7..6e3bbcf24160 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmd_parser.c
@@ -56,6 +56,8 @@ static const struct {
 	ST(0x0644, 1),
 	ST(0x064c, 1),
 	ST(0x0680, 8),
+	ST(0x086c, 1),
+	ST(0x1028, 1),
 	ST(0x1410, 1),
 	ST(0x1430, 1),
 	ST(0x1458, 1),
@@ -73,8 +75,12 @@ static const struct {
 	ST(0x16c0, 8),
 	ST(0x16e0, 8),
 	ST(0x1740, 8),
+	ST(0x17c0, 8),
+	ST(0x17e0, 8),
 	ST(0x2400, 14 * 16),
 	ST(0x10800, 32 * 16),
+	ST(0x14600, 16),
+	ST(0x14800, 8 * 8),
 #undef ST
 };
 
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
new file mode 100644
index 000000000000..633e0f07cbac
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@@ -0,0 +1,153 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <drm/drm_mm.h>
+
+#include "etnaviv_cmdbuf.h"
+#include "etnaviv_gpu.h"
+#include "etnaviv_mmu.h"
+
+#define SUBALLOC_SIZE		SZ_256K
+#define SUBALLOC_GRANULE	SZ_4K
+#define SUBALLOC_GRANULES	(SUBALLOC_SIZE / SUBALLOC_GRANULE)
+
+struct etnaviv_cmdbuf_suballoc {
+	/* suballocated dma buffer properties */
+	struct etnaviv_gpu *gpu;
+	void *vaddr;
+	dma_addr_t paddr;
+
+	/* GPU mapping */
+	u32 iova;
+	struct drm_mm_node vram_node; /* only used on MMUv2 */
+
+	/* allocation management */
+	struct mutex lock;
+	DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
+	int free_space;
+	wait_queue_head_t free_event;
+};
+
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
+{
+	struct etnaviv_cmdbuf_suballoc *suballoc;
+	int ret;
+
+	suballoc = kzalloc(sizeof(*suballoc), GFP_KERNEL);
+	if (!suballoc)
+		return ERR_PTR(-ENOMEM);
+
+	suballoc->gpu = gpu;
+	mutex_init(&suballoc->lock);
+	init_waitqueue_head(&suballoc->free_event);
+
+	suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+				       &suballoc->paddr, GFP_KERNEL);
+	if (!suballoc->vaddr)
+		goto free_suballoc;
+
+	ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
+					    &suballoc->vram_node, SUBALLOC_SIZE,
+					    &suballoc->iova);
+	if (ret)
+		goto free_dma;
+
+	return suballoc;
+
+free_dma:
+	dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
+free_suballoc:
+	kfree(suballoc);
+
+	return NULL;
+}
+
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
+{
+	etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
+				      SUBALLOC_SIZE, suballoc->iova);
+	dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+		    suballoc->paddr);
+	kfree(suballoc);
+}
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos)
+{
+	struct etnaviv_cmdbuf *cmdbuf;
+	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
+				 sizeof(*cmdbuf));
+	int granule_offs, order, ret;
+
+	cmdbuf = kzalloc(sz, GFP_KERNEL);
+	if (!cmdbuf)
+		return NULL;
+
+	cmdbuf->suballoc = suballoc;
+	cmdbuf->size = size;
+
+	order = order_base_2(ALIGN(size, SUBALLOC_GRANULE) / SUBALLOC_GRANULE);
+retry:
+	mutex_lock(&suballoc->lock);
+	granule_offs = bitmap_find_free_region(suballoc->granule_map,
+					SUBALLOC_GRANULES, order);
+	if (granule_offs < 0) {
+		suballoc->free_space = 0;
+		mutex_unlock(&suballoc->lock);
+		ret = wait_event_interruptible_timeout(suballoc->free_event,
+						       suballoc->free_space,
+						       msecs_to_jiffies(10 * 1000));
+		if (!ret) {
+			dev_err(suballoc->gpu->dev,
+				"Timeout waiting for cmdbuf space\n");
+			return NULL;
+		}
+		goto retry;
+	}
+	mutex_unlock(&suballoc->lock);
+	cmdbuf->suballoc_offset = granule_offs * SUBALLOC_GRANULE;
+	cmdbuf->vaddr = suballoc->vaddr + cmdbuf->suballoc_offset;
+
+	return cmdbuf;
+}
+
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
+{
+	struct etnaviv_cmdbuf_suballoc *suballoc = cmdbuf->suballoc;
+	int order = order_base_2(ALIGN(cmdbuf->size, SUBALLOC_GRANULE) /
+				 SUBALLOC_GRANULE);
+
+	mutex_lock(&suballoc->lock);
+	bitmap_release_region(suballoc->granule_map,
+			      cmdbuf->suballoc_offset / SUBALLOC_GRANULE,
+			      order);
+	suballoc->free_space = 1;
+	mutex_unlock(&suballoc->lock);
+	wake_up_all(&suballoc->free_event);
+	kfree(cmdbuf);
+}
+
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
+{
+	return buf->suballoc->iova + buf->suballoc_offset;
+}
+
+dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
+{
+	return buf->suballoc->paddr + buf->suballoc_offset;
+}
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
new file mode 100644
index 000000000000..80d78076c679
--- /dev/null
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2017 Etnaviv Project
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __ETNAVIV_CMDBUF_H__
+#define __ETNAVIV_CMDBUF_H__
+
+#include <linux/types.h>
+
+struct etnaviv_gpu;
+struct etnaviv_cmdbuf_suballoc;
+
+struct etnaviv_cmdbuf {
+	/* suballocator this cmdbuf is allocated from */
+	struct etnaviv_cmdbuf_suballoc *suballoc;
+	/* user context key, must be unique between all active users */
+	struct etnaviv_file_private *ctx;
+	/* cmdbuf properties */
+	int suballoc_offset;
+	void *vaddr;
+	u32 size;
+	u32 user_size;
+	/* fence after which this buffer is to be disposed */
+	struct dma_fence *fence;
+	/* target exec state */
+	u32 exec_state;
+	/* per GPU in-flight list */
+	struct list_head node;
+	/* BOs attached to this command buffer */
+	unsigned int nr_bos;
+	struct etnaviv_vram_mapping *bo_map[0];
+};
+
+struct etnaviv_cmdbuf_suballoc *
+etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
+void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+
+struct etnaviv_cmdbuf *
+etnaviv_cmdbuf_new(struct etnaviv_cmdbuf_suballoc *suballoc, u32 size,
+		   size_t nr_bos);
+void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
+
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
+dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);
+
+#endif /* __ETNAVIV_CMDBUF_H__ */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 590be0d1dd95..587e45043542 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -18,11 +18,11 @@
 #include <linux/of_platform.h>
 #include <drm/drm_of.h>
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_gem.h"
 
 #ifdef CONFIG_DRM_ETNAVIV_REGISTER_LOGGING
 static bool reglog;
@@ -177,7 +177,8 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu, struct seq_file *m)
 	u32 i;
 
 	seq_printf(m, "virt %p - phys 0x%llx - free 0x%08x\n",
-			buf->vaddr, (u64)buf->paddr, size - buf->user_size);
+			buf->vaddr, (u64)etnaviv_cmdbuf_get_pa(buf),
+			size - buf->user_size);
 
 	for (i = 0; i < size / 4; i++) {
 		if (i && !(i % 4))
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index c255eda40526..e41f38667c1c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -73,7 +73,7 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		struct drm_file *file);
 
 int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int etnaviv_gem_fault(struct vm_fault *vmf);
 int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset);
 struct sg_table *etnaviv_gem_prime_get_sg_table(struct drm_gem_object *obj);
 void *etnaviv_gem_prime_vmap(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index af65491a78e2..d019b5e311cc 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/devcoredump.h>
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
@@ -177,12 +178,11 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
 	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer->vaddr,
 			      gpu->buffer->size,
-			      etnaviv_iommu_get_cmdbuf_va(gpu, gpu->buffer));
+			      etnaviv_cmdbuf_get_va(gpu->buffer));
 
 	list_for_each_entry(cmd, &gpu->active_cmd_list, node)
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD, cmd->vaddr,
-				      cmd->size,
-				      etnaviv_iommu_get_cmdbuf_va(gpu, cmd));
+				      cmd->size, etnaviv_cmdbuf_get_va(cmd));
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.c b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
index 114dddbd297b..fd56f92f3469 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@@ -16,6 +16,8 @@
 
 #include <linux/spinlock.h>
 #include <linux/shmem_fs.h>
+#include <linux/sched/mm.h>
+#include <linux/sched/task.h>
 
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
@@ -175,8 +177,9 @@ int etnaviv_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	return obj->ops->mmap(obj, vma);
 }
 
-int etnaviv_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int etnaviv_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct page **pages, *page;
@@ -486,7 +489,7 @@ static void etnaviv_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 
 	seq_printf(m, "%08x: %c %2d (%2d) %08lx %p %zd\n",
 			etnaviv_obj->flags, is_active(etnaviv_obj) ? 'A' : 'I',
-			obj->name, obj->refcount.refcount.counter,
+			obj->name, kref_read(&obj->refcount),
 			off, etnaviv_obj->vaddr, obj->size);
 
 	rcu_read_lock();
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index afdd55ddf821..726090d7a6ac 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -15,6 +15,7 @@
  */
 
 #include <linux/reservation.h>
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@@ -332,8 +333,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	bos = drm_malloc_ab(args->nr_bos, sizeof(*bos));
 	relocs = drm_malloc_ab(args->nr_relocs, sizeof(*relocs));
 	stream = drm_malloc_ab(1, args->stream_size);
-	cmdbuf = etnaviv_gpu_cmdbuf_new(gpu, ALIGN(args->stream_size, 8) + 8,
-					args->nr_bos);
+	cmdbuf = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc,
+				    ALIGN(args->stream_size, 8) + 8,
+				    args->nr_bos);
 	if (!bos || !relocs || !stream || !cmdbuf) {
 		ret = -ENOMEM;
 		goto err_submit_cmds;
@@ -422,7 +424,7 @@ err_submit_objects:
 err_submit_cmds:
 	/* if we still own the cmdbuf */
 	if (cmdbuf)
-		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		etnaviv_cmdbuf_free(cmdbuf);
 	if (stream)
 		drm_free_large(stream);
 	if (bos)
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 0a67124bb2a4..130d7d517a19 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -18,6 +18,8 @@
 #include <linux/dma-fence.h>
 #include <linux/moduleparam.h>
 #include <linux/of_device.h>
+
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@@ -546,6 +548,37 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
 		  VIVS_FE_COMMAND_CONTROL_PREFETCH(prefetch));
 }
 
+static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
+{
+	/*
+	 * Base value for VIVS_PM_PULSE_EATER register on models where it
+	 * cannot be read, extracted from vivante kernel driver.
+	 */
+	u32 pulse_eater = 0x01590880;
+
+	if (etnaviv_is_model_rev(gpu, GC4000, 0x5208) ||
+	    etnaviv_is_model_rev(gpu, GC4000, 0x5222)) {
+		pulse_eater |= BIT(23);
+
+	}
+
+	if (etnaviv_is_model_rev(gpu, GC1000, 0x5039) ||
+	    etnaviv_is_model_rev(gpu, GC1000, 0x5040)) {
+		pulse_eater &= ~BIT(16);
+		pulse_eater |= BIT(17);
+	}
+
+	if ((gpu->identity.revision > 0x5420) &&
+	    (gpu->identity.features & chipFeatures_PIPE_3D))
+	{
+		/* Performance fix: disable internal DFS */
+		pulse_eater = gpu_read(gpu, VIVS_PM_PULSE_EATER);
+		pulse_eater |= BIT(18);
+	}
+
+	gpu_write(gpu, VIVS_PM_PULSE_EATER, pulse_eater);
+}
+
 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
 	u16 prefetch;
@@ -586,6 +619,9 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 		gpu_write(gpu, VIVS_MC_BUS_CONFIG, bus_config);
 	}
 
+	/* setup the pulse eater */
+	etnaviv_gpu_setup_pulse_eater(gpu);
+
 	/* setup the MMU */
 	etnaviv_iommu_restore(gpu);
 
@@ -593,7 +629,7 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_init(gpu);
 
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-	etnaviv_gpu_start_fe(gpu, etnaviv_iommu_get_cmdbuf_va(gpu, gpu->buffer),
+	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(gpu->buffer),
 			     prefetch);
 }
 
@@ -658,8 +694,15 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}
 
+	gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
+	if (IS_ERR(gpu->cmdbuf_suballoc)) {
+		dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
+		ret = PTR_ERR(gpu->cmdbuf_suballoc);
+		goto fail;
+	}
+
 	/* Create buffer: */
-	gpu->buffer = etnaviv_gpu_cmdbuf_new(gpu, PAGE_SIZE, 0);
+	gpu->buffer = etnaviv_cmdbuf_new(gpu->cmdbuf_suballoc, PAGE_SIZE, 0);
 	if (!gpu->buffer) {
 		ret = -ENOMEM;
 		dev_err(gpu->dev, "could not create command buffer\n");
@@ -667,7 +710,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	}
 
 	if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
-	    gpu->buffer->paddr - gpu->memory_base > 0x80000000) {
+	    etnaviv_cmdbuf_get_va(gpu->buffer) > 0x80000000) {
 		ret = -EINVAL;
 		dev_err(gpu->dev,
 			"command buffer outside valid memory window\n");
@@ -694,7 +737,7 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	return 0;
 
 free_buffer:
-	etnaviv_gpu_cmdbuf_free(gpu->buffer);
+	etnaviv_cmdbuf_free(gpu->buffer);
 	gpu->buffer = NULL;
 destroy_iommu:
 	etnaviv_iommu_destroy(gpu->mmu);
@@ -1117,41 +1160,6 @@ static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
  * Cmdstream submission/retirement:
  */
 
-struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu, u32 size,
-	size_t nr_bos)
-{
-	struct etnaviv_cmdbuf *cmdbuf;
-	size_t sz = size_vstruct(nr_bos, sizeof(cmdbuf->bo_map[0]),
-				 sizeof(*cmdbuf));
-
-	cmdbuf = kzalloc(sz, GFP_KERNEL);
-	if (!cmdbuf)
-		return NULL;
-
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V2)
-		size = ALIGN(size, SZ_4K);
-
-	cmdbuf->vaddr = dma_alloc_wc(gpu->dev, size, &cmdbuf->paddr,
-				     GFP_KERNEL);
-	if (!cmdbuf->vaddr) {
-		kfree(cmdbuf);
-		return NULL;
-	}
-
-	cmdbuf->gpu = gpu;
-	cmdbuf->size = size;
-
-	return cmdbuf;
-}
-
-void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
-{
-	etnaviv_iommu_put_cmdbuf_va(cmdbuf->gpu, cmdbuf);
-	dma_free_wc(cmdbuf->gpu->dev, cmdbuf->size, cmdbuf->vaddr,
-		    cmdbuf->paddr);
-	kfree(cmdbuf);
-}
-
 static void retire_worker(struct work_struct *work)
 {
 	struct etnaviv_gpu *gpu = container_of(work, struct etnaviv_gpu,
@@ -1177,7 +1185,7 @@ static void retire_worker(struct work_struct *work)
 			etnaviv_gem_mapping_unreference(mapping);
 		}
 
-		etnaviv_gpu_cmdbuf_free(cmdbuf);
+		etnaviv_cmdbuf_free(cmdbuf);
 		/*
 		 * We need to balance the runtime PM count caused by
 		 * each submission.  Upon submission, we increment
@@ -1593,10 +1601,15 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 #endif
 
 	if (gpu->buffer) {
-		etnaviv_gpu_cmdbuf_free(gpu->buffer);
+		etnaviv_cmdbuf_free(gpu->buffer);
 		gpu->buffer = NULL;
 	}
 
+	if (gpu->cmdbuf_suballoc) {
+		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
+		gpu->cmdbuf_suballoc = NULL;
+	}
+
 	if (gpu->mmu) {
 		etnaviv_iommu_destroy(gpu->mmu);
 		gpu->mmu = NULL;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 8c6b824e9d0a..1c0606ea7d5e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -92,6 +92,7 @@ struct etnaviv_event {
 	struct dma_fence *fence;
 };
 
+struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_cmdbuf;
 
 struct etnaviv_gpu {
@@ -135,6 +136,7 @@ struct etnaviv_gpu {
 	int irq;
 
 	struct etnaviv_iommu *mmu;
+	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
 
 	/* Power Control: */
 	struct clk *clk_bus;
@@ -150,29 +152,6 @@ struct etnaviv_gpu {
 	struct work_struct recover_work;
 };
 
-struct etnaviv_cmdbuf {
-	/* device this cmdbuf is allocated for */
-	struct etnaviv_gpu *gpu;
-	/* user context key, must be unique between all active users */
-	struct etnaviv_file_private *ctx;
-	/* cmdbuf properties */
-	void *vaddr;
-	dma_addr_t paddr;
-	u32 size;
-	u32 user_size;
-	/* vram node used if the cmdbuf is mapped through the MMUv2 */
-	struct drm_mm_node vram_node;
-	/* fence after which this buffer is to be disposed */
-	struct dma_fence *fence;
-	/* target exec state */
-	u32 exec_state;
-	/* per GPU in-flight list */
-	struct list_head node;
-	/* BOs attached to this command buffer */
-	unsigned int nr_bos;
-	struct etnaviv_vram_mapping *bo_map[0];
-};
-
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
 {
 	etnaviv_writel(data, gpu->mmio + reg);
@@ -211,9 +190,6 @@ int etnaviv_gpu_wait_obj_inactive(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_object *etnaviv_obj, struct timespec *timeout);
 int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	struct etnaviv_gem_submit *submit, struct etnaviv_cmdbuf *cmdbuf);
-struct etnaviv_cmdbuf *etnaviv_gpu_cmdbuf_new(struct etnaviv_gpu *gpu,
-					      u32 size, size_t nr_bos);
-void etnaviv_gpu_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);
 int etnaviv_gpu_pm_get_sync(struct etnaviv_gpu *gpu);
 void etnaviv_gpu_pm_put(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
index 81f1583a7946..7a7c97f599d7 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@@ -184,7 +184,7 @@ static void etnaviv_iommuv1_dump(struct iommu_domain *domain, void *buf)
 	memcpy(buf, etnaviv_domain->pgtable.pgtable, PT_SIZE);
 }
 
-static struct etnaviv_iommu_ops etnaviv_iommu_ops = {
+static const struct etnaviv_iommu_ops etnaviv_iommu_ops = {
 	.ops = {
 		.domain_free = etnaviv_domain_free,
 		.map = etnaviv_iommuv1_map,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
index 7e9c4d210a84..cbe447ac5974 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@@ -21,6 +21,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/bitops.h>
 
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_iommu.h"
@@ -229,7 +230,7 @@ static void etnaviv_iommuv2_dump(struct iommu_domain *domain, void *buf)
 			memcpy(buf, etnaviv_domain->stlb_cpu[i], SZ_4K);
 }
 
-static struct etnaviv_iommu_ops etnaviv_iommu_ops = {
+static const struct etnaviv_iommu_ops etnaviv_iommu_ops = {
 	.ops = {
 		.domain_free = etnaviv_iommuv2_domain_free,
 		.map = etnaviv_iommuv2_map,
@@ -254,7 +255,8 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
 	prefetch = etnaviv_buffer_config_mmuv2(gpu,
 				(u32)etnaviv_domain->mtlb_dma,
 				(u32)etnaviv_domain->bad_page_dma);
-	etnaviv_gpu_start_fe(gpu, gpu->buffer->paddr, prefetch);
+	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(gpu->buffer),
+			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
 
 	gpu_write(gpu, VIVS_MMUv2_CONTROL, VIVS_MMUv2_CONTROL_ENABLE);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
index f503af462dad..f103e787de94 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@@ -15,6 +15,7 @@
  */
 
 #include "common.xml.h"
+#include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
@@ -107,6 +108,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 				   struct drm_mm_node *node, size_t size)
 {
 	struct etnaviv_vram_mapping *free = NULL;
+	enum drm_mm_insert_mode mode = DRM_MM_INSERT_LOW;
 	int ret;
 
 	lockdep_assert_held(&mmu->lock);
@@ -117,15 +119,10 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		struct list_head list;
 		bool found;
 
-		/*
-		 * XXX: The DRM_MM_SEARCH_BELOW is really a hack to trick
-		 * drm_mm into giving out a low IOVA after address space
-		 * rollover. This needs a proper fix.
-		 */
 		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
-			size, 0, mmu->last_iova, ~0UL,
-			mmu->last_iova ? DRM_MM_SEARCH_DEFAULT : DRM_MM_SEARCH_BELOW);
-
+						  size, 0, 0,
+						  mmu->last_iova, U64_MAX,
+						  mode);
 		if (ret != -ENOSPC)
 			break;
 
@@ -140,7 +137,7 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		}
 
 		/* Try to retire some entries */
-		drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, 0);
+		drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, mode);
 
 		found = 0;
 		INIT_LIST_HEAD(&list);
@@ -192,13 +189,12 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 			list_del_init(&m->scan_node);
 		}
 
+		mode = DRM_MM_INSERT_EVICT;
+
 		/*
 		 * We removed enough mappings so that the new allocation will
-		 * succeed.  Ensure that the MMU will be flushed before the
-		 * associated commit requesting this mapping, and retry the
-		 * allocation one more time.
+		 * succeed, retry the allocation one more time.
 		 */
-		mmu->need_flush = true;
 	}
 
 	return ret;
@@ -250,6 +246,7 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 	}
 
 	list_add_tail(&mapping->mmu_node, &mmu->mappings);
+	mmu->need_flush = true;
 	mutex_unlock(&mmu->lock);
 
 	return ret;
@@ -267,6 +264,7 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
 		etnaviv_iommu_remove_mapping(mmu, mapping);
 
 	list_del(&mapping->mmu_node);
+	mmu->need_flush = true;
 	mutex_unlock(&mmu->lock);
 }
 
@@ -322,55 +320,50 @@ void etnaviv_iommu_restore(struct etnaviv_gpu *gpu)
 		etnaviv_iommuv2_restore(gpu);
 }
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf)
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
 	if (mmu->version == ETNAVIV_IOMMU_V1) {
-		return buf->paddr - gpu->memory_base;
+		*iova = paddr - gpu->memory_base;
+		return 0;
 	} else {
 		int ret;
 
-		if (buf->vram_node.allocated)
-			return (u32)buf->vram_node.start;
-
 		mutex_lock(&mmu->lock);
-		ret = etnaviv_iommu_find_iova(mmu, &buf->vram_node,
-					      buf->size + SZ_64K);
+		ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
 		if (ret < 0) {
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		ret = iommu_map(mmu->domain, buf->vram_node.start, buf->paddr,
-				buf->size, IOMMU_READ);
+		ret = iommu_map(mmu->domain, vram_node->start, paddr, size,
+				IOMMU_READ);
 		if (ret < 0) {
-			drm_mm_remove_node(&buf->vram_node);
+			drm_mm_remove_node(vram_node);
 			mutex_unlock(&mmu->lock);
-			return 0;
+			return ret;
 		}
-		/*
-		 * At least on GC3000 the FE MMU doesn't properly flush old TLB
-		 * entries. Make sure to space the command buffers out in a way
-		 * that the FE MMU prefetch won't load invalid entries.
-		 */
-		mmu->last_iova = buf->vram_node.start + buf->size + SZ_64K;
+		mmu->last_iova = vram_node->start + size;
 		gpu->mmu->need_flush = true;
 		mutex_unlock(&mmu->lock);
 
-		return (u32)buf->vram_node.start;
+		*iova = (u32)vram_node->start;
+		return 0;
 	}
 }
 
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf)
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova)
 {
 	struct etnaviv_iommu *mmu = gpu->mmu;
 
-	if (mmu->version == ETNAVIV_IOMMU_V2 && buf->vram_node.allocated) {
+	if (mmu->version == ETNAVIV_IOMMU_V2) {
 		mutex_lock(&mmu->lock);
-		iommu_unmap(mmu->domain, buf->vram_node.start, buf->size);
-		drm_mm_remove_node(&buf->vram_node);
+		iommu_unmap(mmu->domain,iova, size);
+		drm_mm_remove_node(vram_node);
 		mutex_unlock(&mmu->lock);
 	}
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
index e787e49c9693..54be289e5981 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@@ -62,10 +62,12 @@ void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
 	struct etnaviv_vram_mapping *mapping);
 void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
 
-u32 etnaviv_iommu_get_cmdbuf_va(struct etnaviv_gpu *gpu,
-				struct etnaviv_cmdbuf *buf);
-void etnaviv_iommu_put_cmdbuf_va(struct etnaviv_gpu *gpu,
-				 struct etnaviv_cmdbuf *buf);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
+				  struct drm_mm_node *vram_node, size_t size,
+				  u32 *iova);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
+				   struct drm_mm_node *vram_node, size_t size,
+				   u32 iova);
 
 size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
 void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);
diff --git a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
index d69af00bdd6a..0fd6f7a18364 100644
--- a/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
+++ b/drivers/gpu/drm/exynos/exynos5433_drm_decon.c
@@ -13,9 +13,11 @@
 #include <linux/platform_device.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/pm_runtime.h>
+#include <linux/regmap.h>
 
 #include <video/exynos5433_decon.h>
 
@@ -25,6 +27,9 @@
 #include "exynos_drm_plane.h"
 #include "exynos_drm_iommu.h"
 
+#define DSD_CFG_MUX 0x1004
+#define DSD_CFG_MUX_TE_UNMASK_GLOBAL BIT(13)
+
 #define WINDOWS_NR	3
 #define MIN_FB_WIDTH_FOR_16WORD_BURST	128
 
@@ -57,6 +62,7 @@ struct decon_context {
 	struct exynos_drm_plane		planes[WINDOWS_NR];
 	struct exynos_drm_plane_config	configs[WINDOWS_NR];
 	void __iomem			*addr;
+	struct regmap			*sysreg;
 	struct clk			*clks[ARRAY_SIZE(decon_clks_name)];
 	int				pipe;
 	unsigned long			flags;
@@ -118,18 +124,29 @@ static void decon_disable_vblank(struct exynos_drm_crtc *crtc)
 
 static void decon_setup_trigger(struct decon_context *ctx)
 {
-	u32 val = !(ctx->out_type & I80_HW_TRG)
-		? TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F |
-		  TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN
-		: TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F |
-		  TRIGCON_HWTRIGMASK | TRIGCON_HWTRIGEN;
-	writel(val, ctx->addr + DECON_TRIGCON);
+	if (!(ctx->out_type & (IFTYPE_I80 | I80_HW_TRG)))
+		return;
+
+	if (!(ctx->out_type & I80_HW_TRG)) {
+		writel(TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN
+		       | TRIGCON_TE_AUTO_MASK | TRIGCON_SWTRIGEN,
+		       ctx->addr + DECON_TRIGCON);
+		return;
+	}
+
+	writel(TRIGCON_TRIGEN_PER_F | TRIGCON_TRIGEN_F | TRIGCON_HWTRIGMASK
+	       | TRIGCON_HWTRIGEN, ctx->addr + DECON_TRIGCON);
+
+	if (regmap_update_bits(ctx->sysreg, DSD_CFG_MUX,
+			       DSD_CFG_MUX_TE_UNMASK_GLOBAL, ~0))
+		DRM_ERROR("Cannot update sysreg.\n");
 }
 
 static void decon_commit(struct exynos_drm_crtc *crtc)
 {
 	struct decon_context *ctx = crtc->ctx;
 	struct drm_display_mode *m = &crtc->base.mode;
+	bool interlaced = false;
 	u32 val;
 
 	if (test_bit(BIT_SUSPENDED, &ctx->flags))
@@ -140,13 +157,16 @@ static void decon_commit(struct exynos_drm_crtc *crtc)
 		m->crtc_hsync_end = m->crtc_htotal - 92;
 		m->crtc_vsync_start = m->crtc_vdisplay + 1;
 		m->crtc_vsync_end = m->crtc_vsync_start + 1;
+		if (m->flags & DRM_MODE_FLAG_INTERLACE)
+			interlaced = true;
 	}
 
-	if (ctx->out_type & (IFTYPE_I80 | I80_HW_TRG))
-		decon_setup_trigger(ctx);
+	decon_setup_trigger(ctx);
 
 	/* lcd on and use command if */
 	val = VIDOUT_LCD_ON;
+	if (interlaced)
+		val |= VIDOUT_INTERLACE_EN_F;
 	if (ctx->out_type & IFTYPE_I80) {
 		val |= VIDOUT_COMMAND_IF;
 	} else {
@@ -155,15 +175,21 @@ static void decon_commit(struct exynos_drm_crtc *crtc)
 
 	writel(val, ctx->addr + DECON_VIDOUTCON0);
 
-	val = VIDTCON2_LINEVAL(m->vdisplay - 1) |
-		VIDTCON2_HOZVAL(m->hdisplay - 1);
+	if (interlaced)
+		val = VIDTCON2_LINEVAL(m->vdisplay / 2 - 1) |
+			VIDTCON2_HOZVAL(m->hdisplay - 1);
+	else
+		val = VIDTCON2_LINEVAL(m->vdisplay - 1) |
+			VIDTCON2_HOZVAL(m->hdisplay - 1);
 	writel(val, ctx->addr + DECON_VIDTCON2);
 
 	if (!(ctx->out_type & IFTYPE_I80)) {
-		val = VIDTCON00_VBPD_F(
-				m->crtc_vtotal - m->crtc_vsync_end - 1) |
-			VIDTCON00_VFPD_F(
-				m->crtc_vsync_start - m->crtc_vdisplay - 1);
+		int vbp = m->crtc_vtotal - m->crtc_vsync_end;
+		int vfp = m->crtc_vsync_start - m->crtc_vdisplay;
+
+		if (interlaced)
+			vbp = vbp / 2 - 1;
+		val = VIDTCON00_VBPD_F(vbp - 1) | VIDTCON00_VFPD_F(vfp - 1);
 		writel(val, ctx->addr + DECON_VIDTCON00);
 
 		val = VIDTCON01_VSPW_F(
@@ -278,12 +304,22 @@ static void decon_update_plane(struct exynos_drm_crtc *crtc,
 	if (test_bit(BIT_SUSPENDED, &ctx->flags))
 		return;
 
-	val = COORDINATE_X(state->crtc.x) | COORDINATE_Y(state->crtc.y);
-	writel(val, ctx->addr + DECON_VIDOSDxA(win));
+	if (crtc->base.mode.flags & DRM_MODE_FLAG_INTERLACE) {
+		val = COORDINATE_X(state->crtc.x) |
+			COORDINATE_Y(state->crtc.y / 2);
+		writel(val, ctx->addr + DECON_VIDOSDxA(win));
+
+		val = COORDINATE_X(state->crtc.x + state->crtc.w - 1) |
+			COORDINATE_Y((state->crtc.y + state->crtc.h) / 2 - 1);
+		writel(val, ctx->addr + DECON_VIDOSDxB(win));
+	} else {
+		val = COORDINATE_X(state->crtc.x) | COORDINATE_Y(state->crtc.y);
+		writel(val, ctx->addr + DECON_VIDOSDxA(win));
 
-	val = COORDINATE_X(state->crtc.x + state->crtc.w - 1) |
-		COORDINATE_Y(state->crtc.y + state->crtc.h - 1);
-	writel(val, ctx->addr + DECON_VIDOSDxB(win));
+		val = COORDINATE_X(state->crtc.x + state->crtc.w - 1) |
+				COORDINATE_Y(state->crtc.y + state->crtc.h - 1);
+		writel(val, ctx->addr + DECON_VIDOSDxB(win));
+	}
 
 	val = VIDOSD_Wx_ALPHA_R_F(0x0) | VIDOSD_Wx_ALPHA_G_F(0x0) |
 		VIDOSD_Wx_ALPHA_B_F(0x0);
@@ -355,8 +391,6 @@ static void decon_swreset(struct decon_context *ctx)
 		udelay(10);
 	}
 
-	WARN(tries == 0, "failed to disable DECON\n");
-
 	writel(VIDCON0_SWRESET, ctx->addr + DECON_VIDCON0);
 	for (tries = 2000; tries; --tries) {
 		if (~readl(ctx->addr + DECON_VIDCON0) & VIDCON0_SWRESET)
@@ -557,6 +591,13 @@ static irqreturn_t decon_irq_handler(int irq, void *dev_id)
 
 	if (val) {
 		writel(val, ctx->addr + DECON_VIDINTCON1);
+		if (ctx->out_type & IFTYPE_HDMI) {
+			val = readl(ctx->addr + DECON_VIDOUTCON0);
+			val &= VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F;
+			if (val ==
+			    (VIDOUT_INTERLACE_EN_F | VIDOUT_INTERLACE_FIELD_F))
+				return IRQ_HANDLED;
+		}
 		drm_crtc_handle_vblank(&ctx->crtc->base);
 	}
 
@@ -637,6 +678,15 @@ static int exynos5433_decon_probe(struct platform_device *pdev)
 		ctx->out_type |= IFTYPE_I80;
 	}
 
+	if (ctx->out_type | I80_HW_TRG) {
+		ctx->sysreg = syscon_regmap_lookup_by_phandle(dev->of_node,
+							"samsung,disp-sysreg");
+		if (IS_ERR(ctx->sysreg)) {
+			dev_err(dev, "failed to get system register\n");
+			return PTR_ERR(ctx->sysreg);
+		}
+	}
+
 	for (i = 0; i < ARRAY_SIZE(decon_clks_name); i++) {
 		struct clk *clk;
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
index a7884bea42eb..bcdb2720b68e 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fbdev.c
@@ -208,7 +208,6 @@ int exynos_drm_fbdev_init(struct drm_device *dev)
 	struct exynos_drm_fbdev *fbdev;
 	struct exynos_drm_private *private = dev->dev_private;
 	struct drm_fb_helper *helper;
-	unsigned int num_crtc;
 	int ret;
 
 	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
@@ -225,9 +224,7 @@ int exynos_drm_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, helper, &exynos_drm_fb_helper_funcs);
 
-	num_crtc = dev->mode_config.num_crtc;
-
-	ret = drm_fb_helper_init(dev, helper, num_crtc, MAX_CONNECTOR);
+	ret = drm_fb_helper_init(dev, helper, MAX_CONNECTOR);
 	if (ret < 0) {
 		DRM_ERROR("failed to initialize drm fb helper.\n");
 		goto err_init;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index 745cfbdf6b39..a9fa444c6053 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -125,10 +125,8 @@ static struct fimd_driver_data exynos3_fimd_driver_data = {
 	.timing_base = 0x20000,
 	.lcdblk_offset = 0x210,
 	.lcdblk_bypass_shift = 1,
-	.trg_type = I80_HW_TRG,
 	.has_shadowcon = 1,
 	.has_vidoutcon = 1,
-	.has_trigger_per_te = 1,
 };
 
 static struct fimd_driver_data exynos4_fimd_driver_data = {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 603d8425cca6..2b8bf2dd6387 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -1683,7 +1683,7 @@ struct platform_driver g2d_driver = {
 	.probe		= g2d_probe,
 	.remove		= g2d_remove,
 	.driver		= {
-		.name	= "s5p-g2d",
+		.name	= "exynos-drm-g2d",
 		.owner	= THIS_MODULE,
 		.pm	= &g2d_pm_ops,
 		.of_match_table = exynos_g2d_match,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.c b/drivers/gpu/drm/exynos/exynos_drm_gem.c
index 57b81460fec8..4c28f7ffcc4d 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.c
@@ -447,8 +447,9 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
 	return ret;
 }
 
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int exynos_drm_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct exynos_drm_gem *exynos_gem = to_exynos_gem(obj);
 	unsigned long pfn;
diff --git a/drivers/gpu/drm/exynos/exynos_drm_gem.h b/drivers/gpu/drm/exynos/exynos_drm_gem.h
index df7c543d6558..85457255fcd1 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_gem.h
+++ b/drivers/gpu/drm/exynos/exynos_drm_gem.h
@@ -116,7 +116,7 @@ int exynos_drm_gem_dumb_map_offset(struct drm_file *file_priv,
 				   uint64_t *offset);
 
 /* page fault handler and mmap fault address(virtual) to physical memory. */
-int exynos_drm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int exynos_drm_gem_fault(struct vm_fault *vmf);
 
 /* set vm_flags and we can change the vm attribute to other one at here. */
 int exynos_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index 5ed8b1effe71..88ccc0469316 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -35,6 +35,7 @@
 #include <linux/io.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
+#include <linux/of_graph.h>
 #include <linux/hdmi.h>
 #include <linux/component.h>
 #include <linux/mfd/syscon.h>
@@ -133,6 +134,7 @@ struct hdmi_context {
 	struct regulator_bulk_data	regul_bulk[ARRAY_SIZE(supply)];
 	struct regulator		*reg_hdmi_en;
 	struct exynos_drm_clk		phy_clk;
+	struct drm_bridge		*bridge;
 };
 
 static inline struct hdmi_context *encoder_to_hdmi(struct drm_encoder *e)
@@ -509,9 +511,9 @@ static const struct hdmiphy_config hdmiphy_5433_configs[] = {
 	{
 		.pixel_clock = 27000000,
 		.conf = {
-			0x01, 0x51, 0x22, 0x51, 0x08, 0xfc, 0x88, 0x46,
-			0x72, 0x50, 0x24, 0x0c, 0x24, 0x0f, 0x7c, 0xa5,
-			0xd4, 0x2b, 0x87, 0x00, 0x00, 0x04, 0x00, 0x30,
+			0x01, 0x51, 0x2d, 0x75, 0x01, 0x00, 0x88, 0x02,
+			0x72, 0x50, 0x44, 0x8c, 0x27, 0x00, 0x7c, 0xac,
+			0xd6, 0x2b, 0x67, 0x00, 0x00, 0x04, 0x00, 0x30,
 			0x08, 0x10, 0x01, 0x01, 0x48, 0x40, 0x00, 0x40,
 		},
 	},
@@ -519,9 +521,9 @@ static const struct hdmiphy_config hdmiphy_5433_configs[] = {
 		.pixel_clock = 27027000,
 		.conf = {
 			0x01, 0x51, 0x2d, 0x72, 0x64, 0x09, 0x88, 0xc3,
-			0x71, 0x50, 0x24, 0x14, 0x24, 0x0f, 0x7c, 0xa5,
-			0xd4, 0x2b, 0x87, 0x00, 0x00, 0x04, 0x00, 0x30,
-			0x28, 0x10, 0x01, 0x01, 0x48, 0x40, 0x00, 0x40,
+			0x71, 0x50, 0x44, 0x8c, 0x27, 0x00, 0x7c, 0xac,
+			0xd6, 0x2b, 0x67, 0x00, 0x00, 0x04, 0x00, 0x30,
+			0x08, 0x10, 0x01, 0x01, 0x48, 0x40, 0x00, 0x40,
 		},
 	},
 	{
@@ -587,6 +589,15 @@ static const struct hdmiphy_config hdmiphy_5433_configs[] = {
 			0x08, 0x10, 0x01, 0x01, 0x48, 0x4a, 0x00, 0x40,
 		},
 	},
+	{
+		.pixel_clock = 297000000,
+		.conf = {
+			0x01, 0x51, 0x3E, 0x05, 0x40, 0xF0, 0x88, 0xC2,
+			0x52, 0x53, 0x44, 0x8C, 0x27, 0x00, 0x7C, 0xAC,
+			0xD6, 0x2B, 0x67, 0x00, 0x00, 0x04, 0x00, 0x30,
+			0x08, 0x10, 0x01, 0x01, 0x48, 0x40, 0x00, 0x40,
+		},
+	},
 };
 
 static const char * const hdmi_clk_gates4[] = {
@@ -788,7 +799,8 @@ static void hdmi_reg_infoframes(struct hdmi_context *hdata)
 				sizeof(buf));
 	if (ret > 0) {
 		hdmi_reg_writeb(hdata, HDMI_VSI_CON, HDMI_VSI_CON_EVERY_VSYNC);
-		hdmi_reg_write_buf(hdata, HDMI_VSI_HEADER0, buf, ret);
+		hdmi_reg_write_buf(hdata, HDMI_VSI_HEADER0, buf, 3);
+		hdmi_reg_write_buf(hdata, HDMI_VSI_DATA(0), buf + 3, ret - 3);
 	}
 
 	ret = hdmi_audio_infoframe_init(&frm.audio);
@@ -912,7 +924,15 @@ static int hdmi_create_connector(struct drm_encoder *encoder)
 	drm_connector_register(connector);
 	drm_mode_connector_attach_encoder(connector, encoder);
 
-	return 0;
+	if (hdata->bridge) {
+		encoder->bridge = hdata->bridge;
+		hdata->bridge->encoder = encoder;
+		ret = drm_bridge_attach(encoder, hdata->bridge, NULL);
+		if (ret)
+			DRM_ERROR("Failed to attach bridge\n");
+	}
+
+	return ret;
 }
 
 static bool hdmi_mode_fixup(struct drm_encoder *encoder,
@@ -1581,6 +1601,31 @@ static void hdmiphy_clk_enable(struct exynos_drm_clk *clk, bool enable)
 		hdmiphy_disable(hdata);
 }
 
+static int hdmi_bridge_init(struct hdmi_context *hdata)
+{
+	struct device *dev = hdata->dev;
+	struct device_node *ep, *np;
+
+	ep = of_graph_get_endpoint_by_regs(dev->of_node, 1, -1);
+	if (!ep)
+		return 0;
+
+	np = of_graph_get_remote_port_parent(ep);
+	of_node_put(ep);
+	if (!np) {
+		DRM_ERROR("failed to get remote port parent");
+		return -EINVAL;
+	}
+
+	hdata->bridge = of_drm_find_bridge(np);
+	of_node_put(np);
+
+	if (!hdata->bridge)
+		return -EPROBE_DEFER;
+
+	return 0;
+}
+
 static int hdmi_resources_init(struct hdmi_context *hdata)
 {
 	struct device *dev = hdata->dev;
@@ -1620,17 +1665,18 @@ static int hdmi_resources_init(struct hdmi_context *hdata)
 
 	hdata->reg_hdmi_en = devm_regulator_get_optional(dev, "hdmi-en");
 
-	if (PTR_ERR(hdata->reg_hdmi_en) == -ENODEV)
-		return 0;
+	if (PTR_ERR(hdata->reg_hdmi_en) != -ENODEV) {
+		if (IS_ERR(hdata->reg_hdmi_en))
+			return PTR_ERR(hdata->reg_hdmi_en);
 
-	if (IS_ERR(hdata->reg_hdmi_en))
-		return PTR_ERR(hdata->reg_hdmi_en);
-
-	ret = regulator_enable(hdata->reg_hdmi_en);
-	if (ret)
-		DRM_ERROR("failed to enable hdmi-en regulator\n");
+		ret = regulator_enable(hdata->reg_hdmi_en);
+		if (ret) {
+			DRM_ERROR("failed to enable hdmi-en regulator\n");
+			return ret;
+		}
+	}
 
-	return ret;
+	return hdmi_bridge_init(hdata);
 }
 
 static struct of_device_id hdmi_match_types[] = {
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
index 48705248f894..04173235f448 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_drv.c
@@ -94,7 +94,7 @@ static int fsl_dcu_load(struct drm_device *dev, unsigned long flags)
 			"Invalid legacyfb_depth.  Defaulting to 24bpp\n");
 		legacyfb_depth = 24;
 	}
-	fsl_dev->fbdev = drm_fbdev_cma_init(dev, legacyfb_depth, 1, 1);
+	fsl_dev->fbdev = drm_fbdev_cma_init(dev, legacyfb_depth, 1);
 	if (IS_ERR(fsl_dev->fbdev)) {
 		ret = PTR_ERR(fsl_dev->fbdev);
 		fsl_dev->fbdev = NULL;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
index 3194e544ee27..b3d70a63c5a3 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_tcon.c
@@ -72,10 +72,8 @@ struct fsl_tcon *fsl_tcon_init(struct device *dev)
 		return NULL;
 
 	tcon = devm_kzalloc(dev, sizeof(*tcon), GFP_KERNEL);
-	if (!tcon) {
-		ret = -ENOMEM;
+	if (!tcon)
 		goto err_node_put;
-	}
 
 	ret = fsl_tcon_init_regmap(dev, tcon, np);
 	if (ret) {
@@ -89,9 +87,13 @@ struct fsl_tcon *fsl_tcon_init(struct device *dev)
 		goto err_node_put;
 	}
 
-	of_node_put(np);
-	clk_prepare_enable(tcon->ipg_clk);
+	ret = clk_prepare_enable(tcon->ipg_clk);
+	if (ret) {
+		dev_err(dev, "Couldn't enable the TCON clock\n");
+		goto err_node_put;
+	}
 
+	of_node_put(np);
 	dev_info(dev, "Using TCON in bypass mode\n");
 
 	return tcon;
diff --git a/drivers/gpu/drm/gma500/framebuffer.c b/drivers/gpu/drm/gma500/framebuffer.c
index fd1488bf5189..ffe6b4ffa1a8 100644
--- a/drivers/gpu/drm/gma500/framebuffer.c
+++ b/drivers/gpu/drm/gma500/framebuffer.c
@@ -111,8 +111,9 @@ static int psbfb_pan(struct fb_var_screeninfo *var, struct fb_info *info)
         return 0;
 }
 
-static int psbfb_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int psbfb_vm_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct psb_framebuffer *psbfb = vma->vm_private_data;
 	struct drm_device *dev = psbfb->base.dev;
 	struct drm_psb_private *dev_priv = dev->dev_private;
@@ -564,7 +565,7 @@ int psb_fbdev_init(struct drm_device *dev)
 	drm_fb_helper_prepare(dev, &fbdev->psb_fb_helper, &psb_fb_helper_funcs);
 
 	ret = drm_fb_helper_init(dev, &fbdev->psb_fb_helper,
-				 dev_priv->ops->crtcs, INTELFB_CONN_LIMIT);
+				 INTELFB_CONN_LIMIT);
 	if (ret)
 		goto free;
 
diff --git a/drivers/gpu/drm/gma500/gem.c b/drivers/gpu/drm/gma500/gem.c
index 527c62917660..7da061aab729 100644
--- a/drivers/gpu/drm/gma500/gem.c
+++ b/drivers/gpu/drm/gma500/gem.c
@@ -164,8 +164,9 @@ int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
  *	vma->vm_private_data points to the GEM object that is backing this
  *	mapping.
  */
-int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int psb_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj;
 	struct gtt_range *r;
 	int ret;
diff --git a/drivers/gpu/drm/gma500/psb_drv.h b/drivers/gpu/drm/gma500/psb_drv.h
index 05d7aaf47eea..83e22fd4cfc0 100644
--- a/drivers/gpu/drm/gma500/psb_drv.h
+++ b/drivers/gpu/drm/gma500/psb_drv.h
@@ -752,7 +752,7 @@ extern int psb_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 			struct drm_mode_create_dumb *args);
 extern int psb_gem_dumb_map_gtt(struct drm_file *file, struct drm_device *dev,
 			uint32_t handle, uint64_t *offset);
-extern int psb_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+extern int psb_gem_fault(struct vm_fault *vmf);
 
 /* psb_device.c */
 extern const struct psb_ops psb_chip_ops;
diff --git a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
index 16fe79053ee1..d7a4d9095b33 100644
--- a/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
+++ b/drivers/gpu/drm/hisilicon/hibmc/hibmc_drm_fbdev.c
@@ -200,8 +200,7 @@ int hibmc_fbdev_init(struct hibmc_drm_private *priv)
 			      &hibmc_fbdev_helper_funcs);
 
 	/* Now just one crtc and one channel */
-	ret = drm_fb_helper_init(priv->dev,
-				 &hifbdev->helper, 1, 1);
+	ret = drm_fb_helper_init(priv->dev, &hifbdev->helper, 1);
 	if (ret) {
 		DRM_ERROR("failed to initialize fb helper: %d\n", ret);
 		return ret;
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
index 7df0e8535e41..7ec93aec7e88 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
@@ -59,8 +59,7 @@ static void kirin_fbdev_output_poll_changed(struct drm_device *dev)
 		drm_fbdev_cma_hotplug_event(priv->fbdev);
 	} else {
 		priv->fbdev = drm_fbdev_cma_init(dev, 32,
-				dev->mode_config.num_crtc,
-				dev->mode_config.num_connector);
+						 dev->mode_config.num_connector);
 		if (IS_ERR(priv->fbdev))
 			priv->fbdev = NULL;
 	}
diff --git a/drivers/gpu/drm/i915/Makefile b/drivers/gpu/drm/i915/Makefile
index 74ca2e8b2494..c62ab45683c0 100644
--- a/drivers/gpu/drm/i915/Makefile
+++ b/drivers/gpu/drm/i915/Makefile
@@ -129,6 +129,9 @@ i915-y += intel_gvt.o
 include $(src)/gvt/Makefile
 endif
 
+# LPE Audio for VLV and CHT
+i915-y += intel_lpe_audio.o
+
 obj-$(CONFIG_DRM_I915) += i915.o
 
 CFLAGS_i915_trace_points.o := -I$(src)
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 7bb11a555b76..b9c8e2407682 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -481,7 +481,6 @@ struct parser_exec_state {
 	(s->vgpu->gvt->device_info.gmadr_bytes_in_cmd >> 2)
 
 static unsigned long bypass_scan_mask = 0;
-static bool bypass_batch_buffer_scan = true;
 
 /* ring ALL, type = 0 */
 static struct sub_op_bits sub_op_mi[] = {
@@ -1543,9 +1542,6 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 {
 	struct intel_gvt *gvt = s->vgpu->gvt;
 
-	if (bypass_batch_buffer_scan)
-		return 0;
-
 	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
 		if (cmd_val(s, 0) & (1 << 8))
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index 136c6e77561a..46eb9fd3c03f 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -364,58 +364,30 @@ static void free_workload(struct intel_vgpu_workload *workload)
 #define get_desc_from_elsp_dwords(ed, i) \
 	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
 
-
-#define BATCH_BUFFER_ADDR_MASK ((1UL << 32) - (1U << 2))
-#define BATCH_BUFFER_ADDR_HIGH_MASK ((1UL << 16) - (1U))
-static int set_gma_to_bb_cmd(struct intel_shadow_bb_entry *entry_obj,
-			     unsigned long add, int gmadr_bytes)
-{
-	if (WARN_ON(gmadr_bytes != 4 && gmadr_bytes != 8))
-		return -1;
-
-	*((u32 *)(entry_obj->bb_start_cmd_va + (1 << 2))) = add &
-		BATCH_BUFFER_ADDR_MASK;
-	if (gmadr_bytes == 8) {
-		*((u32 *)(entry_obj->bb_start_cmd_va + (2 << 2))) =
-			add & BATCH_BUFFER_ADDR_HIGH_MASK;
-	}
-
-	return 0;
-}
-
 static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 {
-	int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	const int gmadr_bytes = workload->vgpu->gvt->device_info.gmadr_bytes_in_cmd;
+	struct intel_shadow_bb_entry *entry_obj;
 
 	/* pin the gem object to ggtt */
-	if (!list_empty(&workload->shadow_bb)) {
-		struct intel_shadow_bb_entry *entry_obj =
-			list_first_entry(&workload->shadow_bb,
-					 struct intel_shadow_bb_entry,
-					 list);
-		struct intel_shadow_bb_entry *temp;
+	list_for_each_entry(entry_obj, &workload->shadow_bb, list) {
+		struct i915_vma *vma;
 
-		list_for_each_entry_safe(entry_obj, temp, &workload->shadow_bb,
-				list) {
-			struct i915_vma *vma;
-
-			vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0,
-						       4, 0);
-			if (IS_ERR(vma)) {
-				gvt_err("Cannot pin\n");
-				return;
-			}
-
-			/* FIXME: we are not tracking our pinned VMA leaving it
-			 * up to the core to fix up the stray pin_count upon
-			 * free.
-			 */
-
-			/* update the relocate gma with shadow batch buffer*/
-			set_gma_to_bb_cmd(entry_obj,
-					  i915_ggtt_offset(vma),
-					  gmadr_bytes);
+		vma = i915_gem_object_ggtt_pin(entry_obj->obj, NULL, 0, 4, 0);
+		if (IS_ERR(vma)) {
+			gvt_err("Cannot pin\n");
+			return;
 		}
+
+		/* FIXME: we are not tracking our pinned VMA leaving it
+		 * up to the core to fix up the stray pin_count upon
+		 * free.
+		 */
+
+		/* update the relocate gma with shadow batch buffer*/
+		entry_obj->bb_start_cmd_va[1] = i915_ggtt_offset(vma);
+		if (gmadr_bytes == 8)
+			entry_obj->bb_start_cmd_va[2] = 0;
 	}
 }
 
@@ -826,7 +798,7 @@ int intel_vgpu_init_execlist(struct intel_vgpu *vgpu)
 		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
 	}
 
-	vgpu->workloads = kmem_cache_create("gvt-g vgpu workload",
+	vgpu->workloads = kmem_cache_create("gvt-g_vgpu_workload",
 			sizeof(struct intel_vgpu_workload), 0,
 			SLAB_HWCACHE_ALIGN,
 			NULL);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 182914c22ac5..f07cb8ba751f 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -262,8 +262,8 @@ static struct intel_vgpu_type *intel_gvt_find_vgpu_type(struct intel_gvt *gvt,
 	return NULL;
 }
 
-static ssize_t available_instance_show(struct kobject *kobj, struct device *dev,
-		char *buf)
+static ssize_t available_instances_show(struct kobject *kobj,
+					struct device *dev, char *buf)
 {
 	struct intel_vgpu_type *type;
 	unsigned int num = 0;
@@ -301,12 +301,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
 		       type->fence, vgpu_edid_str(type->resolution));
 }
 
-static MDEV_TYPE_ATTR_RO(available_instance);
+static MDEV_TYPE_ATTR_RO(available_instances);
 static MDEV_TYPE_ATTR_RO(device_api);
 static MDEV_TYPE_ATTR_RO(description);
 
 static struct attribute *type_attrs[] = {
-	&mdev_type_attr_available_instance.attr,
+	&mdev_type_attr_available_instances.attr,
 	&mdev_type_attr_device_api.attr,
 	&mdev_type_attr_description.attr,
 	NULL,
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 3b30c28bff51..2833dfa8c9ae 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -113,7 +113,7 @@ struct intel_shadow_bb_entry {
 	struct drm_i915_gem_object *obj;
 	void *va;
 	unsigned long len;
-	void *bb_start_cmd_va;
+	u32 *bb_start_cmd_va;
 };
 
 #define workload_q_head(vgpu, ring_id) \
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 1dfc0b204a72..e703556eba99 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -213,7 +213,8 @@ static void intel_detect_pch(struct drm_i915_private *dev_priv)
 			} else if (id == INTEL_PCH_KBP_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_KBP;
 				DRM_DEBUG_KMS("Found KabyPoint PCH\n");
-				WARN_ON(!IS_KABYLAKE(dev_priv));
+				WARN_ON(!IS_SKYLAKE(dev_priv) &&
+					!IS_KABYLAKE(dev_priv));
 			} else if ((id == INTEL_PCH_P2X_DEVICE_ID_TYPE) ||
 				   (id == INTEL_PCH_P3X_DEVICE_ID_TYPE) ||
 				   ((id == INTEL_PCH_QEMU_DEVICE_ID_TYPE) &&
@@ -1138,7 +1139,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
 	if (IS_GEN5(dev_priv))
 		intel_gpu_ips_init(dev_priv);
 
-	i915_audio_component_init(dev_priv);
+	intel_audio_init(dev_priv);
 
 	/*
 	 * Some ports require correctly set-up hpd registers for detection to
@@ -1156,7 +1157,7 @@ static void i915_driver_register(struct drm_i915_private *dev_priv)
  */
 static void i915_driver_unregister(struct drm_i915_private *dev_priv)
 {
-	i915_audio_component_cleanup(dev_priv);
+	intel_audio_deinit(dev_priv);
 
 	intel_gpu_ips_teardown();
 	acpi_video_unregister();
@@ -2377,7 +2378,7 @@ static int intel_runtime_suspend(struct device *kdev)
 
 	assert_forcewakes_inactive(dev_priv);
 
-	if (!IS_VALLEYVIEW(dev_priv) || !IS_CHERRYVIEW(dev_priv))
+	if (!IS_VALLEYVIEW(dev_priv) && !IS_CHERRYVIEW(dev_priv))
 		intel_hpd_poll_init(dev_priv);
 
 	DRM_DEBUG_KMS("Device suspended\n");
@@ -2426,6 +2427,7 @@ static int intel_runtime_resume(struct device *kdev)
 	 * we can do is to hope that things will still work (and disable RPM).
 	 */
 	i915_gem_init_swizzling(dev_priv);
+	i915_gem_restore_fences(dev_priv);
 
 	intel_runtime_pm_enable_interrupts(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 244628065f94..0a4b42d31391 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2242,6 +2242,11 @@ struct drm_i915_private {
 
 	struct i915_frontbuffer_tracking fb_tracking;
 
+	struct intel_atomic_helper {
+		struct llist_head free_list;
+		struct work_struct free_work;
+	} atomic_helper;
+
 	u16 orig_clock;
 
 	bool mchbar_need_disable;
@@ -2454,6 +2459,12 @@ struct drm_i915_private {
 	/* Used to save the pipe-to-encoder mapping for audio */
 	struct intel_encoder *av_enc_map[I915_MAX_PIPES];
 
+	/* necessary resource sharing with HDMI LPE audio driver. */
+	struct {
+		struct platform_device *platdev;
+		int	irq;
+	} lpe_audio;
+
 	/*
 	 * NOTE: This is the dri1/ums dungeon, don't add stuff here. Your patch
 	 * will be rejected. Instead look for a better place.
@@ -3341,7 +3352,7 @@ int __must_check i915_gem_wait_for_idle(struct drm_i915_private *dev_priv,
 					unsigned int flags);
 int __must_check i915_gem_suspend(struct drm_i915_private *dev_priv);
 void i915_gem_resume(struct drm_i915_private *dev_priv);
-int i915_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int i915_gem_fault(struct vm_fault *vmf);
 int i915_gem_object_wait(struct drm_i915_gem_object *obj,
 			 unsigned int flags,
 			 long timeout,
@@ -3592,6 +3603,14 @@ extern int i915_restore_state(struct drm_i915_private *dev_priv);
 void i915_setup_sysfs(struct drm_i915_private *dev_priv);
 void i915_teardown_sysfs(struct drm_i915_private *dev_priv);
 
+/* intel_lpe_audio.c */
+int  intel_lpe_audio_init(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv);
+void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
+			    void *eld, int port, int pipe, int tmds_clk_speed,
+			    bool dp_output, int link_rate);
+
 /* intel_i2c.c */
 extern int intel_setup_gmbus(struct drm_i915_private *dev_priv);
 extern void intel_teardown_gmbus(struct drm_i915_private *dev_priv);
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index a07b62732923..6908123162d1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -69,12 +69,10 @@ insert_mappable_node(struct i915_ggtt *ggtt,
                      struct drm_mm_node *node, u32 size)
 {
 	memset(node, 0, sizeof(*node));
-	return drm_mm_insert_node_in_range_generic(&ggtt->base.mm, node,
-						   size, 0,
-						   I915_COLOR_UNEVICTABLE,
-						   0, ggtt->mappable_end,
-						   DRM_MM_SEARCH_DEFAULT,
-						   DRM_MM_CREATE_DEFAULT);
+	return drm_mm_insert_node_in_range(&ggtt->base.mm, node,
+					   size, 0, I915_COLOR_UNEVICTABLE,
+					   0, ggtt->mappable_end,
+					   DRM_MM_INSERT_LOW);
 }
 
 static void
@@ -442,7 +440,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 			timeout = i915_gem_object_wait_fence(shared[i],
 							     flags, timeout,
 							     rps);
-			if (timeout <= 0)
+			if (timeout < 0)
 				break;
 
 			dma_fence_put(shared[i]);
@@ -455,7 +453,7 @@ i915_gem_object_wait_reservation(struct reservation_object *resv,
 		excl = reservation_object_get_excl_rcu(resv);
 	}
 
-	if (excl && timeout > 0)
+	if (excl && timeout >= 0)
 		timeout = i915_gem_object_wait_fence(excl, flags, timeout, rps);
 
 	dma_fence_put(excl);
@@ -1774,7 +1772,6 @@ compute_partial_view(struct drm_i915_gem_object *obj,
 
 /**
  * i915_gem_fault - fault a page into the GTT
- * @area: CPU VMA in question
  * @vmf: fault info
  *
  * The fault handler is set up by drm_gem_mmap() when a object is GTT mapped
@@ -1791,9 +1788,10 @@ compute_partial_view(struct drm_i915_gem_object *obj,
  * The current feature set supported by i915_gem_fault() and thus GTT mmaps
  * is exposed via I915_PARAM_MMAP_GTT_VERSION (see i915_gem_mmap_gtt_version).
  */
-int i915_gem_fault(struct vm_area_struct *area, struct vm_fault *vmf)
+int i915_gem_fault(struct vm_fault *vmf)
 {
 #define MIN_CHUNK_PAGES ((1 << 20) >> PAGE_SHIFT) /* 1 MiB */
+	struct vm_area_struct *area = vmf->vma;
 	struct drm_i915_gem_object *obj = to_intel_bo(area->vm_private_data);
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -2011,8 +2009,16 @@ void i915_gem_runtime_suspend(struct drm_i915_private *dev_priv)
 	for (i = 0; i < dev_priv->num_fence_regs; i++) {
 		struct drm_i915_fence_reg *reg = &dev_priv->fence_regs[i];
 
-		if (WARN_ON(reg->pin_count))
-			continue;
+		/* Ideally we want to assert that the fence register is not
+		 * live at this point (i.e. that no piece of code will be
+		 * trying to write through fence + GTT, as that both violates
+		 * our tracking of activity and associated locking/barriers,
+		 * but also is illegal given that the hw is powered down).
+		 *
+		 * Previously we used reg->pin_count as a "liveness" indicator.
+		 * That is not sufficient, and we need a more fine-grained
+		 * tool if we want to have a sanity check here.
+		 */
 
 		if (!reg->vma)
 			continue;
@@ -2737,21 +2743,17 @@ static void i915_gem_reset_engine(struct intel_engine_cs *engine)
 		engine->irq_seqno_barrier(engine);
 
 	request = i915_gem_find_active_request(engine);
-	if (!request)
-		return;
+	if (request && i915_gem_reset_request(request)) {
+		DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
+				 engine->name, request->global_seqno);
 
-	if (!i915_gem_reset_request(request))
-		return;
-
-	DRM_DEBUG_DRIVER("resetting %s to restart from tail of request 0x%x\n",
-			 engine->name, request->global_seqno);
+		/* If this context is now banned, skip all pending requests. */
+		if (i915_gem_context_is_banned(request->ctx))
+			engine_skip_context(request);
+	}
 
 	/* Setup the CS to resume from the breadcrumb of the hung request */
 	engine->reset_hw(engine, request);
-
-	/* If this context is now banned, skip all of its pending requests. */
-	if (i915_gem_context_is_banned(request->ctx))
-		engine_skip_context(request);
 }
 
 void i915_gem_reset_finish(struct drm_i915_private *dev_priv)
@@ -3519,7 +3521,7 @@ i915_gem_object_pin_to_display_plane(struct drm_i915_gem_object *obj,
 	vma->display_alignment = max_t(u64, vma->display_alignment, alignment);
 
 	/* Treat this as an end-of-frame, like intel_user_framebuffer_dirty() */
-	if (obj->cache_dirty) {
+	if (obj->cache_dirty || obj->base.write_domain == I915_GEM_DOMAIN_CPU) {
 		i915_gem_clflush_object(obj, true);
 		intel_fb_obj_flush(obj, false, ORIGIN_DIRTYFB);
 	}
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index d037adcda6f2..29bb8011dbc4 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -141,7 +141,7 @@ static int i915_gem_dmabuf_mmap(struct dma_buf *dma_buf, struct vm_area_struct *
 	if (!obj->base.filp)
 		return -ENODEV;
 
-	ret = obj->base.filp->f_op->mmap(obj->base.filp, vma);
+	ret = call_mmap(obj->base.filp, vma);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index a43e44e18042..c181b1bb3d2c 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -109,6 +109,7 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	}, **phase;
 	struct i915_vma *vma, *next;
 	struct drm_mm_node *node;
+	enum drm_mm_insert_mode mode;
 	int ret;
 
 	lockdep_assert_held(&vm->i915->drm.struct_mutex);
@@ -127,10 +128,14 @@ i915_gem_evict_something(struct i915_address_space *vm,
 	 * On each list, the oldest objects lie at the HEAD with the freshest
 	 * object on the TAIL.
 	 */
+	mode = DRM_MM_INSERT_BEST;
+	if (flags & PIN_HIGH)
+		mode = DRM_MM_INSERT_HIGH;
+	if (flags & PIN_MAPPABLE)
+		mode = DRM_MM_INSERT_LOW;
 	drm_mm_scan_init_with_range(&scan, &vm->mm,
 				    min_size, alignment, cache_level,
-				    start, end,
-				    flags & PIN_HIGH ? DRM_MM_CREATE_TOP : 0);
+				    start, end, mode);
 
 	/* Retire before we search the active list. Although we have
 	 * reasonable accuracy in our retirement lists, we may have
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index c66e90571031..d02cfaefe1c8 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -436,12 +436,11 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj,
 					       PIN_MAPPABLE | PIN_NONBLOCK);
 		if (IS_ERR(vma)) {
 			memset(&cache->node, 0, sizeof(cache->node));
-			ret = drm_mm_insert_node_in_range_generic
+			ret = drm_mm_insert_node_in_range
 				(&ggtt->base.mm, &cache->node,
 				 PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
 				 0, ggtt->mappable_end,
-				 DRM_MM_SEARCH_DEFAULT,
-				 DRM_MM_CREATE_DEFAULT);
+				 DRM_MM_INSERT_LOW);
 			if (ret) /* no inactive aperture space, use cpu reloc */
 				return NULL;
 		} else {
@@ -1181,14 +1180,14 @@ validate_exec_list(struct drm_device *dev,
 			if (exec[i].offset !=
 			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
 				return -EINVAL;
-
-			/* From drm_mm perspective address space is continuous,
-			 * so from this point we're always using non-canonical
-			 * form internally.
-			 */
-			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
 		}
 
+		/* From drm_mm perspective address space is continuous,
+		 * so from this point we're always using non-canonical
+		 * form internally.
+		 */
+		exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
+
 		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
 			return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index e808aad203d8..2801a4d56324 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -755,9 +755,10 @@ static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
 	GEM_BUG_ON(pte_end > GEN8_PTES);
 
 	bitmap_clear(pt->used_ptes, pte, num_entries);
-
-	if (bitmap_empty(pt->used_ptes, GEN8_PTES))
-		return true;
+	if (USES_FULL_PPGTT(vm->i915)) {
+		if (bitmap_empty(pt->used_ptes, GEN8_PTES))
+			return true;
+	}
 
 	pt_vaddr = kmap_px(pt);
 
@@ -2748,12 +2749,10 @@ int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
 		return ret;
 
 	/* Reserve a mappable slot for our lockless error capture */
-	ret = drm_mm_insert_node_in_range_generic(&ggtt->base.mm,
-						  &ggtt->error_capture,
-						  PAGE_SIZE, 0,
-						  I915_COLOR_UNEVICTABLE,
-						  0, ggtt->mappable_end,
-						  0, 0);
+	ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
+					  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
+					  0, ggtt->mappable_end,
+					  DRM_MM_INSERT_LOW);
 	if (ret)
 		return ret;
 
@@ -3663,7 +3662,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 			u64 size, u64 alignment, unsigned long color,
 			u64 start, u64 end, unsigned int flags)
 {
-	u32 search_flag, alloc_flag;
+	enum drm_mm_insert_mode mode;
 	u64 offset;
 	int err;
 
@@ -3684,13 +3683,11 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
 		return -ENOSPC;
 
-	if (flags & PIN_HIGH) {
-		search_flag = DRM_MM_SEARCH_BELOW;
-		alloc_flag = DRM_MM_CREATE_TOP;
-	} else {
-		search_flag = DRM_MM_SEARCH_DEFAULT;
-		alloc_flag = DRM_MM_CREATE_DEFAULT;
-	}
+	mode = DRM_MM_INSERT_BEST;
+	if (flags & PIN_HIGH)
+		mode = DRM_MM_INSERT_HIGH;
+	if (flags & PIN_MAPPABLE)
+		mode = DRM_MM_INSERT_LOW;
 
 	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
 	 * so we know that we always have a minimum alignment of 4096.
@@ -3702,10 +3699,9 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (alignment <= I915_GTT_MIN_ALIGNMENT)
 		alignment = 0;
 
-	err = drm_mm_insert_node_in_range_generic(&vm->mm, node,
-						  size, alignment, color,
-						  start, end,
-						  search_flag, alloc_flag);
+	err = drm_mm_insert_node_in_range(&vm->mm, node,
+					  size, alignment, color,
+					  start, end, mode);
 	if (err != -ENOSPC)
 		return err;
 
@@ -3743,9 +3739,7 @@ int i915_gem_gtt_insert(struct i915_address_space *vm,
 	if (err)
 		return err;
 
-	search_flag = DRM_MM_SEARCH_DEFAULT;
-	return drm_mm_insert_node_in_range_generic(&vm->mm, node,
-						   size, alignment, color,
-						   start, end,
-						   search_flag, alloc_flag);
+	return drm_mm_insert_node_in_range(&vm->mm, node,
+					   size, alignment, color,
+					   start, end, DRM_MM_INSERT_EVICT);
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c
index 17ce53d0d092..933019e1b206 100644
--- a/drivers/gpu/drm/i915/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/i915_gem_internal.c
@@ -46,16 +46,39 @@ static struct sg_table *
 i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	unsigned int npages = obj->base.size / PAGE_SIZE;
 	struct sg_table *st;
 	struct scatterlist *sg;
+	unsigned int npages;
 	int max_order;
 	gfp_t gfp;
 
+	max_order = MAX_ORDER;
+#ifdef CONFIG_SWIOTLB
+	if (swiotlb_nr_tbl()) {
+		unsigned int max_segment;
+
+		max_segment = swiotlb_max_segment();
+		if (max_segment) {
+			max_segment = max_t(unsigned int, max_segment,
+					    PAGE_SIZE) >> PAGE_SHIFT;
+			max_order = min(max_order, ilog2(max_segment));
+		}
+	}
+#endif
+
+	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
+	if (IS_I965GM(i915) || IS_I965G(i915)) {
+		/* 965gm cannot relocate objects above 4GiB. */
+		gfp &= ~__GFP_HIGHMEM;
+		gfp |= __GFP_DMA32;
+	}
+
+create_st:
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
 	if (!st)
 		return ERR_PTR(-ENOMEM);
 
+	npages = obj->base.size / PAGE_SIZE;
 	if (sg_alloc_table(st, npages, GFP_KERNEL)) {
 		kfree(st);
 		return ERR_PTR(-ENOMEM);
@@ -64,19 +87,6 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	sg = st->sgl;
 	st->nents = 0;
 
-	max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-	if (swiotlb_nr_tbl()) /* minimum max swiotlb size is IO_TLB_SEGSIZE */
-		max_order = min(max_order, ilog2(IO_TLB_SEGPAGES));
-#endif
-
-	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
-	if (IS_I965GM(i915) || IS_I965G(i915)) {
-		/* 965gm cannot relocate objects above 4GiB. */
-		gfp &= ~__GFP_HIGHMEM;
-		gfp |= __GFP_DMA32;
-	}
-
 	do {
 		int order = min(fls(npages) - 1, max_order);
 		struct page *page;
@@ -104,8 +114,15 @@ i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 		sg = __sg_next(sg);
 	} while (1);
 
-	if (i915_gem_gtt_prepare_pages(obj, st))
+	if (i915_gem_gtt_prepare_pages(obj, st)) {
+		/* Failed to dma-map try again with single page sg segments */
+		if (get_order(st->sgl->length)) {
+			internal_free_pages(st);
+			max_order = 0;
+			goto create_st;
+		}
 		goto err;
+	}
 
 	/* Mark the pages as dontneed whilst they are still pinned. As soon
 	 * as they are unpinned they are allowed to be reaped by the shrinker,
diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h
index 290eaa7fc9eb..bf90b07163d1 100644
--- a/drivers/gpu/drm/i915/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/i915_gem_object.h
@@ -256,7 +256,7 @@ extern void drm_gem_object_unreference_unlocked(struct drm_gem_object *);
 static inline bool
 i915_gem_object_is_dead(const struct drm_i915_gem_object *obj)
 {
-	return atomic_read(&obj->base.refcount.refcount) == 0;
+	return kref_read(&obj->base.refcount) == 0;
 }
 
 static inline bool
diff --git a/drivers/gpu/drm/i915/i915_gem_render_state.c b/drivers/gpu/drm/i915/i915_gem_render_state.c
index b42c81b42487..7032c542a9b1 100644
--- a/drivers/gpu/drm/i915/i915_gem_render_state.c
+++ b/drivers/gpu/drm/i915/i915_gem_render_state.c
@@ -60,7 +60,7 @@ render_state_get_rodata(const struct intel_engine_cs *engine)
  * this is sufficient as the null state generator makes the final batch
  * with two passes to build command and state separately. At this point
  * the size of both are known and it compacts them by relocating the state
- * right after the commands taking care of aligment so we should sufficient
+ * right after the commands taking care of alignment so we should sufficient
  * space below them for adding new commands.
  */
 #define OUT_BATCH(batch, i, val)				\
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 72b7f7d9461d..e7c3c0318ff6 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -24,6 +24,9 @@
 
 #include <linux/prefetch.h>
 #include <linux/dma-fence-array.h>
+#include <linux/sched.h>
+#include <linux/sched/clock.h>
+#include <linux/sched/signal.h>
 
 #include "i915_drv.h"
 
@@ -1025,8 +1028,13 @@ __i915_request_wait_for_execute(struct drm_i915_gem_request *request,
 			break;
 		}
 
+		if (!timeout) {
+			timeout = -ETIME;
+			break;
+		}
+
 		timeout = io_schedule_timeout(timeout);
-	} while (timeout);
+	} while (1);
 	finish_wait(&request->execute.wait, &wait);
 
 	if (flags & I915_WAIT_LOCKED)
diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c
index 127d698e7c84..9673bcc3b6ad 100644
--- a/drivers/gpu/drm/i915/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/i915_gem_stolen.c
@@ -55,9 +55,9 @@ int i915_gem_stolen_insert_node_in_range(struct drm_i915_private *dev_priv,
 		return -ENODEV;
 
 	mutex_lock(&dev_priv->mm.stolen_lock);
-	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node, size,
-					  alignment, start, end,
-					  DRM_MM_SEARCH_DEFAULT);
+	ret = drm_mm_insert_node_in_range(&dev_priv->mm.stolen, node,
+					  size, alignment, 0,
+					  start, end, DRM_MM_INSERT_BEST);
 	mutex_unlock(&dev_priv->mm.stolen_lock);
 
 	return ret;
@@ -405,6 +405,11 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv)
 
 	mutex_init(&dev_priv->mm.stolen_lock);
 
+	if (intel_vgpu_active(dev_priv)) {
+		DRM_INFO("iGVT-g active, disabling use of stolen memory\n");
+		return 0;
+	}
+
 #ifdef CONFIG_INTEL_IOMMU
 	if (intel_iommu_gfx_mapped && INTEL_GEN(dev_priv) < 8) {
 		DRM_INFO("DMAR active, disabling use of stolen memory\n");
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index b1361cfd4c5c..974ac08df473 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -173,7 +173,7 @@ i915_tiling_ok(struct drm_i915_gem_object *obj,
 	else
 		tile_width = 512;
 
-	if (!IS_ALIGNED(stride, tile_width))
+	if (!stride || !IS_ALIGNED(stride, tile_width))
 		return false;
 
 	/* 965+ just needs multiples of tile width */
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 6a8fa085b74e..22b46398831e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -31,6 +31,7 @@
 #include <linux/mmu_notifier.h>
 #include <linux/mempolicy.h>
 #include <linux/swap.h>
+#include <linux/sched/mm.h>
 
 struct i915_mm_struct {
 	struct mm_struct *mm;
@@ -334,7 +335,7 @@ i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj)
 		mm->i915 = to_i915(obj->base.dev);
 
 		mm->mm = current->mm;
-		atomic_inc(&current->mm->mm_count);
+		mmgrab(current->mm);
 
 		mm->mn = NULL;
 
@@ -507,7 +508,7 @@ __i915_gem_userptr_get_pages_worker(struct work_struct *_work)
 			flags |= FOLL_WRITE;
 
 		ret = -EFAULT;
-		if (atomic_inc_not_zero(&mm->mm_users)) {
+		if (mmget_not_zero(mm)) {
 			down_read(&mm->mmap_sem);
 			while (pinned < npages) {
 				ret = get_user_pages_remote
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 6fefc34ef602..e6ffef2f707a 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1926,6 +1926,10 @@ static irqreturn_t valleyview_irq_handler(int irq, void *arg)
 		 * signalled in iir */
 		valleyview_pipestat_irq_ack(dev_priv, iir, pipe_stats);
 
+		if (iir & (I915_LPE_PIPE_A_INTERRUPT |
+			   I915_LPE_PIPE_B_INTERRUPT))
+			intel_lpe_audio_irq_handler(dev_priv);
+
 		/*
 		 * VLV_IIR is single buffered, and reflects the level
 		 * from PIPESTAT/PORT_HOTPLUG_STAT, hence clear it last.
@@ -2006,6 +2010,11 @@ static irqreturn_t cherryview_irq_handler(int irq, void *arg)
 		 * signalled in iir */
 		valleyview_pipestat_irq_ack(dev_priv, iir, pipe_stats);
 
+		if (iir & (I915_LPE_PIPE_A_INTERRUPT |
+			   I915_LPE_PIPE_B_INTERRUPT |
+			   I915_LPE_PIPE_C_INTERRUPT))
+			intel_lpe_audio_irq_handler(dev_priv);
+
 		/*
 		 * VLV_IIR is single buffered, and reflects the level
 		 * from PIPESTAT/PORT_HOTPLUG_STAT, hence clear it last.
@@ -2948,6 +2957,7 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 	u32 pipestat_mask;
 	u32 enable_mask;
 	enum pipe pipe;
+	u32 val;
 
 	pipestat_mask = PLANE_FLIP_DONE_INT_STATUS_VLV |
 			PIPE_CRC_DONE_INTERRUPT_STATUS;
@@ -2964,6 +2974,12 @@ static void vlv_display_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	WARN_ON(dev_priv->irq_mask != ~0);
 
+	val = (I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT |
+		I915_LPE_PIPE_C_INTERRUPT);
+
+	enable_mask |= val;
+
 	dev_priv->irq_mask = ~enable_mask;
 
 	GEN5_IRQ_INIT(VLV_, dev_priv->irq_mask, enable_mask);
@@ -3123,19 +3139,16 @@ static void ibx_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 }
 
-static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+static void spt_hpd_detection_setup(struct drm_i915_private *dev_priv)
 {
-	u32 hotplug_irqs, hotplug, enabled_irqs;
-
-	hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
-
-	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
+	u32 hotplug;
 
 	/* Enable digital hotplug on the PCH */
 	hotplug = I915_READ(PCH_PORT_HOTPLUG);
-	hotplug |= PORTD_HOTPLUG_ENABLE | PORTC_HOTPLUG_ENABLE |
-		PORTB_HOTPLUG_ENABLE | PORTA_HOTPLUG_ENABLE;
+	hotplug |= PORTA_HOTPLUG_ENABLE |
+		   PORTB_HOTPLUG_ENABLE |
+		   PORTC_HOTPLUG_ENABLE |
+		   PORTD_HOTPLUG_ENABLE;
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 
 	hotplug = I915_READ(PCH_PORT_HOTPLUG2);
@@ -3143,6 +3156,18 @@ static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG2, hotplug);
 }
 
+static void spt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+{
+	u32 hotplug_irqs, enabled_irqs;
+
+	hotplug_irqs = SDE_HOTPLUG_MASK_SPT;
+	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_spt);
+
+	ibx_display_interrupt_update(dev_priv, hotplug_irqs, enabled_irqs);
+
+	spt_hpd_detection_setup(dev_priv);
+}
+
 static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv)
 {
 	u32 hotplug_irqs, hotplug, enabled_irqs;
@@ -3177,18 +3202,15 @@ static void ilk_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	ibx_hpd_irq_setup(dev_priv);
 }
 
-static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+static void __bxt_hpd_detection_setup(struct drm_i915_private *dev_priv,
+				      u32 enabled_irqs)
 {
-	u32 hotplug_irqs, hotplug, enabled_irqs;
-
-	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt);
-	hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK;
-
-	bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
+	u32 hotplug;
 
 	hotplug = I915_READ(PCH_PORT_HOTPLUG);
-	hotplug |= PORTC_HOTPLUG_ENABLE | PORTB_HOTPLUG_ENABLE |
-		PORTA_HOTPLUG_ENABLE;
+	hotplug |= PORTA_HOTPLUG_ENABLE |
+		   PORTB_HOTPLUG_ENABLE |
+		   PORTC_HOTPLUG_ENABLE;
 
 	DRM_DEBUG_KMS("Invert bit setting: hp_ctl:%x hp_port:%x\n",
 		      hotplug, enabled_irqs);
@@ -3198,7 +3220,6 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	 * For BXT invert bit has to be set based on AOB design
 	 * for HPD detection logic, update it based on VBT fields.
 	 */
-
 	if ((enabled_irqs & BXT_DE_PORT_HP_DDIA) &&
 	    intel_bios_is_port_hpd_inverted(dev_priv, PORT_A))
 		hotplug |= BXT_DDIA_HPD_INVERT;
@@ -3212,6 +3233,23 @@ static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
 	I915_WRITE(PCH_PORT_HOTPLUG, hotplug);
 }
 
+static void bxt_hpd_detection_setup(struct drm_i915_private *dev_priv)
+{
+	__bxt_hpd_detection_setup(dev_priv, BXT_DE_PORT_HOTPLUG_MASK);
+}
+
+static void bxt_hpd_irq_setup(struct drm_i915_private *dev_priv)
+{
+	u32 hotplug_irqs, enabled_irqs;
+
+	enabled_irqs = intel_hpd_enabled_irqs(dev_priv, hpd_bxt);
+	hotplug_irqs = BXT_DE_PORT_HOTPLUG_MASK;
+
+	bdw_update_port_irq(dev_priv, hotplug_irqs, enabled_irqs);
+
+	__bxt_hpd_detection_setup(dev_priv, enabled_irqs);
+}
+
 static void ibx_irq_postinstall(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -3227,6 +3265,12 @@ static void ibx_irq_postinstall(struct drm_device *dev)
 
 	gen5_assert_iir_is_zero(dev_priv, SDEIIR);
 	I915_WRITE(SDEIMR, ~mask);
+
+	if (HAS_PCH_IBX(dev_priv) || HAS_PCH_CPT(dev_priv) ||
+	    HAS_PCH_LPT(dev_priv))
+		; /* TODO: Enable HPD detection on older PCH platforms too */
+	else
+		spt_hpd_detection_setup(dev_priv);
 }
 
 static void gen5_gt_irq_postinstall(struct drm_device *dev)
@@ -3438,6 +3482,9 @@ static void gen8_de_irq_postinstall(struct drm_i915_private *dev_priv)
 
 	GEN5_IRQ_INIT(GEN8_DE_PORT_, ~de_port_masked, de_port_enables);
 	GEN5_IRQ_INIT(GEN8_DE_MISC_, ~de_misc_masked, de_misc_masked);
+
+	if (IS_GEN9_LP(dev_priv))
+		bxt_hpd_detection_setup(dev_priv);
 }
 
 static int gen8_irq_postinstall(struct drm_device *dev)
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 72f9f36ae5ce..1c8f5b9a7fcd 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -2428,6 +2428,22 @@ enum skl_disp_power_wells {
 #define I915_ASLE_INTERRUPT				(1<<0)
 #define I915_BSD_USER_INTERRUPT				(1<<25)
 
+#define I915_HDMI_LPE_AUDIO_BASE	(VLV_DISPLAY_BASE + 0x65000)
+#define I915_HDMI_LPE_AUDIO_SIZE	0x1000
+
+/* DisplayPort Audio w/ LPE */
+#define VLV_AUD_CHICKEN_BIT_REG		_MMIO(VLV_DISPLAY_BASE + 0x62F38)
+#define VLV_CHICKEN_BIT_DBG_ENABLE	(1 << 0)
+
+#define _VLV_AUD_PORT_EN_B_DBG		(VLV_DISPLAY_BASE + 0x62F20)
+#define _VLV_AUD_PORT_EN_C_DBG		(VLV_DISPLAY_BASE + 0x62F30)
+#define _VLV_AUD_PORT_EN_D_DBG		(VLV_DISPLAY_BASE + 0x62F34)
+#define VLV_AUD_PORT_EN_DBG(port)	_MMIO_PORT3((port) - PORT_B,	   \
+						    _VLV_AUD_PORT_EN_B_DBG, \
+						    _VLV_AUD_PORT_EN_C_DBG, \
+						    _VLV_AUD_PORT_EN_D_DBG)
+#define VLV_AMP_MUTE		        (1 << 1)
+
 #define GEN6_BSD_RNCID			_MMIO(0x12198)
 
 #define GEN7_FF_THREAD_MODE		_MMIO(0x20a0)
@@ -3307,8 +3323,10 @@ enum skl_disp_power_wells {
 /*
  * Logical Context regs
  */
-#define CCID			_MMIO(0x2180)
-#define   CCID_EN		(1<<0)
+#define CCID				_MMIO(0x2180)
+#define   CCID_EN			BIT(0)
+#define   CCID_EXTENDED_STATE_RESTORE	BIT(2)
+#define   CCID_EXTENDED_STATE_SAVE	BIT(3)
 /*
  * Notes on SNB/IVB/VLV context size:
  * - Power context is saved elsewhere (LLC or stolen)
diff --git a/drivers/gpu/drm/i915/intel_audio.c b/drivers/gpu/drm/i915/intel_audio.c
index 16c202781db0..d76f3033e890 100644
--- a/drivers/gpu/drm/i915/intel_audio.c
+++ b/drivers/gpu/drm/i915/intel_audio.c
@@ -24,6 +24,7 @@
 #include <linux/kernel.h>
 #include <linux/component.h>
 #include <drm/i915_component.h>
+#include <drm/intel_lpe_audio.h>
 #include "intel_drv.h"
 
 #include <drm/drmP.h>
@@ -623,13 +624,28 @@ void intel_audio_codec_enable(struct intel_encoder *intel_encoder,
 	dev_priv->av_enc_map[pipe] = intel_encoder;
 	mutex_unlock(&dev_priv->av_mutex);
 
-	/* audio drivers expect pipe = -1 to indicate Non-MST cases */
-	if (intel_encoder->type != INTEL_OUTPUT_DP_MST)
-		pipe = -1;
-
-	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
+	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) {
+		/* audio drivers expect pipe = -1 to indicate Non-MST cases */
+		if (intel_encoder->type != INTEL_OUTPUT_DP_MST)
+			pipe = -1;
 		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
 						 (int) port, (int) pipe);
+	}
+
+	switch (intel_encoder->type) {
+	case INTEL_OUTPUT_HDMI:
+		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
+				       crtc_state->port_clock,
+				       false, 0);
+		break;
+	case INTEL_OUTPUT_DP:
+		intel_lpe_audio_notify(dev_priv, connector->eld, port, pipe,
+				       adjusted_mode->crtc_clock,
+				       true, crtc_state->port_clock);
+		break;
+	default:
+		break;
+	}
 }
 
 /**
@@ -656,13 +672,15 @@ void intel_audio_codec_disable(struct intel_encoder *intel_encoder)
 	dev_priv->av_enc_map[pipe] = NULL;
 	mutex_unlock(&dev_priv->av_mutex);
 
-	/* audio drivers expect pipe = -1 to indicate Non-MST cases */
-	if (intel_encoder->type != INTEL_OUTPUT_DP_MST)
-		pipe = -1;
-
-	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify)
+	if (acomp && acomp->audio_ops && acomp->audio_ops->pin_eld_notify) {
+		/* audio drivers expect pipe = -1 to indicate Non-MST cases */
+		if (intel_encoder->type != INTEL_OUTPUT_DP_MST)
+			pipe = -1;
 		acomp->audio_ops->pin_eld_notify(acomp->audio_ops->audio_ptr,
 						 (int) port, (int) pipe);
+	}
+
+	intel_lpe_audio_notify(dev_priv, NULL, port, pipe, 0, false, 0);
 }
 
 /**
@@ -956,3 +974,28 @@ void i915_audio_component_cleanup(struct drm_i915_private *dev_priv)
 	component_del(dev_priv->drm.dev, &i915_audio_component_bind_ops);
 	dev_priv->audio_component_registered = false;
 }
+
+/**
+ * intel_audio_init() - Initialize the audio driver either using
+ * component framework or using lpe audio bridge
+ * @dev_priv: the i915 drm device private data
+ *
+ */
+void intel_audio_init(struct drm_i915_private *dev_priv)
+{
+	if (intel_lpe_audio_init(dev_priv) < 0)
+		i915_audio_component_init(dev_priv);
+}
+
+/**
+ * intel_audio_deinit() - deinitialize the audio driver
+ * @dev_priv: the i915 drm device private data
+ *
+ */
+void intel_audio_deinit(struct drm_i915_private *dev_priv)
+{
+	if ((dev_priv)->lpe_audio.platdev != NULL)
+		intel_lpe_audio_teardown(dev_priv);
+	else
+		i915_audio_component_cleanup(dev_priv);
+}
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index fcfa423d08bd..7044e9a6abf7 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -23,6 +23,7 @@
  */
 
 #include <linux/kthread.h>
+#include <uapi/linux/sched/types.h>
 
 #include "i915_drv.h"
 
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 385e29af8baa..2bf5aca6e37c 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -499,6 +499,7 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 	struct drm_i915_private *dev_priv = to_i915(crt->base.base.dev);
 	struct edid *edid;
 	struct i2c_adapter *i2c;
+	bool ret = false;
 
 	BUG_ON(crt->base.type != INTEL_OUTPUT_ANALOG);
 
@@ -515,17 +516,17 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 		 */
 		if (!is_digital) {
 			DRM_DEBUG_KMS("CRT detected via DDC:0x50 [EDID]\n");
-			return true;
+			ret = true;
+		} else {
+			DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n");
 		}
-
-		DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [EDID reports a digital panel]\n");
 	} else {
 		DRM_DEBUG_KMS("CRT not detected via DDC:0x50 [no valid EDID found]\n");
 	}
 
 	kfree(edid);
 
-	return false;
+	return ret;
 }
 
 static enum drm_connector_status
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index b3e773c9f872..01341670738f 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -2578,8 +2578,9 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 			 * We only keep the x/y offsets, so push all of the
 			 * gtt offset into the x/y offsets.
 			 */
-			_intel_adjust_tile_offset(&x, &y, tile_size,
-						  tile_width, tile_height, pitch_tiles,
+			_intel_adjust_tile_offset(&x, &y,
+						  tile_width, tile_height,
+						  tile_size, pitch_tiles,
 						  gtt_offset_rotated * tile_size, 0);
 
 			gtt_offset_rotated += rot_info->plane[i].width * rot_info->plane[i].height;
@@ -4252,10 +4253,10 @@ static void page_flip_completed(struct intel_crtc *intel_crtc)
 	drm_crtc_vblank_put(&intel_crtc->base);
 
 	wake_up_all(&dev_priv->pending_flip_queue);
-	queue_work(dev_priv->wq, &work->unpin_work);
-
 	trace_i915_flip_complete(intel_crtc->plane,
 				 work->pending_flip_obj);
+
+	queue_work(dev_priv->wq, &work->unpin_work);
 }
 
 static int intel_crtc_wait_for_pending_flips(struct drm_crtc *crtc)
@@ -6881,6 +6882,12 @@ static void intel_crtc_disable_noatomic(struct drm_crtc *crtc)
 	}
 
 	state = drm_atomic_state_alloc(crtc->dev);
+	if (!state) {
+		DRM_DEBUG_KMS("failed to disable [CRTC:%d:%s], out of memory",
+			      crtc->base.id, crtc->name);
+		return;
+	}
+
 	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 
 	/* Everything's already locked, -EDEADLK can't happen. */
@@ -14562,8 +14569,14 @@ intel_atomic_commit_ready(struct i915_sw_fence *fence,
 		break;
 
 	case FENCE_FREE:
-		drm_atomic_state_put(&state->base);
-		break;
+		{
+			struct intel_atomic_helper *helper =
+				&to_i915(state->base.dev)->atomic_helper;
+
+			if (llist_add(&state->freed, &helper->free_list))
+				schedule_work(&helper->free_work);
+			break;
+		}
 	}
 
 	return NOTIFY_DONE;
@@ -16586,6 +16599,18 @@ fail:
 	drm_modeset_acquire_fini(&ctx);
 }
 
+static void intel_atomic_helper_free_state(struct work_struct *work)
+{
+	struct drm_i915_private *dev_priv =
+		container_of(work, typeof(*dev_priv), atomic_helper.free_work);
+	struct intel_atomic_state *state, *next;
+	struct llist_node *freed;
+
+	freed = llist_del_all(&dev_priv->atomic_helper.free_list);
+	llist_for_each_entry_safe(state, next, freed, freed)
+		drm_atomic_state_put(&state->base);
+}
+
 int intel_modeset_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
@@ -16605,6 +16630,9 @@ int intel_modeset_init(struct drm_device *dev)
 
 	dev->mode_config.funcs = &intel_mode_funcs;
 
+	INIT_WORK(&dev_priv->atomic_helper.free_work,
+		  intel_atomic_helper_free_state);
+
 	intel_init_quirks(dev);
 
 	intel_init_pm(dev_priv);
@@ -17262,6 +17290,9 @@ void intel_modeset_cleanup(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = to_i915(dev);
 
+	flush_work(&dev_priv->atomic_helper.free_work);
+	WARN_ON(!llist_empty(&dev_priv->atomic_helper.free_list));
+
 	intel_disable_gt_powersave(dev_priv);
 
 	/*
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 3d8ac8aa7214..d1670b8afbf5 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -2887,6 +2887,9 @@ static void vlv_detach_power_sequencer(struct intel_dp *intel_dp)
 
 	WARN_ON(intel_dp->active_pipe != INVALID_PIPE);
 
+	if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B))
+		return;
+
 	edp_panel_vdd_off_sync(intel_dp);
 
 	/*
@@ -2914,9 +2917,6 @@ static void vlv_steal_power_sequencer(struct drm_device *dev,
 
 	lockdep_assert_held(&dev_priv->pps_mutex);
 
-	if (WARN_ON(pipe != PIPE_A && pipe != PIPE_B))
-		return;
-
 	for_each_intel_encoder(dev, encoder) {
 		struct intel_dp *intel_dp;
 		enum port port;
@@ -4406,8 +4406,8 @@ static bool bxt_digital_port_connected(struct drm_i915_private *dev_priv,
  *
  * Return %true if @port is connected, %false otherwise.
  */
-static bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
-					 struct intel_digital_port *port)
+bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
+				  struct intel_digital_port *port)
 {
 	if (HAS_PCH_IBX(dev_priv))
 		return ibx_digital_port_connected(dev_priv, port);
diff --git a/drivers/gpu/drm/i915/intel_dpll_mgr.c b/drivers/gpu/drm/i915/intel_dpll_mgr.c
index c92a2558beb4..e59e43a9f3a6 100644
--- a/drivers/gpu/drm/i915/intel_dpll_mgr.c
+++ b/drivers/gpu/drm/i915/intel_dpll_mgr.c
@@ -1855,7 +1855,8 @@ bxt_get_dpll(struct intel_crtc *crtc,
 		return NULL;
 
 	if ((encoder->type == INTEL_OUTPUT_DP ||
-	     encoder->type == INTEL_OUTPUT_EDP) &&
+	     encoder->type == INTEL_OUTPUT_EDP ||
+	     encoder->type == INTEL_OUTPUT_DP_MST) &&
 	    !bxt_ddi_dp_set_dpll_hw_state(clock, &dpll_hw_state))
 		return NULL;
 
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 0cec0013ace0..344f238b283f 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -28,6 +28,7 @@
 #include <linux/async.h>
 #include <linux/i2c.h>
 #include <linux/hdmi.h>
+#include <linux/sched/clock.h>
 #include <drm/i915_drm.h>
 #include "i915_drv.h"
 #include <drm/drm_crtc.h>
@@ -371,6 +372,8 @@ struct intel_atomic_state {
 	struct skl_wm_values wm_results;
 
 	struct i915_sw_fence commit_ready;
+
+	struct llist_node freed;
 };
 
 struct intel_plane_state {
@@ -1225,6 +1228,8 @@ void intel_audio_codec_enable(struct intel_encoder *encoder,
 void intel_audio_codec_disable(struct intel_encoder *encoder);
 void i915_audio_component_init(struct drm_i915_private *dev_priv);
 void i915_audio_component_cleanup(struct drm_i915_private *dev_priv);
+void intel_audio_init(struct drm_i915_private *dev_priv);
+void intel_audio_deinit(struct drm_i915_private *dev_priv);
 
 /* intel_display.c */
 enum transcoder intel_crtc_pch_transcoder(struct intel_crtc *crtc);
@@ -1485,6 +1490,8 @@ bool __intel_dp_read_desc(struct intel_dp *intel_dp,
 bool intel_dp_read_desc(struct intel_dp *intel_dp);
 int intel_dp_link_required(int pixel_clock, int bpp);
 int intel_dp_max_data_rate(int max_link_clock, int max_lanes);
+bool intel_digital_port_connected(struct drm_i915_private *dev_priv,
+				  struct intel_digital_port *port);
 
 /* intel_dp_aux_backlight.c */
 int intel_dp_aux_init_backlight_funcs(struct intel_connector *intel_connector);
diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c
index e0d9e72cf3d1..1b8ba2e77539 100644
--- a/drivers/gpu/drm/i915/intel_fbdev.c
+++ b/drivers/gpu/drm/i915/intel_fbdev.c
@@ -713,8 +713,7 @@ int intel_fbdev_init(struct drm_device *dev)
 	if (!intel_fbdev_init_bios(dev, ifbdev))
 		ifbdev->preferred_bpp = 32;
 
-	ret = drm_fb_helper_init(dev, &ifbdev->helper,
-				 INTEL_INFO(dev_priv)->num_pipes, 4);
+	ret = drm_fb_helper_init(dev, &ifbdev->helper, 4);
 	if (ret) {
 		kfree(ifbdev);
 		return ret;
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index af16b0fa6b69..ebae2bd83918 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -36,6 +36,7 @@
 #include <drm/drm_edid.h>
 #include "intel_drv.h"
 #include <drm/i915_drm.h>
+#include <drm/intel_lpe_audio.h>
 #include "i915_drv.h"
 
 static struct drm_device *intel_hdmi_to_dev(struct intel_hdmi *intel_hdmi)
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
new file mode 100644
index 000000000000..7a5b41b1c024
--- /dev/null
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -0,0 +1,392 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
+ *    Jerome Anand <jerome.anand@intel.com>
+ *    based on VED patches
+ *
+ */
+
+/**
+ * DOC: LPE Audio integration for HDMI or DP playback
+ *
+ * Motivation:
+ * Atom platforms (e.g. valleyview and cherryTrail) integrates a DMA-based
+ * interface as an alternative to the traditional HDaudio path. While this
+ * mode is unrelated to the LPE aka SST audio engine, the documentation refers
+ * to this mode as LPE so we keep this notation for the sake of consistency.
+ *
+ * The interface is handled by a separate standalone driver maintained in the
+ * ALSA subsystem for simplicity. To minimize the interaction between the two
+ * subsystems, a bridge is setup between the hdmi-lpe-audio and i915:
+ * 1. Create a platform device to share MMIO/IRQ resources
+ * 2. Make the platform device child of i915 device for runtime PM.
+ * 3. Create IRQ chip to forward the LPE audio irqs.
+ * the hdmi-lpe-audio driver probes the lpe audio device and creates a new
+ * sound card
+ *
+ * Threats:
+ * Due to the restriction in Linux platform device model, user need manually
+ * uninstall the hdmi-lpe-audio driver before uninstalling i915 module,
+ * otherwise we might run into use-after-free issues after i915 removes the
+ * platform device: even though hdmi-lpe-audio driver is released, the modules
+ * is still in "installed" status.
+ *
+ * Implementation:
+ * The MMIO/REG platform resources are created according to the registers
+ * specification.
+ * When forwarding LPE audio irqs, the flow control handler selection depends
+ * on the platform, for example on valleyview handle_simple_irq is enough.
+ *
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/pci.h>
+
+#include "i915_drv.h"
+#include <linux/delay.h>
+#include <drm/intel_lpe_audio.h>
+
+#define HAS_LPE_AUDIO(dev_priv) ((dev_priv)->lpe_audio.platdev != NULL)
+
+static struct platform_device *
+lpe_audio_platdev_create(struct drm_i915_private *dev_priv)
+{
+	int ret;
+	struct drm_device *dev = &dev_priv->drm;
+	struct platform_device_info pinfo = {};
+	struct resource *rsc;
+	struct platform_device *platdev;
+	struct intel_hdmi_lpe_audio_pdata *pdata;
+
+	pdata = kzalloc(sizeof(*pdata), GFP_KERNEL);
+	if (!pdata)
+		return ERR_PTR(-ENOMEM);
+
+	rsc = kcalloc(2, sizeof(*rsc), GFP_KERNEL);
+	if (!rsc) {
+		kfree(pdata);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	rsc[0].start    = rsc[0].end = dev_priv->lpe_audio.irq;
+	rsc[0].flags    = IORESOURCE_IRQ;
+	rsc[0].name     = "hdmi-lpe-audio-irq";
+
+	rsc[1].start    = pci_resource_start(dev->pdev, 0) +
+		I915_HDMI_LPE_AUDIO_BASE;
+	rsc[1].end      = pci_resource_start(dev->pdev, 0) +
+		I915_HDMI_LPE_AUDIO_BASE + I915_HDMI_LPE_AUDIO_SIZE - 1;
+	rsc[1].flags    = IORESOURCE_MEM;
+	rsc[1].name     = "hdmi-lpe-audio-mmio";
+
+	pinfo.parent = dev->dev;
+	pinfo.name = "hdmi-lpe-audio";
+	pinfo.id = -1;
+	pinfo.res = rsc;
+	pinfo.num_res = 2;
+	pinfo.data = pdata;
+	pinfo.size_data = sizeof(*pdata);
+	pinfo.dma_mask = DMA_BIT_MASK(32);
+
+	spin_lock_init(&pdata->lpe_audio_slock);
+
+	platdev = platform_device_register_full(&pinfo);
+	if (IS_ERR(platdev)) {
+		ret = PTR_ERR(platdev);
+		DRM_ERROR("Failed to allocate LPE audio platform device\n");
+		goto err;
+	}
+
+	kfree(rsc);
+
+	return platdev;
+
+err:
+	kfree(rsc);
+	kfree(pdata);
+	return ERR_PTR(ret);
+}
+
+static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
+{
+	platform_device_unregister(dev_priv->lpe_audio.platdev);
+	kfree(dev_priv->lpe_audio.platdev->dev.dma_mask);
+}
+
+static void lpe_audio_irq_unmask(struct irq_data *d)
+{
+	struct drm_i915_private *dev_priv = d->chip_data;
+	unsigned long irqflags;
+	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT);
+
+	if (IS_CHERRYVIEW(dev_priv))
+		val |= I915_LPE_PIPE_C_INTERRUPT;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+
+	dev_priv->irq_mask &= ~val;
+	I915_WRITE(VLV_IIR, val);
+	I915_WRITE(VLV_IIR, val);
+	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
+	POSTING_READ(VLV_IMR);
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+static void lpe_audio_irq_mask(struct irq_data *d)
+{
+	struct drm_i915_private *dev_priv = d->chip_data;
+	unsigned long irqflags;
+	u32 val = (I915_LPE_PIPE_A_INTERRUPT |
+		I915_LPE_PIPE_B_INTERRUPT);
+
+	if (IS_CHERRYVIEW(dev_priv))
+		val |= I915_LPE_PIPE_C_INTERRUPT;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+
+	dev_priv->irq_mask |= val;
+	I915_WRITE(VLV_IMR, dev_priv->irq_mask);
+	I915_WRITE(VLV_IIR, val);
+	I915_WRITE(VLV_IIR, val);
+	POSTING_READ(VLV_IIR);
+
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+static struct irq_chip lpe_audio_irqchip = {
+	.name = "hdmi_lpe_audio_irqchip",
+	.irq_mask = lpe_audio_irq_mask,
+	.irq_unmask = lpe_audio_irq_unmask,
+};
+
+static int lpe_audio_irq_init(struct drm_i915_private *dev_priv)
+{
+	int irq = dev_priv->lpe_audio.irq;
+
+	WARN_ON(!intel_irqs_enabled(dev_priv));
+	irq_set_chip_and_handler_name(irq,
+				&lpe_audio_irqchip,
+				handle_simple_irq,
+				"hdmi_lpe_audio_irq_handler");
+
+	return irq_set_chip_data(irq, dev_priv);
+}
+
+static bool lpe_audio_detect(struct drm_i915_private *dev_priv)
+{
+	int lpe_present = false;
+
+	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		static const struct pci_device_id atom_hdaudio_ids[] = {
+			/* Baytrail */
+			{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x0f04)},
+			/* Braswell */
+			{PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x2284)},
+			{}
+		};
+
+		if (!pci_dev_present(atom_hdaudio_ids)) {
+			DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n");
+			lpe_present = true;
+		}
+	}
+	return lpe_present;
+}
+
+static int lpe_audio_setup(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	dev_priv->lpe_audio.irq = irq_alloc_desc(0);
+	if (dev_priv->lpe_audio.irq < 0) {
+		DRM_ERROR("Failed to allocate IRQ desc: %d\n",
+			dev_priv->lpe_audio.irq);
+		ret = dev_priv->lpe_audio.irq;
+		goto err;
+	}
+
+	DRM_DEBUG("irq = %d\n", dev_priv->lpe_audio.irq);
+
+	ret = lpe_audio_irq_init(dev_priv);
+
+	if (ret) {
+		DRM_ERROR("Failed to initialize irqchip for lpe audio: %d\n",
+			ret);
+		goto err_free_irq;
+	}
+
+	dev_priv->lpe_audio.platdev = lpe_audio_platdev_create(dev_priv);
+
+	if (IS_ERR(dev_priv->lpe_audio.platdev)) {
+		ret = PTR_ERR(dev_priv->lpe_audio.platdev);
+		DRM_ERROR("Failed to create lpe audio platform device: %d\n",
+			ret);
+		goto err_free_irq;
+	}
+
+	/* enable chicken bit; at least this is required for Dell Wyse 3040
+	 * with DP outputs (but only sometimes by some reason!)
+	 */
+	I915_WRITE(VLV_AUD_CHICKEN_BIT_REG, VLV_CHICKEN_BIT_DBG_ENABLE);
+
+	return 0;
+err_free_irq:
+	irq_free_desc(dev_priv->lpe_audio.irq);
+err:
+	dev_priv->lpe_audio.irq = -1;
+	dev_priv->lpe_audio.platdev = NULL;
+	return ret;
+}
+
+/**
+ * intel_lpe_audio_irq_handler() - forwards the LPE audio irq
+ * @dev_priv: the i915 drm device private data
+ *
+ * the LPE Audio irq is forwarded to the irq handler registered by LPE audio
+ * driver.
+ */
+void intel_lpe_audio_irq_handler(struct drm_i915_private *dev_priv)
+{
+	int ret;
+
+	if (!HAS_LPE_AUDIO(dev_priv))
+		return;
+
+	ret = generic_handle_irq(dev_priv->lpe_audio.irq);
+	if (ret)
+		DRM_ERROR_RATELIMITED("error handling LPE audio irq: %d\n",
+				ret);
+}
+
+/**
+ * intel_lpe_audio_init() - detect and setup the bridge between HDMI LPE Audio
+ * driver and i915
+ * @dev_priv: the i915 drm device private data
+ *
+ * Return: 0 if successful. non-zero if detection or
+ * llocation/initialization fails
+ */
+int intel_lpe_audio_init(struct drm_i915_private *dev_priv)
+{
+	int ret = -ENODEV;
+
+	if (lpe_audio_detect(dev_priv)) {
+		ret = lpe_audio_setup(dev_priv);
+		if (ret < 0)
+			DRM_ERROR("failed to setup LPE Audio bridge\n");
+	}
+	return ret;
+}
+
+/**
+ * intel_lpe_audio_teardown() - destroy the bridge between HDMI LPE
+ * audio driver and i915
+ * @dev_priv: the i915 drm device private data
+ *
+ * release all the resources for LPE audio <-> i915 bridge.
+ */
+void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
+{
+	struct irq_desc *desc;
+
+	if (!HAS_LPE_AUDIO(dev_priv))
+		return;
+
+	desc = irq_to_desc(dev_priv->lpe_audio.irq);
+
+	lpe_audio_irq_mask(&desc->irq_data);
+
+	lpe_audio_platdev_destroy(dev_priv);
+
+	irq_free_desc(dev_priv->lpe_audio.irq);
+}
+
+
+/**
+ * intel_lpe_audio_notify() - notify lpe audio event
+ * audio driver and i915
+ * @dev_priv: the i915 drm device private data
+ * @eld : ELD data
+ * @port: port id
+ * @tmds_clk_speed: tmds clock frequency in Hz
+ *
+ * Notify lpe audio driver of eld change.
+ */
+void intel_lpe_audio_notify(struct drm_i915_private *dev_priv,
+			    void *eld, int port, int pipe, int tmds_clk_speed,
+			    bool dp_output, int link_rate)
+{
+	unsigned long irq_flags;
+	struct intel_hdmi_lpe_audio_pdata *pdata = NULL;
+	u32 audio_enable;
+
+	if (!HAS_LPE_AUDIO(dev_priv))
+		return;
+
+	pdata = dev_get_platdata(
+		&(dev_priv->lpe_audio.platdev->dev));
+
+	spin_lock_irqsave(&pdata->lpe_audio_slock, irq_flags);
+
+	audio_enable = I915_READ(VLV_AUD_PORT_EN_DBG(port));
+
+	if (eld != NULL) {
+		memcpy(pdata->eld.eld_data, eld,
+			HDMI_MAX_ELD_BYTES);
+		pdata->eld.port_id = port;
+		pdata->eld.pipe_id = pipe;
+		pdata->hdmi_connected = true;
+
+		pdata->dp_output = dp_output;
+		if (tmds_clk_speed)
+			pdata->tmds_clock_speed = tmds_clk_speed;
+		if (link_rate)
+			pdata->link_rate = link_rate;
+
+		/* Unmute the amp for both DP and HDMI */
+		I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
+			   audio_enable & ~VLV_AMP_MUTE);
+
+	} else {
+		memset(pdata->eld.eld_data, 0,
+			HDMI_MAX_ELD_BYTES);
+		pdata->hdmi_connected = false;
+		pdata->dp_output = false;
+
+		/* Mute the amp for both DP and HDMI */
+		I915_WRITE(VLV_AUD_PORT_EN_DBG(port),
+			   audio_enable | VLV_AMP_MUTE);
+	}
+
+	if (pdata->notify_audio_lpe)
+		pdata->notify_audio_lpe(dev_priv->lpe_audio.platdev);
+	else
+		pdata->notify_pending = true;
+
+	spin_unlock_irqrestore(&pdata->lpe_audio_slock,
+			irq_flags);
+}
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 432ee495dec2..ebf8023d21e6 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -360,7 +360,8 @@ execlists_update_context_pdps(struct i915_hw_ppgtt *ppgtt, u32 *reg_state)
 static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 {
 	struct intel_context *ce = &rq->ctx->engine[rq->engine->id];
-	struct i915_hw_ppgtt *ppgtt = rq->ctx->ppgtt;
+	struct i915_hw_ppgtt *ppgtt =
+		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
@@ -1389,7 +1390,20 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 {
 	struct drm_i915_private *dev_priv = engine->i915;
 	struct execlist_port *port = engine->execlist_port;
-	struct intel_context *ce = &request->ctx->engine[engine->id];
+	struct intel_context *ce;
+
+	/* If the request was innocent, we leave the request in the ELSP
+	 * and will try to replay it on restarting. The context image may
+	 * have been corrupted by the reset, in which case we may have
+	 * to service a new GPU hang, but more likely we can continue on
+	 * without impact.
+	 *
+	 * If the request was guilty, we presume the context is corrupt
+	 * and have to at least restore the RING register in the context
+	 * image back to the expected values to skip over the guilty request.
+	 */
+	if (!request || request->fence.error != -EIO)
+		return;
 
 	/* We want a simple context + ring to execute the breadcrumb update.
 	 * We cannot rely on the context being intact across the GPU hang,
@@ -1398,6 +1412,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	 * future request will be after userspace has had the opportunity
 	 * to recreate its own state.
 	 */
+	ce = &request->ctx->engine[engine->id];
 	execlists_init_reg_state(ce->lrc_reg_state,
 				 request->ctx, engine, ce->ring);
 
diff --git a/drivers/gpu/drm/i915/intel_lspcon.c b/drivers/gpu/drm/i915/intel_lspcon.c
index f6d4e6940257..c300647ef604 100644
--- a/drivers/gpu/drm/i915/intel_lspcon.c
+++ b/drivers/gpu/drm/i915/intel_lspcon.c
@@ -158,6 +158,8 @@ static bool lspcon_probe(struct intel_lspcon *lspcon)
 static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
 {
 	struct intel_dp *intel_dp = lspcon_to_intel_dp(lspcon);
+	struct intel_digital_port *dig_port = dp_to_dig_port(intel_dp);
+	struct drm_i915_private *dev_priv = to_i915(dig_port->base.base.dev);
 	unsigned long start = jiffies;
 
 	if (!lspcon->desc_valid)
@@ -173,7 +175,8 @@ static void lspcon_resume_in_pcon_wa(struct intel_lspcon *lspcon)
 		if (!__intel_dp_read_desc(intel_dp, &desc))
 			return;
 
-		if (!memcmp(&intel_dp->desc, &desc, sizeof(desc))) {
+		if (intel_digital_port_connected(dev_priv, dig_port) &&
+		    !memcmp(&intel_dp->desc, &desc, sizeof(desc))) {
 			DRM_DEBUG_KMS("LSPCON recovering in PCON mode after %u ms\n",
 				      jiffies_to_msecs(jiffies - start));
 			return;
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index f4429f67a4e3..4a862a358c70 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -982,7 +982,18 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 			opregion->vbt_size = vbt_size;
 		} else {
 			vbt = base + OPREGION_VBT_OFFSET;
-			vbt_size = OPREGION_ASLE_EXT_OFFSET - OPREGION_VBT_OFFSET;
+			/*
+			 * The VBT specification says that if the ASLE ext
+			 * mailbox is not used its area is reserved, but
+			 * on some CHT boards the VBT extends into the
+			 * ASLE ext area. Allow this even though it is
+			 * against the spec, so we do not end up rejecting
+			 * the VBT on those boards (and end up not finding the
+			 * LCD panel because of this).
+			 */
+			vbt_size = (mboxes & MBOX_ASLE_EXT) ?
+				OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
+			vbt_size -= OPREGION_VBT_OFFSET;
 			if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
 				DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
 				opregion->vbt = vbt;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index 69035e4f9b3b..91bc4abf5d3e 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -599,10 +599,62 @@ out:
 static void reset_ring_common(struct intel_engine_cs *engine,
 			      struct drm_i915_gem_request *request)
 {
-	struct intel_ring *ring = request->ring;
+	/* Try to restore the logical GPU state to match the continuation
+	 * of the request queue. If we skip the context/PD restore, then
+	 * the next request may try to execute assuming that its context
+	 * is valid and loaded on the GPU and so may try to access invalid
+	 * memory, prompting repeated GPU hangs.
+	 *
+	 * If the request was guilty, we still restore the logical state
+	 * in case the next request requires it (e.g. the aliasing ppgtt),
+	 * but skip over the hung batch.
+	 *
+	 * If the request was innocent, we try to replay the request with
+	 * the restored context.
+	 */
+	if (request) {
+		struct drm_i915_private *dev_priv = request->i915;
+		struct intel_context *ce = &request->ctx->engine[engine->id];
+		struct i915_hw_ppgtt *ppgtt;
+
+		/* FIXME consider gen8 reset */
+
+		if (ce->state) {
+			I915_WRITE(CCID,
+				   i915_ggtt_offset(ce->state) |
+				   BIT(8) /* must be set! */ |
+				   CCID_EXTENDED_STATE_SAVE |
+				   CCID_EXTENDED_STATE_RESTORE |
+				   CCID_EN);
+		}
 
-	ring->head = request->postfix;
-	ring->last_retired_head = -1;
+		ppgtt = request->ctx->ppgtt ?: engine->i915->mm.aliasing_ppgtt;
+		if (ppgtt) {
+			u32 pd_offset = ppgtt->pd.base.ggtt_offset << 10;
+
+			I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
+			I915_WRITE(RING_PP_DIR_BASE(engine), pd_offset);
+
+			/* Wait for the PD reload to complete */
+			if (intel_wait_for_register(dev_priv,
+						    RING_PP_DIR_BASE(engine),
+						    BIT(0), 0,
+						    10))
+				DRM_ERROR("Wait for reload of ppgtt page-directory timed out\n");
+
+			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
+		}
+
+		/* If the rq hung, jump to its breadcrumb and skip the batch */
+		if (request->fence.error == -EIO) {
+			struct intel_ring *ring = request->ring;
+
+			ring->head = request->postfix;
+			ring->last_retired_head = -1;
+		}
+	} else {
+		engine->legacy_active_context = NULL;
+	}
 }
 
 static int intel_ring_workarounds_emit(struct drm_i915_gem_request *req)
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 33404295b447..f562cb7964b0 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -357,8 +357,8 @@ static int imx_drm_bind(struct device *dev)
 	 * this value would be used to check framebuffer size limitation
 	 * at drm_mode_addfb().
 	 */
-	drm->mode_config.min_width = 64;
-	drm->mode_config.min_height = 64;
+	drm->mode_config.min_width = 1;
+	drm->mode_config.min_height = 1;
 	drm->mode_config.max_width = 4096;
 	drm->mode_config.max_height = 4096;
 	drm->mode_config.funcs = &imx_drm_mode_config_funcs;
@@ -389,8 +389,7 @@ static int imx_drm_bind(struct device *dev)
 		dev_warn(dev, "Invalid legacyfb_depth.  Defaulting to 16bpp\n");
 		legacyfb_depth = 16;
 	}
-	imxdrm->fbhelper = drm_fbdev_cma_init(drm, legacyfb_depth,
-				drm->mode_config.num_crtc, MAX_CRTC);
+	imxdrm->fbhelper = drm_fbdev_cma_init(drm, legacyfb_depth, MAX_CRTC);
 	if (IS_ERR(imxdrm->fbhelper)) {
 		ret = PTR_ERR(imxdrm->fbhelper);
 		imxdrm->fbhelper = NULL;
diff --git a/drivers/gpu/drm/imx/imx-tve.c b/drivers/gpu/drm/imx/imx-tve.c
index 8f8aa4a63122..4826bb781723 100644
--- a/drivers/gpu/drm/imx/imx-tve.c
+++ b/drivers/gpu/drm/imx/imx-tve.c
@@ -98,6 +98,8 @@
 /* TVE_TST_MODE_REG */
 #define TVE_TVDAC_TEST_MODE_MASK	(0x7 << 0)
 
+#define IMX_TVE_DAC_VOLTAGE	2750000
+
 enum {
 	TVE_MODE_TVOUT,
 	TVE_MODE_VGA,
@@ -616,9 +618,8 @@ static int imx_tve_bind(struct device *dev, struct device *master, void *data)
 
 	tve->dac_reg = devm_regulator_get(dev, "dac");
 	if (!IS_ERR(tve->dac_reg)) {
-		ret = regulator_set_voltage(tve->dac_reg, 2750000, 2750000);
-		if (ret)
-			return ret;
+		if (regulator_get_voltage(tve->dac_reg) != IMX_TVE_DAC_VOLTAGE)
+			dev_warn(dev, "dac voltage is not %d uV\n", IMX_TVE_DAC_VOLTAGE);
 		ret = regulator_enable(tve->dac_reg);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/meson/Makefile b/drivers/gpu/drm/meson/Makefile
index 2591978b8aad..92cf84530f49 100644
--- a/drivers/gpu/drm/meson/Makefile
+++ b/drivers/gpu/drm/meson/Makefile
@@ -1,4 +1,4 @@
-meson-y := meson_drv.o meson_plane.o meson_crtc.o meson_venc_cvbs.o
-meson-y += meson_viu.o meson_vpp.o meson_venc.o meson_vclk.o meson_canvas.o
+meson-drm-y := meson_drv.o meson_plane.o meson_crtc.o meson_venc_cvbs.o
+meson-drm-y += meson_viu.o meson_vpp.o meson_venc.o meson_vclk.o meson_canvas.o
 
-obj-$(CONFIG_DRM_MESON) += meson.o
+obj-$(CONFIG_DRM_MESON) += meson-drm.o
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index ff1f6019b97b..6f2fd82ed483 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -279,7 +279,6 @@ static int meson_drv_probe(struct platform_device *pdev)
 	drm->mode_config.funcs = &meson_mode_config_funcs;
 
 	priv->fbdev = drm_fbdev_cma_init(drm, 32,
-					 drm->mode_config.num_crtc,
 					 drm->mode_config.num_connector);
 	if (IS_ERR(priv->fbdev)) {
 		ret = PTR_ERR(priv->fbdev);
@@ -329,8 +328,7 @@ static struct platform_driver meson_drm_platform_driver = {
 	.probe      = meson_drv_probe,
 	.remove     = meson_drv_remove,
 	.driver     = {
-		.owner  = THIS_MODULE,
-		.name   = DRIVER_NAME,
+		.name	= "meson-drm",
 		.of_match_table = dt_match,
 	},
 };
diff --git a/drivers/gpu/drm/mga/mga_drv.h b/drivers/gpu/drm/mga/mga_drv.h
index d5ce829b3199..45cf363d25ad 100644
--- a/drivers/gpu/drm/mga/mga_drv.h
+++ b/drivers/gpu/drm/mga/mga_drv.h
@@ -266,7 +266,7 @@ do {									\
 do {									\
 	if (MGA_VERBOSE) {						\
 		DRM_INFO("BEGIN_DMA(%d)\n", (n));			\
-		DRM_INFO("   space=0x%x req=0x%Zx\n",			\
+		DRM_INFO("   space=0x%x req=0x%zx\n",			\
 			 dev_priv->prim.space, (n) * DMA_BLOCK_SIZE);	\
 	}								\
 	prim = dev_priv->prim.start;					\
@@ -313,7 +313,7 @@ do {									\
 #define DMA_WRITE(offset, val)						\
 do {									\
 	if (MGA_VERBOSE)						\
-		DRM_INFO("   DMA_WRITE( 0x%08x ) at 0x%04Zx\n",		\
+		DRM_INFO("   DMA_WRITE( 0x%08x ) at 0x%04zx\n",		\
 			 (u32)(val), write + (offset) * sizeof(u32));	\
 	*(volatile u32 *)(prim + write + (offset) * sizeof(u32)) = val;	\
 } while (0)
diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c
index 1a665e1671b8..a449bb91213a 100644
--- a/drivers/gpu/drm/mgag200/mgag200_fb.c
+++ b/drivers/gpu/drm/mgag200/mgag200_fb.c
@@ -286,7 +286,7 @@ int mgag200_fbdev_init(struct mga_device *mdev)
 	drm_fb_helper_prepare(mdev->dev, &mfbdev->helper, &mga_fb_helper_funcs);
 
 	ret = drm_fb_helper_init(mdev->dev, &mfbdev->helper,
-				 mdev->num_crtc, MGAG200FB_CONN_LIMIT);
+				 MGAG200FB_CONN_LIMIT);
 	if (ret)
 		goto err_fb_helper;
 
diff --git a/drivers/gpu/drm/msm/Kconfig b/drivers/gpu/drm/msm/Kconfig
index 7f78da695dff..5b8e23d051f2 100644
--- a/drivers/gpu/drm/msm/Kconfig
+++ b/drivers/gpu/drm/msm/Kconfig
@@ -72,3 +72,10 @@ config DRM_MSM_DSI_28NM_8960_PHY
 	help
 	  Choose this option if the 28nm DSI PHY 8960 variant is used on the
 	  platform.
+
+config DRM_MSM_DSI_14NM_PHY
+	bool "Enable DSI 14nm PHY driver in MSM DRM (used by MSM8996/APQ8096)"
+	depends on DRM_MSM_DSI
+	default y
+	help
+	  Choose this option if DSI PHY on 8996 is used on the platform.
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 028c24df2291..39055362da95 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -76,11 +76,13 @@ msm-$(CONFIG_DRM_MSM_DSI) += dsi/dsi.o \
 msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/phy/dsi_phy_28nm.o
 msm-$(CONFIG_DRM_MSM_DSI_20NM_PHY) += dsi/phy/dsi_phy_20nm.o
 msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/phy/dsi_phy_28nm_8960.o
+msm-$(CONFIG_DRM_MSM_DSI_14NM_PHY) += dsi/phy/dsi_phy_14nm.o
 
 ifeq ($(CONFIG_DRM_MSM_DSI_PLL),y)
 msm-y += dsi/pll/dsi_pll.o
 msm-$(CONFIG_DRM_MSM_DSI_28NM_PHY) += dsi/pll/dsi_pll_28nm.o
 msm-$(CONFIG_DRM_MSM_DSI_28NM_8960_PHY) += dsi/pll/dsi_pll_28nm_8960.o
+msm-$(CONFIG_DRM_MSM_DSI_14NM_PHY) += dsi/pll/dsi_pll_14nm.o
 endif
 
 obj-$(CONFIG_DRM_MSM)	+= msm.o
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index b8647198c11c..4414cf73735d 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -12,6 +12,7 @@
  */
 
 #include "msm_gem.h"
+#include "msm_mmu.h"
 #include "a5xx_gpu.h"
 
 extern bool hang_debug;
@@ -327,7 +328,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 	/* Enable RBBM error reporting bits */
 	gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL0, 0x00000001);
 
-	if (adreno_gpu->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
+	if (adreno_gpu->info->quirks & ADRENO_QUIRK_FAULT_DETECT_MASK) {
 		/*
 		 * Mask out the activity signals from RB1-3 to avoid false
 		 * positives
@@ -381,7 +382,7 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 
 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, (0x400 << 11 | 0x300 << 22));
 
-	if (adreno_gpu->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
+	if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
 		gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 
 	gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
@@ -573,6 +574,19 @@ static bool a5xx_idle(struct msm_gpu *gpu)
 	return true;
 }
 
+static int a5xx_fault_handler(void *arg, unsigned long iova, int flags)
+{
+	struct msm_gpu *gpu = arg;
+	pr_warn_ratelimited("*** gpu fault: iova=%08lx, flags=%d (%u,%u,%u,%u)\n",
+			iova, flags,
+			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(4)),
+			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(5)),
+			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(6)),
+			gpu_read(gpu, REG_A5XX_CP_SCRATCH_REG(7)));
+
+	return -EFAULT;
+}
+
 static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 {
 	u32 status = gpu_read(gpu, REG_A5XX_CP_INTERRUPT_STATUS);
@@ -884,5 +898,8 @@ struct msm_gpu *a5xx_gpu_init(struct drm_device *dev)
 		return ERR_PTR(ret);
 	}
 
+	if (gpu->aspace)
+		msm_mmu_set_fault_handler(gpu->aspace->mmu, gpu, a5xx_fault_handler);
+
 	return gpu;
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index 893eb2b2531b..ece39b16a864 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -75,12 +75,14 @@ static const struct adreno_info gpulist[] = {
 		.gmem  = (SZ_1M + SZ_512K),
 		.init  = a4xx_gpu_init,
 	}, {
-		.rev = ADRENO_REV(5, 3, 0, ANY_ID),
+		.rev = ADRENO_REV(5, 3, 0, 2),
 		.revn = 530,
 		.name = "A530",
 		.pm4fw = "a530_pm4.fw",
 		.pfpfw = "a530_pfp.fw",
 		.gmem = SZ_1M,
+		.quirks = ADRENO_QUIRK_TWO_PASS_USE_WFI |
+			ADRENO_QUIRK_FAULT_DETECT_MASK,
 		.init = a5xx_gpu_init,
 		.gpmufw = "a530v3_gpmu.fw2",
 	},
@@ -181,22 +183,51 @@ static void set_gpu_pdev(struct drm_device *dev,
 	priv->gpu_pdev = pdev;
 }
 
-static const struct {
-	const char *str;
-	uint32_t flag;
-} quirks[] = {
-	{ "qcom,gpu-quirk-two-pass-use-wfi", ADRENO_QUIRK_TWO_PASS_USE_WFI },
-	{ "qcom,gpu-quirk-fault-detect-mask", ADRENO_QUIRK_FAULT_DETECT_MASK },
-};
+static int find_chipid(struct device *dev, u32 *chipid)
+{
+	struct device_node *node = dev->of_node;
+	const char *compat;
+	int ret;
+
+	/* first search the compat strings for qcom,adreno-XYZ.W: */
+	ret = of_property_read_string_index(node, "compatible", 0, &compat);
+	if (ret == 0) {
+		unsigned rev, patch;
+
+		if (sscanf(compat, "qcom,adreno-%u.%u", &rev, &patch) == 2) {
+			*chipid = 0;
+			*chipid |= (rev / 100) << 24;  /* core */
+			rev %= 100;
+			*chipid |= (rev / 10) << 16;   /* major */
+			rev %= 10;
+			*chipid |= rev << 8;           /* minor */
+			*chipid |= patch;
+
+			return 0;
+		}
+	}
+
+	/* and if that fails, fall back to legacy "qcom,chipid" property: */
+	ret = of_property_read_u32(node, "qcom,chipid", chipid);
+	if (ret)
+		return ret;
+
+	dev_warn(dev, "Using legacy qcom,chipid binding!\n");
+	dev_warn(dev, "Use compatible qcom,adreno-%u%u%u.%u instead.\n",
+			(*chipid >> 24) & 0xff, (*chipid >> 16) & 0xff,
+			(*chipid >> 8) & 0xff, *chipid & 0xff);
+
+	return 0;
+}
 
 static int adreno_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct adreno_platform_config config = {};
 	struct device_node *child, *node = dev->of_node;
 	u32 val;
-	int ret, i;
+	int ret;
 
-	ret = of_property_read_u32(node, "qcom,chipid", &val);
+	ret = find_chipid(dev, &val);
 	if (ret) {
 		dev_err(dev, "could not find chipid: %d\n", ret);
 		return ret;
@@ -224,14 +255,12 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
 	}
 
 	if (!config.fast_rate) {
-		dev_err(dev, "could not find clk rates\n");
-		return -ENXIO;
+		dev_warn(dev, "could not find clk rates\n");
+		/* This is a safe low speed for all devices: */
+		config.fast_rate = 200000000;
+		config.slow_rate = 27000000;
 	}
 
-	for (i = 0; i < ARRAY_SIZE(quirks); i++)
-		if (of_property_read_bool(node, quirks[i].str))
-			config.quirks |= quirks[i].flag;
-
 	dev->platform_data = &config;
 	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
 	return 0;
@@ -260,6 +289,7 @@ static int adreno_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id dt_match[] = {
+	{ .compatible = "qcom,adreno" },
 	{ .compatible = "qcom,adreno-3xx" },
 	/* for backwards compat w/ downstream kgsl DT files: */
 	{ .compatible = "qcom,kgsl-3d0" },
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index 686a580c711a..c9bd1e6225f4 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -352,7 +352,6 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	adreno_gpu->gmem = adreno_gpu->info->gmem;
 	adreno_gpu->revn = adreno_gpu->info->revn;
 	adreno_gpu->rev = config->rev;
-	adreno_gpu->quirks = config->quirks;
 
 	gpu->fast_rate = config->fast_rate;
 	gpu->slow_rate = config->slow_rate;
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index e8d55b0306ed..42e444a67630 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -75,6 +75,7 @@ struct adreno_info {
 	const char *pm4fw, *pfpfw;
 	const char *gpmufw;
 	uint32_t gmem;
+	enum adreno_quirks quirks;
 	struct msm_gpu *(*init)(struct drm_device *dev);
 };
 
@@ -116,8 +117,6 @@ struct adreno_gpu {
 	 * code (a3xx_gpu.c) and stored in this common location.
 	 */
 	const unsigned int *reg_offsets;
-
-	uint32_t quirks;
 };
 #define to_adreno_gpu(x) container_of(x, struct adreno_gpu, base)
 
@@ -128,7 +127,6 @@ struct adreno_platform_config {
 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
 	struct msm_bus_scale_pdata *bus_scale_table;
 #endif
-	uint32_t quirks;
 };
 
 #define ADRENO_IDLE_TIMEOUT msecs_to_jiffies(1000)
diff --git a/drivers/gpu/drm/msm/dsi/dsi.c b/drivers/gpu/drm/msm/dsi/dsi.c
index ec572f8389ed..311c1c1e7d6c 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.c
+++ b/drivers/gpu/drm/msm/dsi/dsi.c
@@ -18,9 +18,7 @@ struct drm_encoder *msm_dsi_get_encoder(struct msm_dsi *msm_dsi)
 	if (!msm_dsi || !msm_dsi_device_connected(msm_dsi))
 		return NULL;
 
-	return (msm_dsi->device_flags & MIPI_DSI_MODE_VIDEO) ?
-		msm_dsi->encoders[MSM_DSI_VIDEO_ENCODER_ID] :
-		msm_dsi->encoders[MSM_DSI_CMD_ENCODER_ID];
+	return msm_dsi->encoder;
 }
 
 static int dsi_get_phy(struct msm_dsi *msm_dsi)
@@ -187,14 +185,13 @@ void __exit msm_dsi_unregister(void)
 }
 
 int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
-		struct drm_encoder *encoders[MSM_DSI_ENCODER_NUM])
+			 struct drm_encoder *encoder)
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct drm_bridge *ext_bridge;
-	int ret, i;
+	int ret;
 
-	if (WARN_ON(!encoders[MSM_DSI_VIDEO_ENCODER_ID] ||
-		!encoders[MSM_DSI_CMD_ENCODER_ID]))
+	if (WARN_ON(!encoder))
 		return -EINVAL;
 
 	msm_dsi->dev = dev;
@@ -205,6 +202,8 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
 		goto fail;
 	}
 
+	msm_dsi->encoder = encoder;
+
 	msm_dsi->bridge = msm_dsi_manager_bridge_init(msm_dsi->id);
 	if (IS_ERR(msm_dsi->bridge)) {
 		ret = PTR_ERR(msm_dsi->bridge);
@@ -213,11 +212,6 @@ int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
 		goto fail;
 	}
 
-	for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
-		encoders[i]->bridge = msm_dsi->bridge;
-		msm_dsi->encoders[i] = encoders[i];
-	}
-
 	/*
 	 * check if the dsi encoder output is connected to a panel or an
 	 * external bridge. We create a connector only if we're connected to a
diff --git a/drivers/gpu/drm/msm/dsi/dsi.h b/drivers/gpu/drm/msm/dsi/dsi.h
index 03f115f532c2..32369975d155 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.h
@@ -27,14 +27,24 @@
 #define DSI_1	1
 #define DSI_MAX	2
 
+struct msm_dsi_phy_shared_timings;
+struct msm_dsi_phy_clk_request;
+
 enum msm_dsi_phy_type {
 	MSM_DSI_PHY_28NM_HPM,
 	MSM_DSI_PHY_28NM_LP,
 	MSM_DSI_PHY_20NM,
 	MSM_DSI_PHY_28NM_8960,
+	MSM_DSI_PHY_14NM,
 	MSM_DSI_PHY_MAX
 };
 
+enum msm_dsi_phy_usecase {
+	MSM_DSI_PHY_STANDALONE,
+	MSM_DSI_PHY_MASTER,
+	MSM_DSI_PHY_SLAVE,
+};
+
 #define DSI_DEV_REGULATOR_MAX	8
 #define DSI_BUS_CLK_MAX		4
 
@@ -73,8 +83,8 @@ struct msm_dsi {
 	struct device *phy_dev;
 	bool phy_enabled;
 
-	/* the encoders we are hooked to (outside of dsi block) */
-	struct drm_encoder *encoders[MSM_DSI_ENCODER_NUM];
+	/* the encoder we are hooked to (outside of dsi block) */
+	struct drm_encoder *encoder;
 
 	int id;
 };
@@ -84,12 +94,9 @@ struct drm_bridge *msm_dsi_manager_bridge_init(u8 id);
 void msm_dsi_manager_bridge_destroy(struct drm_bridge *bridge);
 struct drm_connector *msm_dsi_manager_connector_init(u8 id);
 struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id);
-int msm_dsi_manager_phy_enable(int id,
-		const unsigned long bit_rate, const unsigned long esc_rate,
-		u32 *clk_pre, u32 *clk_post);
-void msm_dsi_manager_phy_disable(int id);
 int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg);
 bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len);
+void msm_dsi_manager_attach_dsi_device(int id, u32 device_flags);
 int msm_dsi_manager_register(struct msm_dsi *msm_dsi);
 void msm_dsi_manager_unregister(struct msm_dsi *msm_dsi);
 
@@ -111,6 +118,8 @@ int msm_dsi_pll_get_clk_provider(struct msm_dsi_pll *pll,
 	struct clk **byte_clk_provider, struct clk **pixel_clk_provider);
 void msm_dsi_pll_save_state(struct msm_dsi_pll *pll);
 int msm_dsi_pll_restore_state(struct msm_dsi_pll *pll);
+int msm_dsi_pll_set_usecase(struct msm_dsi_pll *pll,
+			    enum msm_dsi_phy_usecase uc);
 #else
 static inline struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev,
 			 enum msm_dsi_phy_type type, int id) {
@@ -131,6 +140,11 @@ static inline int msm_dsi_pll_restore_state(struct msm_dsi_pll *pll)
 {
 	return 0;
 }
+static inline int msm_dsi_pll_set_usecase(struct msm_dsi_pll *pll,
+					  enum msm_dsi_phy_usecase uc)
+{
+	return -ENODEV;
+}
 #endif
 
 /* dsi host */
@@ -146,7 +160,8 @@ void msm_dsi_host_cmd_xfer_commit(struct mipi_dsi_host *host,
 					u32 dma_base, u32 len);
 int msm_dsi_host_enable(struct mipi_dsi_host *host);
 int msm_dsi_host_disable(struct mipi_dsi_host *host);
-int msm_dsi_host_power_on(struct mipi_dsi_host *host);
+int msm_dsi_host_power_on(struct mipi_dsi_host *host,
+			struct msm_dsi_phy_shared_timings *phy_shared_timings);
 int msm_dsi_host_power_off(struct mipi_dsi_host *host);
 int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 					struct drm_display_mode *mode);
@@ -157,6 +172,9 @@ int msm_dsi_host_register(struct mipi_dsi_host *host, bool check_defer);
 void msm_dsi_host_unregister(struct mipi_dsi_host *host);
 int msm_dsi_host_set_src_pll(struct mipi_dsi_host *host,
 			struct msm_dsi_pll *src_pll);
+void msm_dsi_host_reset_phy(struct mipi_dsi_host *host);
+void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
+	struct msm_dsi_phy_clk_request *clk_req);
 void msm_dsi_host_destroy(struct mipi_dsi_host *host);
 int msm_dsi_host_modeset_init(struct mipi_dsi_host *host,
 					struct drm_device *dev);
@@ -164,14 +182,27 @@ int msm_dsi_host_init(struct msm_dsi *msm_dsi);
 
 /* dsi phy */
 struct msm_dsi_phy;
+struct msm_dsi_phy_shared_timings {
+	u32 clk_post;
+	u32 clk_pre;
+	bool clk_pre_inc_by_2;
+};
+
+struct msm_dsi_phy_clk_request {
+	unsigned long bitclk_rate;
+	unsigned long escclk_rate;
+};
+
 void msm_dsi_phy_driver_register(void);
 void msm_dsi_phy_driver_unregister(void);
 int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
-	const unsigned long bit_rate, const unsigned long esc_rate);
+			struct msm_dsi_phy_clk_request *clk_req);
 void msm_dsi_phy_disable(struct msm_dsi_phy *phy);
-void msm_dsi_phy_get_clk_pre_post(struct msm_dsi_phy *phy,
-					u32 *clk_pre, u32 *clk_post);
+void msm_dsi_phy_get_shared_timings(struct msm_dsi_phy *phy,
+			struct msm_dsi_phy_shared_timings *shared_timing);
 struct msm_dsi_pll *msm_dsi_phy_get_pll(struct msm_dsi_phy *phy);
+void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy,
+			     enum msm_dsi_phy_usecase uc);
 
 #endif /* __DSI_CONNECTOR_H__ */
 
diff --git a/drivers/gpu/drm/msm/dsi/dsi.xml.h b/drivers/gpu/drm/msm/dsi/dsi.xml.h
index 39dff7d5e89b..b3d70ea42891 100644
--- a/drivers/gpu/drm/msm/dsi/dsi.xml.h
+++ b/drivers/gpu/drm/msm/dsi/dsi.xml.h
@@ -8,19 +8,10 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  36965 bytes, from 2016-11-26 23:01:08)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  27887 bytes, from 2015-10-22 16:34:52)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
-- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
-
-Copyright (C) 2013-2015 by the following authors:
+- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/dsi/dsi.xml    (  33004 bytes, from 2017-01-11 05:19:19)
+- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-05-09 06:32:54)
+
+Copyright (C) 2013-2017 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
@@ -1304,5 +1295,257 @@ static inline uint32_t DSI_20nm_PHY_TIMING_CTRL_11_TRIG3_CMD(uint32_t val)
 
 #define REG_DSI_20nm_PHY_REGULATOR_CAL_PWR_CFG			0x00000018
 
+#define REG_DSI_14nm_PHY_CMN_REVISION_ID0			0x00000000
+
+#define REG_DSI_14nm_PHY_CMN_REVISION_ID1			0x00000004
+
+#define REG_DSI_14nm_PHY_CMN_REVISION_ID2			0x00000008
+
+#define REG_DSI_14nm_PHY_CMN_REVISION_ID3			0x0000000c
+
+#define REG_DSI_14nm_PHY_CMN_CLK_CFG0				0x00000010
+#define DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0__MASK		0x000000f0
+#define DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0__SHIFT		4
+static inline uint32_t DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0__SHIFT) & DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_3_0__MASK;
+}
+#define DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4__MASK		0x000000f0
+#define DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4__SHIFT		4
+static inline uint32_t DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4__SHIFT) & DSI_14nm_PHY_CMN_CLK_CFG0_DIV_CTRL_7_4__MASK;
+}
+
+#define REG_DSI_14nm_PHY_CMN_CLK_CFG1				0x00000014
+#define DSI_14nm_PHY_CMN_CLK_CFG1_DSICLK_SEL			0x00000001
+
+#define REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL			0x00000018
+#define DSI_14nm_PHY_CMN_GLBL_TEST_CTRL_BITCLK_HS_SEL		0x00000004
+
+#define REG_DSI_14nm_PHY_CMN_CTRL_0				0x0000001c
+
+#define REG_DSI_14nm_PHY_CMN_CTRL_1				0x00000020
+
+#define REG_DSI_14nm_PHY_CMN_HW_TRIGGER				0x00000024
+
+#define REG_DSI_14nm_PHY_CMN_SW_CFG0				0x00000028
+
+#define REG_DSI_14nm_PHY_CMN_SW_CFG1				0x0000002c
+
+#define REG_DSI_14nm_PHY_CMN_SW_CFG2				0x00000030
+
+#define REG_DSI_14nm_PHY_CMN_HW_CFG0				0x00000034
+
+#define REG_DSI_14nm_PHY_CMN_HW_CFG1				0x00000038
+
+#define REG_DSI_14nm_PHY_CMN_HW_CFG2				0x0000003c
+
+#define REG_DSI_14nm_PHY_CMN_HW_CFG3				0x00000040
+
+#define REG_DSI_14nm_PHY_CMN_HW_CFG4				0x00000044
+
+#define REG_DSI_14nm_PHY_CMN_PLL_CNTRL				0x00000048
+#define DSI_14nm_PHY_CMN_PLL_CNTRL_PLL_START			0x00000001
+
+#define REG_DSI_14nm_PHY_CMN_LDO_CNTRL				0x0000004c
+#define DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL__MASK		0x0000003f
+#define DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL__SHIFT) & DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN(uint32_t i0) { return 0x00000000 + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_CFG0(uint32_t i0) { return 0x00000000 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_CFG0_PREPARE_DLY__MASK			0x000000c0
+#define DSI_14nm_PHY_LN_CFG0_PREPARE_DLY__SHIFT			6
+static inline uint32_t DSI_14nm_PHY_LN_CFG0_PREPARE_DLY(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_CFG0_PREPARE_DLY__SHIFT) & DSI_14nm_PHY_LN_CFG0_PREPARE_DLY__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_CFG1(uint32_t i0) { return 0x00000004 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_CFG1_HALFBYTECLK_EN			0x00000001
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_CFG2(uint32_t i0) { return 0x00000008 + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_CFG3(uint32_t i0) { return 0x0000000c + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TEST_DATAPATH(uint32_t i0) { return 0x00000010 + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TEST_STR(uint32_t i0) { return 0x00000014 + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_4(uint32_t i0) { return 0x00000018 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_5(uint32_t i0) { return 0x0000001c + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_6(uint32_t i0) { return 0x00000020 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_7(uint32_t i0) { return 0x00000024 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_8(uint32_t i0) { return 0x00000028 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_9(uint32_t i0) { return 0x0000002c + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO__MASK		0x00000007
+#define DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO__MASK;
+}
+#define DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE__MASK		0x00000070
+#define DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE__SHIFT		4
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_10(uint32_t i0) { return 0x00000030 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET__MASK		0x00000007
+#define DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_TIMING_CTRL_11(uint32_t i0) { return 0x00000034 + 0x80*i0; }
+#define DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD__MASK		0x000000ff
+#define DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD__SHIFT		0
+static inline uint32_t DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD(uint32_t val)
+{
+	return ((val) << DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD__SHIFT) & DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD__MASK;
+}
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_STRENGTH_CTRL_0(uint32_t i0) { return 0x00000038 + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_STRENGTH_CTRL_1(uint32_t i0) { return 0x0000003c + 0x80*i0; }
+
+static inline uint32_t REG_DSI_14nm_PHY_LN_VREG_CNTRL(uint32_t i0) { return 0x00000064 + 0x80*i0; }
+
+#define REG_DSI_14nm_PHY_PLL_IE_TRIM				0x00000000
+
+#define REG_DSI_14nm_PHY_PLL_IP_TRIM				0x00000004
+
+#define REG_DSI_14nm_PHY_PLL_IPTAT_TRIM				0x00000010
+
+#define REG_DSI_14nm_PHY_PLL_CLKBUFLR_EN			0x0000001c
+
+#define REG_DSI_14nm_PHY_PLL_SYSCLK_EN_RESET			0x00000028
+
+#define REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL			0x0000002c
+
+#define REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL2			0x00000030
+
+#define REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL3			0x00000034
+
+#define REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL4			0x00000038
+
+#define REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL5			0x0000003c
+
+#define REG_DSI_14nm_PHY_PLL_KVCO_DIV_REF1			0x00000040
+
+#define REG_DSI_14nm_PHY_PLL_KVCO_DIV_REF2			0x00000044
+
+#define REG_DSI_14nm_PHY_PLL_KVCO_COUNT1			0x00000048
+
+#define REG_DSI_14nm_PHY_PLL_KVCO_COUNT2			0x0000004c
+
+#define REG_DSI_14nm_PHY_PLL_VREF_CFG1				0x0000005c
+
+#define REG_DSI_14nm_PHY_PLL_KVCO_CODE				0x00000058
+
+#define REG_DSI_14nm_PHY_PLL_VCO_DIV_REF1			0x0000006c
+
+#define REG_DSI_14nm_PHY_PLL_VCO_DIV_REF2			0x00000070
+
+#define REG_DSI_14nm_PHY_PLL_VCO_COUNT1				0x00000074
+
+#define REG_DSI_14nm_PHY_PLL_VCO_COUNT2				0x00000078
+
+#define REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP1			0x0000007c
+
+#define REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP2			0x00000080
+
+#define REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP3			0x00000084
+
+#define REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP_EN			0x00000088
+
+#define REG_DSI_14nm_PHY_PLL_PLL_VCO_TUNE			0x0000008c
+
+#define REG_DSI_14nm_PHY_PLL_DEC_START				0x00000090
+
+#define REG_DSI_14nm_PHY_PLL_SSC_EN_CENTER			0x00000094
+
+#define REG_DSI_14nm_PHY_PLL_SSC_ADJ_PER1			0x00000098
+
+#define REG_DSI_14nm_PHY_PLL_SSC_ADJ_PER2			0x0000009c
+
+#define REG_DSI_14nm_PHY_PLL_SSC_PER1				0x000000a0
+
+#define REG_DSI_14nm_PHY_PLL_SSC_PER2				0x000000a4
+
+#define REG_DSI_14nm_PHY_PLL_SSC_STEP_SIZE1			0x000000a8
+
+#define REG_DSI_14nm_PHY_PLL_SSC_STEP_SIZE2			0x000000ac
+
+#define REG_DSI_14nm_PHY_PLL_DIV_FRAC_START1			0x000000b4
+
+#define REG_DSI_14nm_PHY_PLL_DIV_FRAC_START2			0x000000b8
+
+#define REG_DSI_14nm_PHY_PLL_DIV_FRAC_START3			0x000000bc
+
+#define REG_DSI_14nm_PHY_PLL_TXCLK_EN				0x000000c0
+
+#define REG_DSI_14nm_PHY_PLL_PLL_CRCTRL				0x000000c4
+
+#define REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS		0x000000cc
+
+#define REG_DSI_14nm_PHY_PLL_PLL_MISC1				0x000000e8
+
+#define REG_DSI_14nm_PHY_PLL_CP_SET_CUR				0x000000f0
+
+#define REG_DSI_14nm_PHY_PLL_PLL_ICPMSET			0x000000f4
+
+#define REG_DSI_14nm_PHY_PLL_PLL_ICPCSET			0x000000f8
+
+#define REG_DSI_14nm_PHY_PLL_PLL_ICP_SET			0x000000fc
+
+#define REG_DSI_14nm_PHY_PLL_PLL_LPF1				0x00000100
+
+#define REG_DSI_14nm_PHY_PLL_PLL_LPF2_POSTDIV			0x00000104
+
+#define REG_DSI_14nm_PHY_PLL_PLL_BANDGAP			0x00000108
+
 
 #endif /* DSI_XML */
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.c b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
index 63436d8ee470..a5d75c9b3a73 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.c
@@ -94,6 +94,30 @@ static const struct msm_dsi_config msm8994_dsi_cfg = {
 	.num_dsi = 2,
 };
 
+/*
+ * TODO: core_mmss_clk fails to enable for some reason, but things work fine
+ * without it too. Figure out why it doesn't enable and uncomment below
+ */
+static const char * const dsi_8996_bus_clk_names[] = {
+	"mdp_core_clk", "iface_clk", "bus_clk", /* "core_mmss_clk", */
+};
+
+static const struct msm_dsi_config msm8996_dsi_cfg = {
+	.io_offset = DSI_6G_REG_SHIFT,
+	.reg_cfg = {
+		.num = 2,
+		.regs = {
+			{"vdda", 18160, 1 },	/* 1.25 V */
+			{"vcca", 17000, 32 },	/* 0.925 V */
+			{"vddio", 100000, 100 },/* 1.8 V */
+		},
+	},
+	.bus_clk_names = dsi_8996_bus_clk_names,
+	.num_bus_clks = ARRAY_SIZE(dsi_8996_bus_clk_names),
+	.io_start = { 0x994000, 0x996000 },
+	.num_dsi = 2,
+};
+
 static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
 	{MSM_DSI_VER_MAJOR_V2, MSM_DSI_V2_VER_MINOR_8064, &apq8064_dsi_cfg},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_0,
@@ -106,6 +130,7 @@ static const struct msm_dsi_cfg_handler dsi_cfg_handlers[] = {
 						&msm8974_apq8084_dsi_cfg},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_3, &msm8994_dsi_cfg},
 	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_3_1, &msm8916_dsi_cfg},
+	{MSM_DSI_VER_MAJOR_6G, MSM_DSI_6G_VER_MINOR_V1_4_1, &msm8996_dsi_cfg},
 };
 
 const struct msm_dsi_cfg_handler *msm_dsi_cfg_get(u32 major, u32 minor)
diff --git a/drivers/gpu/drm/msm/dsi/dsi_cfg.h b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
index eeacc3232494..00a5da2663c6 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_cfg.h
+++ b/drivers/gpu/drm/msm/dsi/dsi_cfg.h
@@ -24,6 +24,7 @@
 #define MSM_DSI_6G_VER_MINOR_V1_2	0x10020000
 #define MSM_DSI_6G_VER_MINOR_V1_3	0x10030000
 #define MSM_DSI_6G_VER_MINOR_V1_3_1	0x10030001
+#define MSM_DSI_6G_VER_MINOR_V1_4_1	0x10040001
 
 #define MSM_DSI_V2_VER_MINOR_8064	0x0
 
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 3819fdefcae2..1fc07ce24686 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -691,17 +691,6 @@ static int dsi_calc_clk_rate(struct msm_dsi_host *msm_host)
 	return 0;
 }
 
-static void dsi_phy_sw_reset(struct msm_dsi_host *msm_host)
-{
-	DBG("");
-	dsi_write(msm_host, REG_DSI_PHY_RESET, DSI_PHY_RESET_RESET);
-	/* Make sure fully reset */
-	wmb();
-	udelay(1000);
-	dsi_write(msm_host, REG_DSI_PHY_RESET, 0);
-	udelay(100);
-}
-
 static void dsi_intr_ctrl(struct msm_dsi_host *msm_host, u32 mask, int enable)
 {
 	u32 intr;
@@ -756,7 +745,7 @@ static inline enum dsi_cmd_dst_format dsi_get_cmd_fmt(
 }
 
 static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
-				u32 clk_pre, u32 clk_post)
+			struct msm_dsi_phy_shared_timings *phy_shared_timings)
 {
 	u32 flags = msm_host->mode_flags;
 	enum mipi_dsi_pixel_format mipi_fmt = msm_host->format;
@@ -819,10 +808,16 @@ static void dsi_ctrl_config(struct msm_dsi_host *msm_host, bool enable,
 		data |= DSI_TRIG_CTRL_BLOCK_DMA_WITHIN_FRAME;
 	dsi_write(msm_host, REG_DSI_TRIG_CTRL, data);
 
-	data = DSI_CLKOUT_TIMING_CTRL_T_CLK_POST(clk_post) |
-		DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE(clk_pre);
+	data = DSI_CLKOUT_TIMING_CTRL_T_CLK_POST(phy_shared_timings->clk_post) |
+		DSI_CLKOUT_TIMING_CTRL_T_CLK_PRE(phy_shared_timings->clk_pre);
 	dsi_write(msm_host, REG_DSI_CLKOUT_TIMING_CTRL, data);
 
+	if ((cfg_hnd->major == MSM_DSI_VER_MAJOR_6G) &&
+	    (cfg_hnd->minor > MSM_DSI_6G_VER_MINOR_V1_0) &&
+	    phy_shared_timings->clk_pre_inc_by_2)
+		dsi_write(msm_host, REG_DSI_T_CLK_PRE_EXTEND,
+			  DSI_T_CLK_PRE_EXTEND_INC_BY_2_BYTECLK);
+
 	data = 0;
 	if (!(flags & MIPI_DSI_MODE_EOT_PACKET))
 		data |= DSI_EOT_PACKET_CTRL_TX_EOT_APPEND;
@@ -1482,6 +1477,8 @@ static int dsi_host_attach(struct mipi_dsi_host *host,
 	msm_host->format = dsi->format;
 	msm_host->mode_flags = dsi->mode_flags;
 
+	msm_dsi_manager_attach_dsi_device(msm_host->id, dsi->mode_flags);
+
 	/* Some gpios defined in panel DT need to be controlled by host */
 	ret = dsi_host_init_panel_gpios(msm_host, &dsi->dev);
 	if (ret)
@@ -1557,8 +1554,9 @@ static int dsi_host_parse_lane_data(struct msm_dsi_host *msm_host,
 
 	prop = of_find_property(ep, "data-lanes", &len);
 	if (!prop) {
-		dev_dbg(dev, "failed to find data lane mapping\n");
-		return -EINVAL;
+		dev_dbg(dev,
+			"failed to find data lane mapping, using default\n");
+		return 0;
 	}
 
 	num_lanes = len / sizeof(u32);
@@ -1615,7 +1613,7 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 	struct device *dev = &msm_host->pdev->dev;
 	struct device_node *np = dev->of_node;
 	struct device_node *endpoint, *device_node;
-	int ret;
+	int ret = 0;
 
 	/*
 	 * Get the endpoint of the output port of the DSI host. In our case,
@@ -1639,8 +1637,7 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 	/* Get panel node from the output port's endpoint data */
 	device_node = of_graph_get_remote_port_parent(endpoint);
 	if (!device_node) {
-		dev_err(dev, "%s: no valid device\n", __func__);
-		ret = -ENODEV;
+		dev_dbg(dev, "%s: no valid device\n", __func__);
 		goto err;
 	}
 
@@ -2118,6 +2115,28 @@ exit:
 	return ret;
 }
 
+void msm_dsi_host_reset_phy(struct mipi_dsi_host *host)
+{
+	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+
+	DBG("");
+	dsi_write(msm_host, REG_DSI_PHY_RESET, DSI_PHY_RESET_RESET);
+	/* Make sure fully reset */
+	wmb();
+	udelay(1000);
+	dsi_write(msm_host, REG_DSI_PHY_RESET, 0);
+	udelay(100);
+}
+
+void msm_dsi_host_get_phy_clk_req(struct mipi_dsi_host *host,
+	struct msm_dsi_phy_clk_request *clk_req)
+{
+	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+
+	clk_req->bitclk_rate = msm_host->byte_clk_rate * 8;
+	clk_req->escclk_rate = msm_host->esc_clk_rate;
+}
+
 int msm_dsi_host_enable(struct mipi_dsi_host *host)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
@@ -2165,10 +2184,10 @@ static void msm_dsi_sfpb_config(struct msm_dsi_host *msm_host, bool enable)
 			SFPB_GPREG_MASTER_PORT_EN(en));
 }
 
-int msm_dsi_host_power_on(struct mipi_dsi_host *host)
+int msm_dsi_host_power_on(struct mipi_dsi_host *host,
+			struct msm_dsi_phy_shared_timings *phy_shared_timings)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
-	u32 clk_pre = 0, clk_post = 0;
 	int ret = 0;
 
 	mutex_lock(&msm_host->dev_mutex);
@@ -2179,12 +2198,6 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host)
 
 	msm_dsi_sfpb_config(msm_host, true);
 
-	ret = dsi_calc_clk_rate(msm_host);
-	if (ret) {
-		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
-		goto unlock_ret;
-	}
-
 	ret = dsi_host_regulator_enable(msm_host);
 	if (ret) {
 		pr_err("%s:Failed to enable vregs.ret=%d\n",
@@ -2192,23 +2205,6 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host)
 		goto unlock_ret;
 	}
 
-	ret = dsi_bus_clk_enable(msm_host);
-	if (ret) {
-		pr_err("%s: failed to enable bus clocks, %d\n", __func__, ret);
-		goto fail_disable_reg;
-	}
-
-	dsi_phy_sw_reset(msm_host);
-	ret = msm_dsi_manager_phy_enable(msm_host->id,
-					msm_host->byte_clk_rate * 8,
-					msm_host->esc_clk_rate,
-					&clk_pre, &clk_post);
-	dsi_bus_clk_disable(msm_host);
-	if (ret) {
-		pr_err("%s: failed to enable phy, %d\n", __func__, ret);
-		goto fail_disable_reg;
-	}
-
 	ret = dsi_clk_ctrl(msm_host, 1);
 	if (ret) {
 		pr_err("%s: failed to enable clocks. ret=%d\n", __func__, ret);
@@ -2224,7 +2220,7 @@ int msm_dsi_host_power_on(struct mipi_dsi_host *host)
 
 	dsi_timing_setup(msm_host);
 	dsi_sw_reset(msm_host);
-	dsi_ctrl_config(msm_host, true, clk_pre, clk_post);
+	dsi_ctrl_config(msm_host, true, phy_shared_timings);
 
 	if (msm_host->disp_en_gpio)
 		gpiod_set_value(msm_host->disp_en_gpio, 1);
@@ -2253,15 +2249,13 @@ int msm_dsi_host_power_off(struct mipi_dsi_host *host)
 		goto unlock_ret;
 	}
 
-	dsi_ctrl_config(msm_host, false, 0, 0);
+	dsi_ctrl_config(msm_host, false, NULL);
 
 	if (msm_host->disp_en_gpio)
 		gpiod_set_value(msm_host->disp_en_gpio, 0);
 
 	pinctrl_pm_select_sleep_state(&msm_host->pdev->dev);
 
-	msm_dsi_manager_phy_disable(msm_host->id);
-
 	dsi_clk_ctrl(msm_host, 0);
 
 	dsi_host_regulator_disable(msm_host);
@@ -2281,6 +2275,7 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 					struct drm_display_mode *mode)
 {
 	struct msm_dsi_host *msm_host = to_msm_dsi_host(host);
+	int ret;
 
 	if (msm_host->mode) {
 		drm_mode_destroy(msm_host->dev, msm_host->mode);
@@ -2293,6 +2288,12 @@ int msm_dsi_host_set_display_mode(struct mipi_dsi_host *host,
 		return -ENOMEM;
 	}
 
+	ret = dsi_calc_clk_rate(msm_host);
+	if (ret) {
+		pr_err("%s: unable to calc clk rate, %d\n", __func__, ret);
+		return ret;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 2bd8dad76105..921270ea6059 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -72,11 +72,12 @@ static int dsi_mgr_parse_dual_dsi(struct device_node *np, int id)
 	return 0;
 }
 
-static int dsi_mgr_host_register(int id)
+static int dsi_mgr_setup_components(int id)
 {
 	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
 	struct msm_dsi *other_dsi = dsi_mgr_get_other_dsi(id);
 	struct msm_dsi *clk_master_dsi = dsi_mgr_get_dsi(DSI_CLOCK_MASTER);
+	struct msm_dsi *clk_slave_dsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE);
 	struct msm_dsi_pll *src_pll;
 	int ret;
 
@@ -85,15 +86,16 @@ static int dsi_mgr_host_register(int id)
 		if (ret)
 			return ret;
 
+		msm_dsi_phy_set_usecase(msm_dsi->phy, MSM_DSI_PHY_STANDALONE);
 		src_pll = msm_dsi_phy_get_pll(msm_dsi->phy);
 		ret = msm_dsi_host_set_src_pll(msm_dsi->host, src_pll);
 	} else if (!other_dsi) {
 		ret = 0;
 	} else {
-		struct msm_dsi *mdsi = IS_MASTER_DSI_LINK(id) ?
-					msm_dsi : other_dsi;
-		struct msm_dsi *sdsi = IS_MASTER_DSI_LINK(id) ?
-					other_dsi : msm_dsi;
+		struct msm_dsi *master_link_dsi = IS_MASTER_DSI_LINK(id) ?
+							msm_dsi : other_dsi;
+		struct msm_dsi *slave_link_dsi = IS_MASTER_DSI_LINK(id) ?
+							other_dsi : msm_dsi;
 		/* Register slave host first, so that slave DSI device
 		 * has a chance to probe, and do not block the master
 		 * DSI device's probe.
@@ -101,14 +103,18 @@ static int dsi_mgr_host_register(int id)
 		 * because only master DSI device adds the panel to global
 		 * panel list. The panel's device is the master DSI device.
 		 */
-		ret = msm_dsi_host_register(sdsi->host, false);
+		ret = msm_dsi_host_register(slave_link_dsi->host, false);
 		if (ret)
 			return ret;
-		ret = msm_dsi_host_register(mdsi->host, true);
+		ret = msm_dsi_host_register(master_link_dsi->host, true);
 		if (ret)
 			return ret;
 
 		/* PLL0 is to drive both 2 DSI link clocks in Dual DSI mode. */
+		msm_dsi_phy_set_usecase(clk_master_dsi->phy,
+					MSM_DSI_PHY_MASTER);
+		msm_dsi_phy_set_usecase(clk_slave_dsi->phy,
+					MSM_DSI_PHY_SLAVE);
 		src_pll = msm_dsi_phy_get_pll(clk_master_dsi->phy);
 		ret = msm_dsi_host_set_src_pll(msm_dsi->host, src_pll);
 		if (ret)
@@ -119,6 +125,84 @@ static int dsi_mgr_host_register(int id)
 	return ret;
 }
 
+static int enable_phy(struct msm_dsi *msm_dsi, int src_pll_id,
+		      struct msm_dsi_phy_shared_timings *shared_timings)
+{
+	struct msm_dsi_phy_clk_request clk_req;
+	int ret;
+
+	msm_dsi_host_get_phy_clk_req(msm_dsi->host, &clk_req);
+
+	ret = msm_dsi_phy_enable(msm_dsi->phy, src_pll_id, &clk_req);
+	msm_dsi_phy_get_shared_timings(msm_dsi->phy, shared_timings);
+
+	return ret;
+}
+
+static int
+dsi_mgr_phy_enable(int id,
+		   struct msm_dsi_phy_shared_timings shared_timings[DSI_MAX])
+{
+	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
+	struct msm_dsi *mdsi = dsi_mgr_get_dsi(DSI_CLOCK_MASTER);
+	struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE);
+	int src_pll_id = IS_DUAL_DSI() ? DSI_CLOCK_MASTER : id;
+	int ret;
+
+	/* In case of dual DSI, some registers in PHY1 have been programmed
+	 * during PLL0 clock's set_rate. The PHY1 reset called by host1 here
+	 * will silently reset those PHY1 registers. Therefore we need to reset
+	 * and enable both PHYs before any PLL clock operation.
+	 */
+	if (IS_DUAL_DSI() && mdsi && sdsi) {
+		if (!mdsi->phy_enabled && !sdsi->phy_enabled) {
+			msm_dsi_host_reset_phy(mdsi->host);
+			msm_dsi_host_reset_phy(sdsi->host);
+
+			ret = enable_phy(mdsi, src_pll_id,
+					 &shared_timings[DSI_CLOCK_MASTER]);
+			if (ret)
+				return ret;
+			ret = enable_phy(sdsi, src_pll_id,
+					 &shared_timings[DSI_CLOCK_SLAVE]);
+			if (ret) {
+				msm_dsi_phy_disable(mdsi->phy);
+				return ret;
+			}
+		}
+	} else {
+		msm_dsi_host_reset_phy(mdsi->host);
+		ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]);
+		if (ret)
+			return ret;
+	}
+
+	msm_dsi->phy_enabled = true;
+
+	return 0;
+}
+
+static void dsi_mgr_phy_disable(int id)
+{
+	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
+	struct msm_dsi *mdsi = dsi_mgr_get_dsi(DSI_CLOCK_MASTER);
+	struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE);
+
+	/* disable DSI phy
+	 * In dual-dsi configuration, the phy should be disabled for the
+	 * first controller only when the second controller is disabled.
+	 */
+	msm_dsi->phy_enabled = false;
+	if (IS_DUAL_DSI() && mdsi && sdsi) {
+		if (!mdsi->phy_enabled && !sdsi->phy_enabled) {
+			msm_dsi_phy_disable(sdsi->phy);
+			msm_dsi_phy_disable(mdsi->phy);
+		}
+	} else {
+		msm_dsi_phy_disable(msm_dsi->phy);
+	}
+}
+
 struct dsi_connector {
 	struct drm_connector base;
 	int id;
@@ -168,6 +252,16 @@ static enum drm_connector_status dsi_mgr_connector_detect(
 			msm_dsi->panel = msm_dsi_host_get_panel(
 					other_dsi->host, NULL);
 
+
+		if (msm_dsi->panel && kms->funcs->set_encoder_mode) {
+			bool cmd_mode = !(msm_dsi->device_flags &
+					  MIPI_DSI_MODE_VIDEO);
+			struct drm_encoder *encoder =
+					msm_dsi_get_encoder(msm_dsi);
+
+			kms->funcs->set_encoder_mode(kms, encoder, cmd_mode);
+		}
+
 		if (msm_dsi->panel && IS_DUAL_DSI())
 			drm_object_attach_property(&connector->base,
 				connector->dev->mode_config.tile_property, 0);
@@ -344,22 +438,31 @@ static void dsi_mgr_bridge_pre_enable(struct drm_bridge *bridge)
 	struct msm_dsi *msm_dsi1 = dsi_mgr_get_dsi(DSI_1);
 	struct mipi_dsi_host *host = msm_dsi->host;
 	struct drm_panel *panel = msm_dsi->panel;
+	struct msm_dsi_phy_shared_timings phy_shared_timings[DSI_MAX];
 	bool is_dual_dsi = IS_DUAL_DSI();
 	int ret;
 
 	DBG("id=%d", id);
-	if (!msm_dsi_device_connected(msm_dsi) ||
-			(is_dual_dsi && (DSI_1 == id)))
+	if (!msm_dsi_device_connected(msm_dsi))
 		return;
 
-	ret = msm_dsi_host_power_on(host);
+	ret = dsi_mgr_phy_enable(id, phy_shared_timings);
+	if (ret)
+		goto phy_en_fail;
+
+	/* Do nothing with the host if it is DSI 1 in case of dual DSI */
+	if (is_dual_dsi && (DSI_1 == id))
+		return;
+
+	ret = msm_dsi_host_power_on(host, &phy_shared_timings[id]);
 	if (ret) {
 		pr_err("%s: power on host %d failed, %d\n", __func__, id, ret);
 		goto host_on_fail;
 	}
 
 	if (is_dual_dsi && msm_dsi1) {
-		ret = msm_dsi_host_power_on(msm_dsi1->host);
+		ret = msm_dsi_host_power_on(msm_dsi1->host,
+					    &phy_shared_timings[DSI_1]);
 		if (ret) {
 			pr_err("%s: power on host1 failed, %d\n",
 							__func__, ret);
@@ -418,6 +521,8 @@ panel_prep_fail:
 host1_on_fail:
 	msm_dsi_host_power_off(host);
 host_on_fail:
+	dsi_mgr_phy_disable(id);
+phy_en_fail:
 	return;
 }
 
@@ -443,10 +548,17 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
 
 	DBG("id=%d", id);
 
-	if (!msm_dsi_device_connected(msm_dsi) ||
-			(is_dual_dsi && (DSI_1 == id)))
+	if (!msm_dsi_device_connected(msm_dsi))
 		return;
 
+	/*
+	 * Do nothing with the host if it is DSI 1 in case of dual DSI.
+	 * It is safe to call dsi_mgr_phy_disable() here because a single PHY
+	 * won't be diabled until both PHYs request disable.
+	 */
+	if (is_dual_dsi && (DSI_1 == id))
+		goto disable_phy;
+
 	if (panel) {
 		ret = drm_panel_disable(panel);
 		if (ret)
@@ -481,6 +593,9 @@ static void dsi_mgr_bridge_post_disable(struct drm_bridge *bridge)
 			pr_err("%s: host1 power off failed, %d\n",
 								__func__, ret);
 	}
+
+disable_phy:
+	dsi_mgr_phy_disable(id);
 }
 
 static void dsi_mgr_bridge_mode_set(struct drm_bridge *bridge,
@@ -540,7 +655,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
 	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
 	struct drm_connector *connector = NULL;
 	struct dsi_connector *dsi_connector;
-	int ret, i;
+	int ret;
 
 	dsi_connector = kzalloc(sizeof(*dsi_connector), GFP_KERNEL);
 	if (!dsi_connector)
@@ -566,9 +681,7 @@ struct drm_connector *msm_dsi_manager_connector_init(u8 id)
 	connector->interlace_allowed = 0;
 	connector->doublescan_allowed = 0;
 
-	for (i = 0; i < MSM_DSI_ENCODER_NUM; i++)
-		drm_mode_connector_attach_encoder(connector,
-						msm_dsi->encoders[i]);
+	drm_mode_connector_attach_encoder(connector, msm_dsi->encoder);
 
 	return connector;
 }
@@ -591,13 +704,7 @@ struct drm_bridge *msm_dsi_manager_bridge_init(u8 id)
 
 	dsi_bridge->id = id;
 
-	/*
-	 * HACK: we may not know the external DSI bridge device's mode
-	 * flags here. We'll get to know them only when the device
-	 * attaches to the dsi host. For now, assume the bridge supports
-	 * DSI video mode
-	 */
-	encoder = msm_dsi->encoders[MSM_DSI_VIDEO_ENCODER_ID];
+	encoder = msm_dsi->encoder;
 
 	bridge = &dsi_bridge->base;
 	bridge->funcs = &dsi_mgr_bridge_funcs;
@@ -628,13 +735,7 @@ struct drm_connector *msm_dsi_manager_ext_bridge_init(u8 id)
 	ext_bridge = msm_dsi->external_bridge =
 			msm_dsi_host_get_bridge(msm_dsi->host);
 
-	/*
-	 * HACK: we may not know the external DSI bridge device's mode
-	 * flags here. We'll get to know them only when the device
-	 * attaches to the dsi host. For now, assume the bridge supports
-	 * DSI video mode
-	 */
-	encoder = msm_dsi->encoders[MSM_DSI_VIDEO_ENCODER_ID];
+	encoder = msm_dsi->encoder;
 
 	/* link the internal dsi bridge to the external bridge */
 	drm_bridge_attach(encoder, ext_bridge, int_bridge);
@@ -662,68 +763,6 @@ void msm_dsi_manager_bridge_destroy(struct drm_bridge *bridge)
 {
 }
 
-int msm_dsi_manager_phy_enable(int id,
-		const unsigned long bit_rate, const unsigned long esc_rate,
-		u32 *clk_pre, u32 *clk_post)
-{
-	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
-	struct msm_dsi_phy *phy = msm_dsi->phy;
-	int src_pll_id = IS_DUAL_DSI() ? DSI_CLOCK_MASTER : id;
-	struct msm_dsi_pll *pll = msm_dsi_phy_get_pll(msm_dsi->phy);
-	int ret;
-
-	ret = msm_dsi_phy_enable(phy, src_pll_id, bit_rate, esc_rate);
-	if (ret)
-		return ret;
-
-	/*
-	 * Reset DSI PHY silently changes its PLL registers to reset status,
-	 * which will confuse clock driver and result in wrong output rate of
-	 * link clocks. Restore PLL status if its PLL is being used as clock
-	 * source.
-	 */
-	if (!IS_DUAL_DSI() || (id == DSI_CLOCK_MASTER)) {
-		ret = msm_dsi_pll_restore_state(pll);
-		if (ret) {
-			pr_err("%s: failed to restore pll state\n", __func__);
-			msm_dsi_phy_disable(phy);
-			return ret;
-		}
-	}
-
-	msm_dsi->phy_enabled = true;
-	msm_dsi_phy_get_clk_pre_post(phy, clk_pre, clk_post);
-
-	return 0;
-}
-
-void msm_dsi_manager_phy_disable(int id)
-{
-	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
-	struct msm_dsi *mdsi = dsi_mgr_get_dsi(DSI_CLOCK_MASTER);
-	struct msm_dsi *sdsi = dsi_mgr_get_dsi(DSI_CLOCK_SLAVE);
-	struct msm_dsi_phy *phy = msm_dsi->phy;
-	struct msm_dsi_pll *pll = msm_dsi_phy_get_pll(msm_dsi->phy);
-
-	/* Save PLL status if it is a clock source */
-	if (!IS_DUAL_DSI() || (id == DSI_CLOCK_MASTER))
-		msm_dsi_pll_save_state(pll);
-
-	/* disable DSI phy
-	 * In dual-dsi configuration, the phy should be disabled for the
-	 * first controller only when the second controller is disabled.
-	 */
-	msm_dsi->phy_enabled = false;
-	if (IS_DUAL_DSI() && mdsi && sdsi) {
-		if (!mdsi->phy_enabled && !sdsi->phy_enabled) {
-			msm_dsi_phy_disable(sdsi->phy);
-			msm_dsi_phy_disable(mdsi->phy);
-		}
-	} else {
-		msm_dsi_phy_disable(phy);
-	}
-}
-
 int msm_dsi_manager_cmd_xfer(int id, const struct mipi_dsi_msg *msg)
 {
 	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
@@ -787,6 +826,33 @@ bool msm_dsi_manager_cmd_xfer_trigger(int id, u32 dma_base, u32 len)
 	return true;
 }
 
+void msm_dsi_manager_attach_dsi_device(int id, u32 device_flags)
+{
+	struct msm_dsi *msm_dsi = dsi_mgr_get_dsi(id);
+	struct drm_device *dev = msm_dsi->dev;
+	struct msm_drm_private *priv;
+	struct msm_kms *kms;
+	struct drm_encoder *encoder;
+
+	/*
+	 * drm_device pointer is assigned to msm_dsi only in the modeset_init
+	 * path. If mipi_dsi_attach() happens in DSI driver's probe path
+	 * (generally the case when we're connected to a drm_panel of the type
+	 * mipi_dsi_device), this would be NULL. In such cases, try to set the
+	 * encoder mode in the DSI connector's detect() op.
+	 */
+	if (!dev)
+		return;
+
+	priv = dev->dev_private;
+	kms = priv->kms;
+	encoder = msm_dsi_get_encoder(msm_dsi);
+
+	if (encoder && kms->funcs->set_encoder_mode)
+		if (!(device_flags & MIPI_DSI_MODE_VIDEO))
+			kms->funcs->set_encoder_mode(kms, encoder, true);
+}
+
 int msm_dsi_manager_register(struct msm_dsi *msm_dsi)
 {
 	struct msm_dsi_manager *msm_dsim = &msm_dsim_glb;
@@ -811,7 +877,7 @@ int msm_dsi_manager_register(struct msm_dsi *msm_dsi)
 		goto fail;
 	}
 
-	ret = dsi_mgr_host_register(id);
+	ret = dsi_mgr_setup_components(id);
 	if (ret) {
 		pr_err("%s: failed to register mipi dsi host for DSI %d\n",
 			__func__, id);
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
index f39386ed75e4..0c2eb9c9a1fc 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.c
@@ -54,8 +54,10 @@ static void dsi_dphy_timing_calc_clk_zero(struct msm_dsi_dphy_timing *timing,
 }
 
 int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
-	const unsigned long bit_rate, const unsigned long esc_rate)
+			     struct msm_dsi_phy_clk_request *clk_req)
 {
+	const unsigned long bit_rate = clk_req->bitclk_rate;
+	const unsigned long esc_rate = clk_req->escclk_rate;
 	s32 ui, lpx;
 	s32 tmax, tmin;
 	s32 pcnt0 = 10;
@@ -115,8 +117,8 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
 	temp = ((timing->hs_exit >> 1) + 1) * 2 * ui;
 	temp = 60 * coeff + 52 * ui - 24 * ui - temp;
 	tmin = S_DIV_ROUND_UP(temp, 8 * ui) - 1;
-	timing->clk_post = linear_inter(tmax, tmin, pcnt2, 0, false);
-
+	timing->shared_timings.clk_post = linear_inter(tmax, tmin, pcnt2, 0,
+						       false);
 	tmax = 63;
 	temp = ((timing->clk_prepare >> 1) + 1) * 2 * ui;
 	temp += ((timing->clk_zero >> 1) + 1) * 2 * ui;
@@ -124,17 +126,21 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
 	tmin = S_DIV_ROUND_UP(temp, 8 * ui) - 1;
 	if (tmin > tmax) {
 		temp = linear_inter(2 * tmax, tmin, pcnt2, 0, false);
-		timing->clk_pre = temp >> 1;
+		timing->shared_timings.clk_pre = temp >> 1;
+		timing->shared_timings.clk_pre_inc_by_2 = true;
 	} else {
-		timing->clk_pre = linear_inter(tmax, tmin, pcnt2, 0, false);
+		timing->shared_timings.clk_pre =
+				linear_inter(tmax, tmin, pcnt2, 0, false);
+		timing->shared_timings.clk_pre_inc_by_2 = false;
 	}
 
 	timing->ta_go = 3;
 	timing->ta_sure = 0;
 	timing->ta_get = 4;
 
-	DBG("PHY timings: %d, %d, %d, %d, %d, %d, %d, %d, %d, %d",
-		timing->clk_pre, timing->clk_post, timing->clk_zero,
+	DBG("PHY timings: %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d",
+		timing->shared_timings.clk_pre, timing->shared_timings.clk_post,
+		timing->shared_timings.clk_pre_inc_by_2, timing->clk_zero,
 		timing->clk_trail, timing->clk_prepare, timing->hs_exit,
 		timing->hs_zero, timing->hs_prepare, timing->hs_trail,
 		timing->hs_rqst);
@@ -142,6 +148,123 @@ int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
 	return 0;
 }
 
+int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
+				struct msm_dsi_phy_clk_request *clk_req)
+{
+	const unsigned long bit_rate = clk_req->bitclk_rate;
+	const unsigned long esc_rate = clk_req->escclk_rate;
+	s32 ui, ui_x8, lpx;
+	s32 tmax, tmin;
+	s32 pcnt0 = 50;
+	s32 pcnt1 = 50;
+	s32 pcnt2 = 10;
+	s32 pcnt3 = 30;
+	s32 pcnt4 = 10;
+	s32 pcnt5 = 2;
+	s32 coeff = 1000; /* Precision, should avoid overflow */
+	s32 hb_en, hb_en_ckln, pd_ckln, pd;
+	s32 val, val_ckln;
+	s32 temp;
+
+	if (!bit_rate || !esc_rate)
+		return -EINVAL;
+
+	timing->hs_halfbyte_en = 0;
+	hb_en = 0;
+	timing->hs_halfbyte_en_ckln = 0;
+	hb_en_ckln = 0;
+	timing->hs_prep_dly_ckln = (bit_rate > 100000000) ? 0 : 3;
+	pd_ckln = timing->hs_prep_dly_ckln;
+	timing->hs_prep_dly = (bit_rate > 120000000) ? 0 : 1;
+	pd = timing->hs_prep_dly;
+
+	val = (hb_en << 2) + (pd << 1);
+	val_ckln = (hb_en_ckln << 2) + (pd_ckln << 1);
+
+	ui = mult_frac(NSEC_PER_MSEC, coeff, bit_rate / 1000);
+	ui_x8 = ui << 3;
+	lpx = mult_frac(NSEC_PER_MSEC, coeff, esc_rate / 1000);
+
+	temp = S_DIV_ROUND_UP(38 * coeff - val_ckln * ui, ui_x8);
+	tmin = max_t(s32, temp, 0);
+	temp = (95 * coeff - val_ckln * ui) / ui_x8;
+	tmax = max_t(s32, temp, 0);
+	timing->clk_prepare = linear_inter(tmax, tmin, pcnt0, 0, false);
+
+	temp = 300 * coeff - ((timing->clk_prepare << 3) + val_ckln) * ui;
+	tmin = S_DIV_ROUND_UP(temp - 11 * ui, ui_x8) - 3;
+	tmax = (tmin > 255) ? 511 : 255;
+	timing->clk_zero = linear_inter(tmax, tmin, pcnt5, 0, false);
+
+	tmin = DIV_ROUND_UP(60 * coeff + 3 * ui, ui_x8);
+	temp = 105 * coeff + 12 * ui - 20 * coeff;
+	tmax = (temp + 3 * ui) / ui_x8;
+	timing->clk_trail = linear_inter(tmax, tmin, pcnt3, 0, false);
+
+	temp = S_DIV_ROUND_UP(40 * coeff + 4 * ui - val * ui, ui_x8);
+	tmin = max_t(s32, temp, 0);
+	temp = (85 * coeff + 6 * ui - val * ui) / ui_x8;
+	tmax = max_t(s32, temp, 0);
+	timing->hs_prepare = linear_inter(tmax, tmin, pcnt1, 0, false);
+
+	temp = 145 * coeff + 10 * ui - ((timing->hs_prepare << 3) + val) * ui;
+	tmin = S_DIV_ROUND_UP(temp - 11 * ui, ui_x8) - 3;
+	tmax = 255;
+	timing->hs_zero = linear_inter(tmax, tmin, pcnt4, 0, false);
+
+	tmin = DIV_ROUND_UP(60 * coeff + 4 * ui + 3 * ui, ui_x8);
+	temp = 105 * coeff + 12 * ui - 20 * coeff;
+	tmax = (temp + 3 * ui) / ui_x8;
+	timing->hs_trail = linear_inter(tmax, tmin, pcnt3, 0, false);
+
+	temp = 50 * coeff + ((hb_en << 2) - 8) * ui;
+	timing->hs_rqst = S_DIV_ROUND_UP(temp, ui_x8);
+
+	tmin = DIV_ROUND_UP(100 * coeff, ui_x8) - 1;
+	tmax = 255;
+	timing->hs_exit = linear_inter(tmax, tmin, pcnt2, 0, false);
+
+	temp = 50 * coeff + ((hb_en_ckln << 2) - 8) * ui;
+	timing->hs_rqst_ckln = S_DIV_ROUND_UP(temp, ui_x8);
+
+	temp = 60 * coeff + 52 * ui - 43 * ui;
+	tmin = DIV_ROUND_UP(temp, ui_x8) - 1;
+	tmax = 63;
+	timing->shared_timings.clk_post =
+				linear_inter(tmax, tmin, pcnt2, 0, false);
+
+	temp = 8 * ui + ((timing->clk_prepare << 3) + val_ckln) * ui;
+	temp += (((timing->clk_zero + 3) << 3) + 11 - (pd_ckln << 1)) * ui;
+	temp += hb_en_ckln ? (((timing->hs_rqst_ckln << 3) + 4) * ui) :
+				(((timing->hs_rqst_ckln << 3) + 8) * ui);
+	tmin = S_DIV_ROUND_UP(temp, ui_x8) - 1;
+	tmax = 63;
+	if (tmin > tmax) {
+		temp = linear_inter(tmax << 1, tmin, pcnt2, 0, false);
+		timing->shared_timings.clk_pre = temp >> 1;
+		timing->shared_timings.clk_pre_inc_by_2 = 1;
+	} else {
+		timing->shared_timings.clk_pre =
+				linear_inter(tmax, tmin, pcnt2, 0, false);
+		timing->shared_timings.clk_pre_inc_by_2 = 0;
+	}
+
+	timing->ta_go = 3;
+	timing->ta_sure = 0;
+	timing->ta_get = 4;
+
+	DBG("%d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d, %d",
+	    timing->shared_timings.clk_pre, timing->shared_timings.clk_post,
+	    timing->shared_timings.clk_pre_inc_by_2, timing->clk_zero,
+	    timing->clk_trail, timing->clk_prepare, timing->hs_exit,
+	    timing->hs_zero, timing->hs_prepare, timing->hs_trail,
+	    timing->hs_rqst, timing->hs_rqst_ckln, timing->hs_halfbyte_en,
+	    timing->hs_halfbyte_en_ckln, timing->hs_prep_dly,
+	    timing->hs_prep_dly_ckln);
+
+	return 0;
+}
+
 void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg,
 				u32 bit_mask)
 {
@@ -268,6 +391,10 @@ static const struct of_device_id dsi_phy_dt_match[] = {
 	{ .compatible = "qcom,dsi-phy-28nm-8960",
 	  .data = &dsi_phy_28nm_8960_cfgs },
 #endif
+#ifdef CONFIG_DRM_MSM_DSI_14NM_PHY
+	{ .compatible = "qcom,dsi-phy-14nm",
+	  .data = &dsi_phy_14nm_cfgs },
+#endif
 	{}
 };
 
@@ -295,6 +422,24 @@ static int dsi_phy_get_id(struct msm_dsi_phy *phy)
 	return -EINVAL;
 }
 
+int msm_dsi_phy_init_common(struct msm_dsi_phy *phy)
+{
+	struct platform_device *pdev = phy->pdev;
+	int ret = 0;
+
+	phy->reg_base = msm_ioremap(pdev, "dsi_phy_regulator",
+				"DSI_PHY_REG");
+	if (IS_ERR(phy->reg_base)) {
+		dev_err(&pdev->dev, "%s: failed to map phy regulator base\n",
+			__func__);
+		ret = -ENOMEM;
+		goto fail;
+	}
+
+fail:
+	return ret;
+}
+
 static int dsi_phy_driver_probe(struct platform_device *pdev)
 {
 	struct msm_dsi_phy *phy;
@@ -331,15 +476,6 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 		goto fail;
 	}
 
-	phy->reg_base = msm_ioremap(pdev, "dsi_phy_regulator",
-				"DSI_PHY_REG");
-	if (IS_ERR(phy->reg_base)) {
-		dev_err(dev, "%s: failed to map phy regulator base\n",
-			__func__);
-		ret = -ENOMEM;
-		goto fail;
-	}
-
 	ret = dsi_phy_regulator_init(phy);
 	if (ret) {
 		dev_err(dev, "%s: failed to init regulator\n", __func__);
@@ -353,6 +489,12 @@ static int dsi_phy_driver_probe(struct platform_device *pdev)
 		goto fail;
 	}
 
+	if (phy->cfg->ops.init) {
+		ret = phy->cfg->ops.init(phy);
+		if (ret)
+			goto fail;
+	}
+
 	/* PLL init will call into clk_register which requires
 	 * register access, so we need to enable power and ahb clock.
 	 */
@@ -410,7 +552,7 @@ void __exit msm_dsi_phy_driver_unregister(void)
 }
 
 int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
-	const unsigned long bit_rate, const unsigned long esc_rate)
+			struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct device *dev = &phy->pdev->dev;
 	int ret;
@@ -418,21 +560,52 @@ int msm_dsi_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
 	if (!phy || !phy->cfg->ops.enable)
 		return -EINVAL;
 
+	ret = dsi_phy_enable_resource(phy);
+	if (ret) {
+		dev_err(dev, "%s: resource enable failed, %d\n",
+			__func__, ret);
+		goto res_en_fail;
+	}
+
 	ret = dsi_phy_regulator_enable(phy);
 	if (ret) {
 		dev_err(dev, "%s: regulator enable failed, %d\n",
 			__func__, ret);
-		return ret;
+		goto reg_en_fail;
 	}
 
-	ret = phy->cfg->ops.enable(phy, src_pll_id, bit_rate, esc_rate);
+	ret = phy->cfg->ops.enable(phy, src_pll_id, clk_req);
 	if (ret) {
 		dev_err(dev, "%s: phy enable failed, %d\n", __func__, ret);
-		dsi_phy_regulator_disable(phy);
-		return ret;
+		goto phy_en_fail;
+	}
+
+	/*
+	 * Resetting DSI PHY silently changes its PLL registers to reset status,
+	 * which will confuse clock driver and result in wrong output rate of
+	 * link clocks. Restore PLL status if its PLL is being used as clock
+	 * source.
+	 */
+	if (phy->usecase != MSM_DSI_PHY_SLAVE) {
+		ret = msm_dsi_pll_restore_state(phy->pll);
+		if (ret) {
+			dev_err(dev, "%s: failed to restore pll state, %d\n",
+				__func__, ret);
+			goto pll_restor_fail;
+		}
 	}
 
 	return 0;
+
+pll_restor_fail:
+	if (phy->cfg->ops.disable)
+		phy->cfg->ops.disable(phy);
+phy_en_fail:
+	dsi_phy_regulator_disable(phy);
+reg_en_fail:
+	dsi_phy_disable_resource(phy);
+res_en_fail:
+	return ret;
 }
 
 void msm_dsi_phy_disable(struct msm_dsi_phy *phy)
@@ -440,21 +613,21 @@ void msm_dsi_phy_disable(struct msm_dsi_phy *phy)
 	if (!phy || !phy->cfg->ops.disable)
 		return;
 
+	/* Save PLL status if it is a clock source */
+	if (phy->usecase != MSM_DSI_PHY_SLAVE)
+		msm_dsi_pll_save_state(phy->pll);
+
 	phy->cfg->ops.disable(phy);
 
 	dsi_phy_regulator_disable(phy);
+	dsi_phy_disable_resource(phy);
 }
 
-void msm_dsi_phy_get_clk_pre_post(struct msm_dsi_phy *phy,
-					u32 *clk_pre, u32 *clk_post)
+void msm_dsi_phy_get_shared_timings(struct msm_dsi_phy *phy,
+			struct msm_dsi_phy_shared_timings *shared_timings)
 {
-	if (!phy)
-		return;
-
-	if (clk_pre)
-		*clk_pre = phy->timing.clk_pre;
-	if (clk_post)
-		*clk_post = phy->timing.clk_post;
+	memcpy(shared_timings, &phy->timing.shared_timings,
+	       sizeof(*shared_timings));
 }
 
 struct msm_dsi_pll *msm_dsi_phy_get_pll(struct msm_dsi_phy *phy)
@@ -465,3 +638,9 @@ struct msm_dsi_pll *msm_dsi_phy_get_pll(struct msm_dsi_phy *phy)
 	return phy->pll;
 }
 
+void msm_dsi_phy_set_usecase(struct msm_dsi_phy *phy,
+			     enum msm_dsi_phy_usecase uc)
+{
+	if (phy)
+		phy->usecase = uc;
+}
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
index f24a85439b94..1733f6608a09 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy.h
@@ -22,8 +22,9 @@
 #define dsi_phy_write(offset, data) msm_writel((data), (offset))
 
 struct msm_dsi_phy_ops {
+	int (*init) (struct msm_dsi_phy *phy);
 	int (*enable)(struct msm_dsi_phy *phy, int src_pll_id,
-		const unsigned long bit_rate, const unsigned long esc_rate);
+			struct msm_dsi_phy_clk_request *clk_req);
 	void (*disable)(struct msm_dsi_phy *phy);
 };
 
@@ -46,6 +47,7 @@ extern const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs;
 extern const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs;
+extern const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs;
 
 struct msm_dsi_dphy_timing {
 	u32 clk_pre;
@@ -61,12 +63,22 @@ struct msm_dsi_dphy_timing {
 	u32 ta_go;
 	u32 ta_sure;
 	u32 ta_get;
+
+	struct msm_dsi_phy_shared_timings shared_timings;
+
+	/* For PHY v2 only */
+	u32 hs_rqst_ckln;
+	u32 hs_prep_dly;
+	u32 hs_prep_dly_ckln;
+	u8 hs_halfbyte_en;
+	u8 hs_halfbyte_en_ckln;
 };
 
 struct msm_dsi_phy {
 	struct platform_device *pdev;
 	void __iomem *base;
 	void __iomem *reg_base;
+	void __iomem *lane_base;
 	int id;
 
 	struct clk *ahb_clk;
@@ -75,6 +87,7 @@ struct msm_dsi_phy {
 	struct msm_dsi_dphy_timing timing;
 	const struct msm_dsi_phy_cfg *cfg;
 
+	enum msm_dsi_phy_usecase usecase;
 	bool regulator_ldo_mode;
 
 	struct msm_dsi_pll *pll;
@@ -84,9 +97,12 @@ struct msm_dsi_phy {
  * PHY internal functions
  */
 int msm_dsi_dphy_timing_calc(struct msm_dsi_dphy_timing *timing,
-	const unsigned long bit_rate, const unsigned long esc_rate);
+			     struct msm_dsi_phy_clk_request *clk_req);
+int msm_dsi_dphy_timing_calc_v2(struct msm_dsi_dphy_timing *timing,
+				struct msm_dsi_phy_clk_request *clk_req);
 void msm_dsi_phy_set_src_pll(struct msm_dsi_phy *phy, int pll_id, u32 reg,
 				u32 bit_mask);
+int msm_dsi_phy_init_common(struct msm_dsi_phy *phy);
 
 #endif /* __DSI_PHY_H__ */
 
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
new file mode 100644
index 000000000000..513f4234adc1
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_14nm.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "dsi_phy.h"
+#include "dsi.xml.h"
+
+#define PHY_14NM_CKLN_IDX	4
+
+static void dsi_14nm_dphy_set_timing(struct msm_dsi_phy *phy,
+				     struct msm_dsi_dphy_timing *timing,
+				     int lane_idx)
+{
+	void __iomem *base = phy->lane_base;
+	bool clk_ln = (lane_idx == PHY_14NM_CKLN_IDX);
+	u32 zero = clk_ln ? timing->clk_zero : timing->hs_zero;
+	u32 prepare = clk_ln ? timing->clk_prepare : timing->hs_prepare;
+	u32 trail = clk_ln ? timing->clk_trail : timing->hs_trail;
+	u32 rqst = clk_ln ? timing->hs_rqst_ckln : timing->hs_rqst;
+	u32 prep_dly = clk_ln ? timing->hs_prep_dly_ckln : timing->hs_prep_dly;
+	u32 halfbyte_en = clk_ln ? timing->hs_halfbyte_en_ckln :
+				   timing->hs_halfbyte_en;
+
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_4(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_4_HS_EXIT(timing->hs_exit));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_5(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_5_HS_ZERO(zero));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_6(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_6_HS_PREPARE(prepare));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_7(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_7_HS_TRAIL(trail));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_8(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_8_HS_RQST(rqst));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_CFG0(lane_idx),
+		      DSI_14nm_PHY_LN_CFG0_PREPARE_DLY(prep_dly));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_CFG1(lane_idx),
+		      halfbyte_en ? DSI_14nm_PHY_LN_CFG1_HALFBYTECLK_EN : 0);
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_9(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_GO(timing->ta_go) |
+		      DSI_14nm_PHY_LN_TIMING_CTRL_9_TA_SURE(timing->ta_sure));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_10(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_10_TA_GET(timing->ta_get));
+	dsi_phy_write(base + REG_DSI_14nm_PHY_LN_TIMING_CTRL_11(lane_idx),
+		      DSI_14nm_PHY_LN_TIMING_CTRL_11_TRIG3_CMD(0xa0));
+}
+
+static int dsi_14nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
+			       struct msm_dsi_phy_clk_request *clk_req)
+{
+	struct msm_dsi_dphy_timing *timing = &phy->timing;
+	u32 data;
+	int i;
+	int ret;
+	void __iomem *base = phy->base;
+	void __iomem *lane_base = phy->lane_base;
+
+	if (msm_dsi_dphy_timing_calc_v2(timing, clk_req)) {
+		dev_err(&phy->pdev->dev,
+			"%s: D-PHY timing calculation failed\n", __func__);
+		return -EINVAL;
+	}
+
+	data = 0x1c;
+	if (phy->usecase != MSM_DSI_PHY_STANDALONE)
+		data |= DSI_14nm_PHY_CMN_LDO_CNTRL_VREG_CTRL(32);
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_LDO_CNTRL, data);
+
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL, 0x1);
+
+	/* 4 data lanes + 1 clk lane configuration */
+	for (i = 0; i < 5; i++) {
+		dsi_phy_write(lane_base + REG_DSI_14nm_PHY_LN_VREG_CNTRL(i),
+			      0x1d);
+
+		dsi_phy_write(lane_base +
+			      REG_DSI_14nm_PHY_LN_STRENGTH_CTRL_0(i), 0xff);
+		dsi_phy_write(lane_base +
+			      REG_DSI_14nm_PHY_LN_STRENGTH_CTRL_1(i),
+			      (i == PHY_14NM_CKLN_IDX) ? 0x00 : 0x06);
+
+		dsi_phy_write(lane_base + REG_DSI_14nm_PHY_LN_CFG3(i),
+			      (i == PHY_14NM_CKLN_IDX) ? 0x8f : 0x0f);
+		dsi_phy_write(lane_base + REG_DSI_14nm_PHY_LN_CFG2(i), 0x10);
+		dsi_phy_write(lane_base + REG_DSI_14nm_PHY_LN_TEST_DATAPATH(i),
+			      0);
+		dsi_phy_write(lane_base + REG_DSI_14nm_PHY_LN_TEST_STR(i),
+			      0x88);
+
+		dsi_14nm_dphy_set_timing(phy, timing, i);
+	}
+
+	/* Make sure PLL is not start */
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_PLL_CNTRL, 0x00);
+
+	wmb(); /* make sure everything is written before reset and enable */
+
+	/* reset digital block */
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_CTRL_1, 0x80);
+	wmb(); /* ensure reset is asserted */
+	udelay(100);
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_CTRL_1, 0x00);
+
+	msm_dsi_phy_set_src_pll(phy, src_pll_id,
+				REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL,
+				DSI_14nm_PHY_CMN_GLBL_TEST_CTRL_BITCLK_HS_SEL);
+
+	ret = msm_dsi_pll_set_usecase(phy->pll, phy->usecase);
+	if (ret) {
+		dev_err(&phy->pdev->dev, "%s: set pll usecase failed, %d\n",
+			__func__, ret);
+		return ret;
+	}
+
+	/* Remove power down from PLL and all lanes */
+	dsi_phy_write(base + REG_DSI_14nm_PHY_CMN_CTRL_0, 0xff);
+
+	return 0;
+}
+
+static void dsi_14nm_phy_disable(struct msm_dsi_phy *phy)
+{
+	dsi_phy_write(phy->base + REG_DSI_14nm_PHY_CMN_GLBL_TEST_CTRL, 0);
+	dsi_phy_write(phy->base + REG_DSI_14nm_PHY_CMN_CTRL_0, 0);
+
+	/* ensure that the phy is completely disabled */
+	wmb();
+}
+
+static int dsi_14nm_phy_init(struct msm_dsi_phy *phy)
+{
+	struct platform_device *pdev = phy->pdev;
+
+	phy->lane_base = msm_ioremap(pdev, "dsi_phy_lane",
+				"DSI_PHY_LANE");
+	if (IS_ERR(phy->lane_base)) {
+		dev_err(&pdev->dev, "%s: failed to map phy lane base\n",
+			__func__);
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+const struct msm_dsi_phy_cfg dsi_phy_14nm_cfgs = {
+	.type = MSM_DSI_PHY_14NM,
+	.src_pll_truthtable = { {false, false}, {true, false} },
+	.reg_cfg = {
+		.num = 1,
+		.regs = {
+			{"vcca", 17000, 32},
+		},
+	},
+	.ops = {
+		.enable = dsi_14nm_phy_enable,
+		.disable = dsi_14nm_phy_disable,
+		.init = dsi_14nm_phy_init,
+	},
+	.io_start = { 0x994400, 0x996400 },
+	.num_dsi_phy = 2,
+};
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
index c757e2070cac..1ca6c69516f5 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
@@ -72,7 +72,7 @@ static void dsi_20nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable)
 }
 
 static int dsi_20nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
-		const unsigned long bit_rate, const unsigned long esc_rate)
+				struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct msm_dsi_dphy_timing *timing = &phy->timing;
 	int i;
@@ -81,7 +81,7 @@ static int dsi_20nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
 
 	DBG("");
 
-	if (msm_dsi_dphy_timing_calc(timing, bit_rate, esc_rate)) {
+	if (msm_dsi_dphy_timing_calc(timing, clk_req)) {
 		dev_err(&phy->pdev->dev,
 			"%s: D-PHY timing calculation failed\n", __func__);
 		return -EINVAL;
@@ -145,6 +145,7 @@ const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs = {
 	.ops = {
 		.enable = dsi_20nm_phy_enable,
 		.disable = dsi_20nm_phy_disable,
+		.init = msm_dsi_phy_init_common,
 	},
 	.io_start = { 0xfd998300, 0xfd9a0300 },
 	.num_dsi_phy = 2,
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
index 63d7fba31380..4972b52cbe44 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm.c
@@ -67,7 +67,7 @@ static void dsi_28nm_phy_regulator_ctrl(struct msm_dsi_phy *phy, bool enable)
 }
 
 static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
-		const unsigned long bit_rate, const unsigned long esc_rate)
+				struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct msm_dsi_dphy_timing *timing = &phy->timing;
 	int i;
@@ -75,7 +75,7 @@ static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
 
 	DBG("");
 
-	if (msm_dsi_dphy_timing_calc(timing, bit_rate, esc_rate)) {
+	if (msm_dsi_dphy_timing_calc(timing, clk_req)) {
 		dev_err(&phy->pdev->dev,
 			"%s: D-PHY timing calculation failed\n", __func__);
 		return -EINVAL;
@@ -144,6 +144,7 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_hpm_cfgs = {
 	.ops = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
+		.init = msm_dsi_phy_init_common,
 	},
 	.io_start = { 0xfd922b00, 0xfd923100 },
 	.num_dsi_phy = 2,
@@ -161,6 +162,7 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_lp_cfgs = {
 	.ops = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
+		.init = msm_dsi_phy_init_common,
 	},
 	.io_start = { 0x1a98500 },
 	.num_dsi_phy = 1,
diff --git a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
index 7bdb9de54968..398004463498 100644
--- a/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
+++ b/drivers/gpu/drm/msm/dsi/phy/dsi_phy_28nm_8960.c
@@ -124,14 +124,14 @@ static void dsi_28nm_phy_lane_config(struct msm_dsi_phy *phy)
 }
 
 static int dsi_28nm_phy_enable(struct msm_dsi_phy *phy, int src_pll_id,
-		const unsigned long bit_rate, const unsigned long esc_rate)
+				struct msm_dsi_phy_clk_request *clk_req)
 {
 	struct msm_dsi_dphy_timing *timing = &phy->timing;
 	void __iomem *base = phy->base;
 
 	DBG("");
 
-	if (msm_dsi_dphy_timing_calc(timing, bit_rate, esc_rate)) {
+	if (msm_dsi_dphy_timing_calc(timing, clk_req)) {
 		dev_err(&phy->pdev->dev,
 			"%s: D-PHY timing calculation failed\n", __func__);
 		return -EINVAL;
@@ -191,6 +191,7 @@ const struct msm_dsi_phy_cfg dsi_phy_28nm_8960_cfgs = {
 	.ops = {
 		.enable = dsi_28nm_phy_enable,
 		.disable = dsi_28nm_phy_disable,
+		.init = msm_dsi_phy_init_common,
 	},
 	.io_start = { 0x4700300, 0x5800300 },
 	.num_dsi_phy = 2,
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
index 5cd438f91afe..bc289f5c9078 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
@@ -140,6 +140,15 @@ int msm_dsi_pll_restore_state(struct msm_dsi_pll *pll)
 	return 0;
 }
 
+int msm_dsi_pll_set_usecase(struct msm_dsi_pll *pll,
+			    enum msm_dsi_phy_usecase uc)
+{
+	if (pll->set_usecase)
+		return pll->set_usecase(pll, uc);
+
+	return 0;
+}
+
 struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev,
 			enum msm_dsi_phy_type type, int id)
 {
@@ -154,6 +163,9 @@ struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev,
 	case MSM_DSI_PHY_28NM_8960:
 		pll = msm_dsi_pll_28nm_8960_init(pdev, id);
 		break;
+	case MSM_DSI_PHY_14NM:
+		pll = msm_dsi_pll_14nm_init(pdev, id);
+		break;
 	default:
 		pll = ERR_PTR(-ENXIO);
 		break;
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
index 2cf1664723e8..f63e7ada74a8 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
@@ -41,6 +41,8 @@ struct msm_dsi_pll {
 	void (*destroy)(struct msm_dsi_pll *pll);
 	void (*save_state)(struct msm_dsi_pll *pll);
 	int (*restore_state)(struct msm_dsi_pll *pll);
+	int (*set_usecase)(struct msm_dsi_pll *pll,
+			   enum msm_dsi_phy_usecase uc);
 };
 
 #define hw_clk_to_pll(x) container_of(x, struct msm_dsi_pll, clk_hw)
@@ -104,5 +106,14 @@ static inline struct msm_dsi_pll *msm_dsi_pll_28nm_8960_init(
 }
 #endif
 
+#ifdef CONFIG_DRM_MSM_DSI_14NM_PHY
+struct msm_dsi_pll *msm_dsi_pll_14nm_init(struct platform_device *pdev, int id);
+#else
+static inline struct msm_dsi_pll *
+msm_dsi_pll_14nm_init(struct platform_device *pdev, int id)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
 #endif /* __DSI_PLL_H__ */
 
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_14nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_14nm.c
new file mode 100644
index 000000000000..fe15aa64086f
--- /dev/null
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_14nm.c
@@ -0,0 +1,1104 @@
+/*
+ * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+
+#include "dsi_pll.h"
+#include "dsi.xml.h"
+
+/*
+ * DSI PLL 14nm - clock diagram (eg: DSI0):
+ *
+ *         dsi0n1_postdiv_clk
+ *                         |
+ *                         |
+ *                 +----+  |  +----+
+ *  dsi0vco_clk ---| n1 |--o--| /8 |-- dsi0pllbyte
+ *                 +----+  |  +----+
+ *                         |           dsi0n1_postdivby2_clk
+ *                         |   +----+  |
+ *                         o---| /2 |--o--|\
+ *                         |   +----+     | \   +----+
+ *                         |              |  |--| n2 |-- dsi0pll
+ *                         o--------------| /   +----+
+ *                                        |/
+ */
+
+#define POLL_MAX_READS			15
+#define POLL_TIMEOUT_US			1000
+
+#define NUM_PROVIDED_CLKS		2
+
+#define VCO_REF_CLK_RATE		19200000
+#define VCO_MIN_RATE			1300000000UL
+#define VCO_MAX_RATE			2600000000UL
+
+#define DSI_BYTE_PLL_CLK		0
+#define DSI_PIXEL_PLL_CLK		1
+
+#define DSI_PLL_DEFAULT_VCO_POSTDIV	1
+
+struct dsi_pll_input {
+	u32 fref;	/* reference clk */
+	u32 fdata;	/* bit clock rate */
+	u32 dsiclk_sel; /* Mux configuration (see diagram) */
+	u32 ssc_en;	/* SSC enable/disable */
+	u32 ldo_en;
+
+	/* fixed params */
+	u32 refclk_dbler_en;
+	u32 vco_measure_time;
+	u32 kvco_measure_time;
+	u32 bandgap_timer;
+	u32 pll_wakeup_timer;
+	u32 plllock_cnt;
+	u32 plllock_rng;
+	u32 ssc_center;
+	u32 ssc_adj_period;
+	u32 ssc_spread;
+	u32 ssc_freq;
+	u32 pll_ie_trim;
+	u32 pll_ip_trim;
+	u32 pll_iptat_trim;
+	u32 pll_cpcset_cur;
+	u32 pll_cpmset_cur;
+
+	u32 pll_icpmset;
+	u32 pll_icpcset;
+
+	u32 pll_icpmset_p;
+	u32 pll_icpmset_m;
+
+	u32 pll_icpcset_p;
+	u32 pll_icpcset_m;
+
+	u32 pll_lpf_res1;
+	u32 pll_lpf_cap1;
+	u32 pll_lpf_cap2;
+	u32 pll_c3ctrl;
+	u32 pll_r3ctrl;
+};
+
+struct dsi_pll_output {
+	u32 pll_txclk_en;
+	u32 dec_start;
+	u32 div_frac_start;
+	u32 ssc_period;
+	u32 ssc_step_size;
+	u32 plllock_cmp;
+	u32 pll_vco_div_ref;
+	u32 pll_vco_count;
+	u32 pll_kvco_div_ref;
+	u32 pll_kvco_count;
+	u32 pll_misc1;
+	u32 pll_lpf2_postdiv;
+	u32 pll_resetsm_cntrl;
+	u32 pll_resetsm_cntrl2;
+	u32 pll_resetsm_cntrl5;
+	u32 pll_kvco_code;
+
+	u32 cmn_clk_cfg0;
+	u32 cmn_clk_cfg1;
+	u32 cmn_ldo_cntrl;
+
+	u32 pll_postdiv;
+	u32 fcvo;
+};
+
+struct pll_14nm_cached_state {
+	unsigned long vco_rate;
+	u8 n2postdiv;
+	u8 n1postdiv;
+};
+
+struct dsi_pll_14nm {
+	struct msm_dsi_pll base;
+
+	int id;
+	struct platform_device *pdev;
+
+	void __iomem *phy_cmn_mmio;
+	void __iomem *mmio;
+
+	int vco_delay;
+
+	struct dsi_pll_input in;
+	struct dsi_pll_output out;
+
+	/* protects REG_DSI_14nm_PHY_CMN_CLK_CFG0 register */
+	spinlock_t postdiv_lock;
+
+	u64 vco_current_rate;
+	u64 vco_ref_clk_rate;
+
+	/* private clocks: */
+	struct clk_hw *hws[NUM_DSI_CLOCKS_MAX];
+	u32 num_hws;
+
+	/* clock-provider: */
+	struct clk_hw_onecell_data *hw_data;
+
+	struct pll_14nm_cached_state cached_state;
+
+	enum msm_dsi_phy_usecase uc;
+	struct dsi_pll_14nm *slave;
+};
+
+#define to_pll_14nm(x)	container_of(x, struct dsi_pll_14nm, base)
+
+/*
+ * Private struct for N1/N2 post-divider clocks. These clocks are similar to
+ * the generic clk_divider class of clocks. The only difference is that it
+ * also sets the slave DSI PLL's post-dividers if in Dual DSI mode
+ */
+struct dsi_pll_14nm_postdiv {
+	struct clk_hw hw;
+
+	/* divider params */
+	u8 shift;
+	u8 width;
+	u8 flags; /* same flags as used by clk_divider struct */
+
+	struct dsi_pll_14nm *pll;
+};
+
+#define to_pll_14nm_postdiv(_hw) container_of(_hw, struct dsi_pll_14nm_postdiv, hw)
+
+/*
+ * Global list of private DSI PLL struct pointers. We need this for Dual DSI
+ * mode, where the master PLL's clk_ops needs access the slave's private data
+ */
+static struct dsi_pll_14nm *pll_14nm_list[DSI_MAX];
+
+static bool pll_14nm_poll_for_ready(struct dsi_pll_14nm *pll_14nm,
+				    u32 nb_tries, u32 timeout_us)
+{
+	bool pll_locked = false;
+	void __iomem *base = pll_14nm->mmio;
+	u32 tries, val;
+
+	tries = nb_tries;
+	while (tries--) {
+		val = pll_read(base +
+			       REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS);
+		pll_locked = !!(val & BIT(5));
+
+		if (pll_locked)
+			break;
+
+		udelay(timeout_us);
+	}
+
+	if (!pll_locked) {
+		tries = nb_tries;
+		while (tries--) {
+			val = pll_read(base +
+				REG_DSI_14nm_PHY_PLL_RESET_SM_READY_STATUS);
+			pll_locked = !!(val & BIT(0));
+
+			if (pll_locked)
+				break;
+
+			udelay(timeout_us);
+		}
+	}
+
+	DBG("DSI PLL is %slocked", pll_locked ? "" : "*not* ");
+
+	return pll_locked;
+}
+
+static void dsi_pll_14nm_input_init(struct dsi_pll_14nm *pll)
+{
+	pll->in.fref = pll->vco_ref_clk_rate;
+	pll->in.fdata = 0;
+	pll->in.dsiclk_sel = 1;	/* Use the /2 path in Mux */
+	pll->in.ldo_en = 0;	/* disabled for now */
+
+	/* fixed input */
+	pll->in.refclk_dbler_en = 0;
+	pll->in.vco_measure_time = 5;
+	pll->in.kvco_measure_time = 5;
+	pll->in.bandgap_timer = 4;
+	pll->in.pll_wakeup_timer = 5;
+	pll->in.plllock_cnt = 1;
+	pll->in.plllock_rng = 0;
+
+	/*
+	 * SSC is enabled by default. We might need DT props for configuring
+	 * some SSC params like PPM and center/down spread etc.
+	 */
+	pll->in.ssc_en = 1;
+	pll->in.ssc_center = 0;		/* down spread by default */
+	pll->in.ssc_spread = 5;		/* PPM / 1000 */
+	pll->in.ssc_freq = 31500;	/* default recommended */
+	pll->in.ssc_adj_period = 37;
+
+	pll->in.pll_ie_trim = 4;
+	pll->in.pll_ip_trim = 4;
+	pll->in.pll_cpcset_cur = 1;
+	pll->in.pll_cpmset_cur = 1;
+	pll->in.pll_icpmset = 4;
+	pll->in.pll_icpcset = 4;
+	pll->in.pll_icpmset_p = 0;
+	pll->in.pll_icpmset_m = 0;
+	pll->in.pll_icpcset_p = 0;
+	pll->in.pll_icpcset_m = 0;
+	pll->in.pll_lpf_res1 = 3;
+	pll->in.pll_lpf_cap1 = 11;
+	pll->in.pll_lpf_cap2 = 1;
+	pll->in.pll_iptat_trim = 7;
+	pll->in.pll_c3ctrl = 2;
+	pll->in.pll_r3ctrl = 1;
+}
+
+#define CEIL(x, y)		(((x) + ((y) - 1)) / (y))
+
+static void pll_14nm_ssc_calc(struct dsi_pll_14nm *pll)
+{
+	u32 period, ssc_period;
+	u32 ref, rem;
+	u64 step_size;
+
+	DBG("vco=%lld ref=%lld", pll->vco_current_rate, pll->vco_ref_clk_rate);
+
+	ssc_period = pll->in.ssc_freq / 500;
+	period = (u32)pll->vco_ref_clk_rate / 1000;
+	ssc_period  = CEIL(period, ssc_period);
+	ssc_period -= 1;
+	pll->out.ssc_period = ssc_period;
+
+	DBG("ssc freq=%d spread=%d period=%d", pll->in.ssc_freq,
+	    pll->in.ssc_spread, pll->out.ssc_period);
+
+	step_size = (u32)pll->vco_current_rate;
+	ref = pll->vco_ref_clk_rate;
+	ref /= 1000;
+	step_size = div_u64(step_size, ref);
+	step_size <<= 20;
+	step_size = div_u64(step_size, 1000);
+	step_size *= pll->in.ssc_spread;
+	step_size = div_u64(step_size, 1000);
+	step_size *= (pll->in.ssc_adj_period + 1);
+
+	rem = 0;
+	step_size = div_u64_rem(step_size, ssc_period + 1, &rem);
+	if (rem)
+		step_size++;
+
+	DBG("step_size=%lld", step_size);
+
+	step_size &= 0x0ffff;	/* take lower 16 bits */
+
+	pll->out.ssc_step_size = step_size;
+}
+
+static void pll_14nm_dec_frac_calc(struct dsi_pll_14nm *pll)
+{
+	struct dsi_pll_input *pin = &pll->in;
+	struct dsi_pll_output *pout = &pll->out;
+	u64 multiplier = BIT(20);
+	u64 dec_start_multiple, dec_start, pll_comp_val;
+	u32 duration, div_frac_start;
+	u64 vco_clk_rate = pll->vco_current_rate;
+	u64 fref = pll->vco_ref_clk_rate;
+
+	DBG("vco_clk_rate=%lld ref_clk_rate=%lld", vco_clk_rate, fref);
+
+	dec_start_multiple = div_u64(vco_clk_rate * multiplier, fref);
+	div_u64_rem(dec_start_multiple, multiplier, &div_frac_start);
+
+	dec_start = div_u64(dec_start_multiple, multiplier);
+
+	pout->dec_start = (u32)dec_start;
+	pout->div_frac_start = div_frac_start;
+
+	if (pin->plllock_cnt == 0)
+		duration = 1024;
+	else if (pin->plllock_cnt == 1)
+		duration = 256;
+	else if (pin->plllock_cnt == 2)
+		duration = 128;
+	else
+		duration = 32;
+
+	pll_comp_val = duration * dec_start_multiple;
+	pll_comp_val = div_u64(pll_comp_val, multiplier);
+	do_div(pll_comp_val, 10);
+
+	pout->plllock_cmp = (u32)pll_comp_val;
+
+	pout->pll_txclk_en = 1;
+	pout->cmn_ldo_cntrl = 0x3c;
+}
+
+static u32 pll_14nm_kvco_slop(u32 vrate)
+{
+	u32 slop = 0;
+
+	if (vrate > VCO_MIN_RATE && vrate <= 1800000000UL)
+		slop =  600;
+	else if (vrate > 1800000000UL && vrate < 2300000000UL)
+		slop = 400;
+	else if (vrate > 2300000000UL && vrate < VCO_MAX_RATE)
+		slop = 280;
+
+	return slop;
+}
+
+static void pll_14nm_calc_vco_count(struct dsi_pll_14nm *pll)
+{
+	struct dsi_pll_input *pin = &pll->in;
+	struct dsi_pll_output *pout = &pll->out;
+	u64 vco_clk_rate = pll->vco_current_rate;
+	u64 fref = pll->vco_ref_clk_rate;
+	u64 data;
+	u32 cnt;
+
+	data = fref * pin->vco_measure_time;
+	do_div(data, 1000000);
+	data &= 0x03ff;	/* 10 bits */
+	data -= 2;
+	pout->pll_vco_div_ref = data;
+
+	data = div_u64(vco_clk_rate, 1000000);	/* unit is Mhz */
+	data *= pin->vco_measure_time;
+	do_div(data, 10);
+	pout->pll_vco_count = data;
+
+	data = fref * pin->kvco_measure_time;
+	do_div(data, 1000000);
+	data &= 0x03ff;	/* 10 bits */
+	data -= 1;
+	pout->pll_kvco_div_ref = data;
+
+	cnt = pll_14nm_kvco_slop(vco_clk_rate);
+	cnt *= 2;
+	cnt /= 100;
+	cnt *= pin->kvco_measure_time;
+	pout->pll_kvco_count = cnt;
+
+	pout->pll_misc1 = 16;
+	pout->pll_resetsm_cntrl = 48;
+	pout->pll_resetsm_cntrl2 = pin->bandgap_timer << 3;
+	pout->pll_resetsm_cntrl5 = pin->pll_wakeup_timer;
+	pout->pll_kvco_code = 0;
+}
+
+static void pll_db_commit_ssc(struct dsi_pll_14nm *pll)
+{
+	void __iomem *base = pll->mmio;
+	struct dsi_pll_input *pin = &pll->in;
+	struct dsi_pll_output *pout = &pll->out;
+	u8 data;
+
+	data = pin->ssc_adj_period;
+	data &= 0x0ff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_ADJ_PER1, data);
+	data = (pin->ssc_adj_period >> 8);
+	data &= 0x03;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_ADJ_PER2, data);
+
+	data = pout->ssc_period;
+	data &= 0x0ff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_PER1, data);
+	data = (pout->ssc_period >> 8);
+	data &= 0x0ff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_PER2, data);
+
+	data = pout->ssc_step_size;
+	data &= 0x0ff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_STEP_SIZE1, data);
+	data = (pout->ssc_step_size >> 8);
+	data &= 0x0ff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_STEP_SIZE2, data);
+
+	data = (pin->ssc_center & 0x01);
+	data <<= 1;
+	data |= 0x01; /* enable */
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SSC_EN_CENTER, data);
+
+	wmb();	/* make sure register committed */
+}
+
+static void pll_db_commit_common(struct dsi_pll_14nm *pll,
+				 struct dsi_pll_input *pin,
+				 struct dsi_pll_output *pout)
+{
+	void __iomem *base = pll->mmio;
+	u8 data;
+
+	/* confgiure the non frequency dependent pll registers */
+	data = 0;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_SYSCLK_EN_RESET, data);
+
+	data = pout->pll_txclk_en;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_TXCLK_EN, data);
+
+	data = pout->pll_resetsm_cntrl;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL, data);
+	data = pout->pll_resetsm_cntrl2;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL2, data);
+	data = pout->pll_resetsm_cntrl5;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_RESETSM_CNTRL5, data);
+
+	data = pout->pll_vco_div_ref & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_VCO_DIV_REF1, data);
+	data = (pout->pll_vco_div_ref >> 8) & 0x3;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_VCO_DIV_REF2, data);
+
+	data = pout->pll_kvco_div_ref & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_KVCO_DIV_REF1, data);
+	data = (pout->pll_kvco_div_ref >> 8) & 0x3;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_KVCO_DIV_REF2, data);
+
+	data = pout->pll_misc1;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_MISC1, data);
+
+	data = pin->pll_ie_trim;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_IE_TRIM, data);
+
+	data = pin->pll_ip_trim;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_IP_TRIM, data);
+
+	data = pin->pll_cpmset_cur << 3 | pin->pll_cpcset_cur;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_CP_SET_CUR, data);
+
+	data = pin->pll_icpcset_p << 3 | pin->pll_icpcset_m;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_ICPCSET, data);
+
+	data = pin->pll_icpmset_p << 3 | pin->pll_icpcset_m;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_ICPMSET, data);
+
+	data = pin->pll_icpmset << 3 | pin->pll_icpcset;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_ICP_SET, data);
+
+	data = pin->pll_lpf_cap2 << 4 | pin->pll_lpf_cap1;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_LPF1, data);
+
+	data = pin->pll_iptat_trim;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_IPTAT_TRIM, data);
+
+	data = pin->pll_c3ctrl | pin->pll_r3ctrl << 4;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_CRCTRL, data);
+}
+
+static void pll_14nm_software_reset(struct dsi_pll_14nm *pll_14nm)
+{
+	void __iomem *cmn_base = pll_14nm->phy_cmn_mmio;
+
+	/* de assert pll start and apply pll sw reset */
+
+	/* stop pll */
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_PLL_CNTRL, 0);
+
+	/* pll sw reset */
+	pll_write_udelay(cmn_base + REG_DSI_14nm_PHY_CMN_CTRL_1, 0x20, 10);
+	wmb();	/* make sure register committed */
+
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_CTRL_1, 0);
+	wmb();	/* make sure register committed */
+}
+
+static void pll_db_commit_14nm(struct dsi_pll_14nm *pll,
+			       struct dsi_pll_input *pin,
+			       struct dsi_pll_output *pout)
+{
+	void __iomem *base = pll->mmio;
+	void __iomem *cmn_base = pll->phy_cmn_mmio;
+	u8 data;
+
+	DBG("DSI%d PLL", pll->id);
+
+	data = pout->cmn_ldo_cntrl;
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_LDO_CNTRL, data);
+
+	pll_db_commit_common(pll, pin, pout);
+
+	pll_14nm_software_reset(pll);
+
+	data = pin->dsiclk_sel; /* set dsiclk_sel = 1  */
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_CLK_CFG1, data);
+
+	data = 0xff; /* data, clk, pll normal operation */
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_CTRL_0, data);
+
+	/* configure the frequency dependent pll registers */
+	data = pout->dec_start;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_DEC_START, data);
+
+	data = pout->div_frac_start & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START1, data);
+	data = (pout->div_frac_start >> 8) & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START2, data);
+	data = (pout->div_frac_start >> 16) & 0xf;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START3, data);
+
+	data = pout->plllock_cmp & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP1, data);
+
+	data = (pout->plllock_cmp >> 8) & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP2, data);
+
+	data = (pout->plllock_cmp >> 16) & 0x3;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP3, data);
+
+	data = pin->plllock_cnt << 1 | pin->plllock_rng << 3;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLLLOCK_CMP_EN, data);
+
+	data = pout->pll_vco_count & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_VCO_COUNT1, data);
+	data = (pout->pll_vco_count >> 8) & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_VCO_COUNT2, data);
+
+	data = pout->pll_kvco_count & 0xff;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_KVCO_COUNT1, data);
+	data = (pout->pll_kvco_count >> 8) & 0x3;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_KVCO_COUNT2, data);
+
+	data = (pout->pll_postdiv - 1) << 4 | pin->pll_lpf_res1;
+	pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_LPF2_POSTDIV, data);
+
+	if (pin->ssc_en)
+		pll_db_commit_ssc(pll);
+
+	wmb();	/* make sure register committed */
+}
+
+/*
+ * VCO clock Callbacks
+ */
+static int dsi_pll_14nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
+				     unsigned long parent_rate)
+{
+	struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	struct dsi_pll_input *pin = &pll_14nm->in;
+	struct dsi_pll_output *pout = &pll_14nm->out;
+
+	DBG("DSI PLL%d rate=%lu, parent's=%lu", pll_14nm->id, rate,
+	    parent_rate);
+
+	pll_14nm->vco_current_rate = rate;
+	pll_14nm->vco_ref_clk_rate = VCO_REF_CLK_RATE;
+
+	dsi_pll_14nm_input_init(pll_14nm);
+
+	/*
+	 * This configures the post divider internal to the VCO. It's
+	 * fixed to divide by 1 for now.
+	 *
+	 * tx_band = pll_postdiv.
+	 * 0: divided by 1
+	 * 1: divided by 2
+	 * 2: divided by 4
+	 * 3: divided by 8
+	 */
+	pout->pll_postdiv = DSI_PLL_DEFAULT_VCO_POSTDIV;
+
+	pll_14nm_dec_frac_calc(pll_14nm);
+
+	if (pin->ssc_en)
+		pll_14nm_ssc_calc(pll_14nm);
+
+	pll_14nm_calc_vco_count(pll_14nm);
+
+	/* commit the slave DSI PLL registers if we're master. Note that we
+	 * don't lock the slave PLL. We just ensure that the PLL/PHY registers
+	 * of the master and slave are identical
+	 */
+	if (pll_14nm->uc == MSM_DSI_PHY_MASTER) {
+		struct dsi_pll_14nm *pll_14nm_slave = pll_14nm->slave;
+
+		pll_db_commit_14nm(pll_14nm_slave, pin, pout);
+	}
+
+	pll_db_commit_14nm(pll_14nm, pin, pout);
+
+	return 0;
+}
+
+static unsigned long dsi_pll_14nm_vco_recalc_rate(struct clk_hw *hw,
+						  unsigned long parent_rate)
+{
+	struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	void __iomem *base = pll_14nm->mmio;
+	u64 vco_rate, multiplier = BIT(20);
+	u32 div_frac_start;
+	u32 dec_start;
+	u64 ref_clk = parent_rate;
+
+	dec_start = pll_read(base + REG_DSI_14nm_PHY_PLL_DEC_START);
+	dec_start &= 0x0ff;
+
+	DBG("dec_start = %x", dec_start);
+
+	div_frac_start = (pll_read(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START3)
+				& 0xf) << 16;
+	div_frac_start |= (pll_read(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START2)
+				& 0xff) << 8;
+	div_frac_start |= pll_read(base + REG_DSI_14nm_PHY_PLL_DIV_FRAC_START1)
+				& 0xff;
+
+	DBG("div_frac_start = %x", div_frac_start);
+
+	vco_rate = ref_clk * dec_start;
+
+	vco_rate += ((ref_clk * div_frac_start) / multiplier);
+
+	/*
+	 * Recalculating the rate from dec_start and frac_start doesn't end up
+	 * the rate we originally set. Convert the freq to KHz, round it up and
+	 * convert it back to MHz.
+	 */
+	vco_rate = DIV_ROUND_UP_ULL(vco_rate, 1000) * 1000;
+
+	DBG("returning vco rate = %lu", (unsigned long)vco_rate);
+
+	return (unsigned long)vco_rate;
+}
+
+static const struct clk_ops clk_ops_dsi_pll_14nm_vco = {
+	.round_rate = msm_dsi_pll_helper_clk_round_rate,
+	.set_rate = dsi_pll_14nm_vco_set_rate,
+	.recalc_rate = dsi_pll_14nm_vco_recalc_rate,
+	.prepare = msm_dsi_pll_helper_clk_prepare,
+	.unprepare = msm_dsi_pll_helper_clk_unprepare,
+};
+
+/*
+ * N1 and N2 post-divider clock callbacks
+ */
+#define div_mask(width)	((1 << (width)) - 1)
+static unsigned long dsi_pll_14nm_postdiv_recalc_rate(struct clk_hw *hw,
+						      unsigned long parent_rate)
+{
+	struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw);
+	struct dsi_pll_14nm *pll_14nm = postdiv->pll;
+	void __iomem *base = pll_14nm->phy_cmn_mmio;
+	u8 shift = postdiv->shift;
+	u8 width = postdiv->width;
+	u32 val;
+
+	DBG("DSI%d PLL parent rate=%lu", pll_14nm->id, parent_rate);
+
+	val = pll_read(base + REG_DSI_14nm_PHY_CMN_CLK_CFG0) >> shift;
+	val &= div_mask(width);
+
+	return divider_recalc_rate(hw, parent_rate, val, NULL,
+				   postdiv->flags);
+}
+
+static long dsi_pll_14nm_postdiv_round_rate(struct clk_hw *hw,
+					    unsigned long rate,
+					    unsigned long *prate)
+{
+	struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw);
+	struct dsi_pll_14nm *pll_14nm = postdiv->pll;
+
+	DBG("DSI%d PLL parent rate=%lu", pll_14nm->id, rate);
+
+	return divider_round_rate(hw, rate, prate, NULL,
+				  postdiv->width,
+				  postdiv->flags);
+}
+
+static int dsi_pll_14nm_postdiv_set_rate(struct clk_hw *hw, unsigned long rate,
+					 unsigned long parent_rate)
+{
+	struct dsi_pll_14nm_postdiv *postdiv = to_pll_14nm_postdiv(hw);
+	struct dsi_pll_14nm *pll_14nm = postdiv->pll;
+	void __iomem *base = pll_14nm->phy_cmn_mmio;
+	spinlock_t *lock = &pll_14nm->postdiv_lock;
+	u8 shift = postdiv->shift;
+	u8 width = postdiv->width;
+	unsigned int value;
+	unsigned long flags = 0;
+	u32 val;
+
+	DBG("DSI%d PLL parent rate=%lu parent rate %lu", pll_14nm->id, rate,
+	    parent_rate);
+
+	value = divider_get_val(rate, parent_rate, NULL, postdiv->width,
+				postdiv->flags);
+
+	spin_lock_irqsave(lock, flags);
+
+	val = pll_read(base + REG_DSI_14nm_PHY_CMN_CLK_CFG0);
+	val &= ~(div_mask(width) << shift);
+
+	val |= value << shift;
+	pll_write(base + REG_DSI_14nm_PHY_CMN_CLK_CFG0, val);
+
+	/* If we're master in dual DSI mode, then the slave PLL's post-dividers
+	 * follow the master's post dividers
+	 */
+	if (pll_14nm->uc == MSM_DSI_PHY_MASTER) {
+		struct dsi_pll_14nm *pll_14nm_slave = pll_14nm->slave;
+		void __iomem *slave_base = pll_14nm_slave->phy_cmn_mmio;
+
+		pll_write(slave_base + REG_DSI_14nm_PHY_CMN_CLK_CFG0, val);
+	}
+
+	spin_unlock_irqrestore(lock, flags);
+
+	return 0;
+}
+
+static const struct clk_ops clk_ops_dsi_pll_14nm_postdiv = {
+	.recalc_rate = dsi_pll_14nm_postdiv_recalc_rate,
+	.round_rate = dsi_pll_14nm_postdiv_round_rate,
+	.set_rate = dsi_pll_14nm_postdiv_set_rate,
+};
+
+/*
+ * PLL Callbacks
+ */
+
+static int dsi_pll_14nm_enable_seq(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	void __iomem *base = pll_14nm->mmio;
+	void __iomem *cmn_base = pll_14nm->phy_cmn_mmio;
+	bool locked;
+
+	DBG("");
+
+	pll_write(base + REG_DSI_14nm_PHY_PLL_VREF_CFG1, 0x10);
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_PLL_CNTRL, 1);
+
+	locked = pll_14nm_poll_for_ready(pll_14nm, POLL_MAX_READS,
+					 POLL_TIMEOUT_US);
+
+	if (unlikely(!locked))
+		dev_err(&pll_14nm->pdev->dev, "DSI PLL lock failed\n");
+	else
+		DBG("DSI PLL lock success");
+
+	return locked ? 0 : -EINVAL;
+}
+
+static void dsi_pll_14nm_disable_seq(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	void __iomem *cmn_base = pll_14nm->phy_cmn_mmio;
+
+	DBG("");
+
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_PLL_CNTRL, 0);
+}
+
+static void dsi_pll_14nm_save_state(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	struct pll_14nm_cached_state *cached_state = &pll_14nm->cached_state;
+	void __iomem *cmn_base = pll_14nm->phy_cmn_mmio;
+	u32 data;
+
+	data = pll_read(cmn_base + REG_DSI_14nm_PHY_CMN_CLK_CFG0);
+
+	cached_state->n1postdiv = data & 0xf;
+	cached_state->n2postdiv = (data >> 4) & 0xf;
+
+	DBG("DSI%d PLL save state %x %x", pll_14nm->id,
+	    cached_state->n1postdiv, cached_state->n2postdiv);
+
+	cached_state->vco_rate = clk_hw_get_rate(&pll->clk_hw);
+}
+
+static int dsi_pll_14nm_restore_state(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	struct pll_14nm_cached_state *cached_state = &pll_14nm->cached_state;
+	void __iomem *cmn_base = pll_14nm->phy_cmn_mmio;
+	u32 data;
+	int ret;
+
+	ret = dsi_pll_14nm_vco_set_rate(&pll->clk_hw,
+					cached_state->vco_rate, 0);
+	if (ret) {
+		dev_err(&pll_14nm->pdev->dev,
+			"restore vco rate failed. ret=%d\n", ret);
+		return ret;
+	}
+
+	data = cached_state->n1postdiv | (cached_state->n2postdiv << 4);
+
+	DBG("DSI%d PLL restore state %x %x", pll_14nm->id,
+	    cached_state->n1postdiv, cached_state->n2postdiv);
+
+	pll_write(cmn_base + REG_DSI_14nm_PHY_CMN_CLK_CFG0, data);
+
+	/* also restore post-dividers for slave DSI PLL */
+	if (pll_14nm->uc == MSM_DSI_PHY_MASTER) {
+		struct dsi_pll_14nm *pll_14nm_slave = pll_14nm->slave;
+		void __iomem *slave_base = pll_14nm_slave->phy_cmn_mmio;
+
+		pll_write(slave_base + REG_DSI_14nm_PHY_CMN_CLK_CFG0, data);
+	}
+
+	return 0;
+}
+
+static int dsi_pll_14nm_set_usecase(struct msm_dsi_pll *pll,
+				    enum msm_dsi_phy_usecase uc)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	void __iomem *base = pll_14nm->mmio;
+	u32 clkbuflr_en, bandgap = 0;
+
+	switch (uc) {
+	case MSM_DSI_PHY_STANDALONE:
+		clkbuflr_en = 0x1;
+		break;
+	case MSM_DSI_PHY_MASTER:
+		clkbuflr_en = 0x3;
+		pll_14nm->slave = pll_14nm_list[(pll_14nm->id + 1) % DSI_MAX];
+		break;
+	case MSM_DSI_PHY_SLAVE:
+		clkbuflr_en = 0x0;
+		bandgap = 0x3;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	pll_write(base + REG_DSI_14nm_PHY_PLL_CLKBUFLR_EN, clkbuflr_en);
+	if (bandgap)
+		pll_write(base + REG_DSI_14nm_PHY_PLL_PLL_BANDGAP, bandgap);
+
+	pll_14nm->uc = uc;
+
+	return 0;
+}
+
+static int dsi_pll_14nm_get_provider(struct msm_dsi_pll *pll,
+				     struct clk **byte_clk_provider,
+				     struct clk **pixel_clk_provider)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	struct clk_hw_onecell_data *hw_data = pll_14nm->hw_data;
+
+	if (byte_clk_provider)
+		*byte_clk_provider = hw_data->hws[DSI_BYTE_PLL_CLK]->clk;
+	if (pixel_clk_provider)
+		*pixel_clk_provider = hw_data->hws[DSI_PIXEL_PLL_CLK]->clk;
+
+	return 0;
+}
+
+static void dsi_pll_14nm_destroy(struct msm_dsi_pll *pll)
+{
+	struct dsi_pll_14nm *pll_14nm = to_pll_14nm(pll);
+	struct platform_device *pdev = pll_14nm->pdev;
+	int num_hws = pll_14nm->num_hws;
+
+	of_clk_del_provider(pdev->dev.of_node);
+
+	while (num_hws--)
+		clk_hw_unregister(pll_14nm->hws[num_hws]);
+}
+
+static struct clk_hw *pll_14nm_postdiv_register(struct dsi_pll_14nm *pll_14nm,
+						const char *name,
+						const char *parent_name,
+						unsigned long flags,
+						u8 shift)
+{
+	struct dsi_pll_14nm_postdiv *pll_postdiv;
+	struct device *dev = &pll_14nm->pdev->dev;
+	struct clk_init_data postdiv_init = {
+		.parent_names = (const char *[]) { parent_name },
+		.num_parents = 1,
+		.name = name,
+		.flags = flags,
+		.ops = &clk_ops_dsi_pll_14nm_postdiv,
+	};
+	int ret;
+
+	pll_postdiv = devm_kzalloc(dev, sizeof(*pll_postdiv), GFP_KERNEL);
+	if (!pll_postdiv)
+		return ERR_PTR(-ENOMEM);
+
+	pll_postdiv->pll = pll_14nm;
+	pll_postdiv->shift = shift;
+	/* both N1 and N2 postdividers are 4 bits wide */
+	pll_postdiv->width = 4;
+	/* range of each divider is from 1 to 15 */
+	pll_postdiv->flags = CLK_DIVIDER_ONE_BASED;
+	pll_postdiv->hw.init = &postdiv_init;
+
+	ret = clk_hw_register(dev, &pll_postdiv->hw);
+	if (ret)
+		return ERR_PTR(ret);
+
+	return &pll_postdiv->hw;
+}
+
+static int pll_14nm_register(struct dsi_pll_14nm *pll_14nm)
+{
+	char clk_name[32], parent[32], vco_name[32];
+	struct clk_init_data vco_init = {
+		.parent_names = (const char *[]){ "xo" },
+		.num_parents = 1,
+		.name = vco_name,
+		.flags = CLK_IGNORE_UNUSED,
+		.ops = &clk_ops_dsi_pll_14nm_vco,
+	};
+	struct device *dev = &pll_14nm->pdev->dev;
+	struct clk_hw **hws = pll_14nm->hws;
+	struct clk_hw_onecell_data *hw_data;
+	struct clk_hw *hw;
+	int num = 0;
+	int ret;
+
+	DBG("DSI%d", pll_14nm->id);
+
+	hw_data = devm_kzalloc(dev, sizeof(*hw_data) +
+			       NUM_PROVIDED_CLKS * sizeof(struct clk_hw *),
+			       GFP_KERNEL);
+	if (!hw_data)
+		return -ENOMEM;
+
+	snprintf(vco_name, 32, "dsi%dvco_clk", pll_14nm->id);
+	pll_14nm->base.clk_hw.init = &vco_init;
+
+	ret = clk_hw_register(dev, &pll_14nm->base.clk_hw);
+	if (ret)
+		return ret;
+
+	hws[num++] = &pll_14nm->base.clk_hw;
+
+	snprintf(clk_name, 32, "dsi%dn1_postdiv_clk", pll_14nm->id);
+	snprintf(parent, 32, "dsi%dvco_clk", pll_14nm->id);
+
+	/* N1 postdiv, bits 0-3 in REG_DSI_14nm_PHY_CMN_CLK_CFG0 */
+	hw = pll_14nm_postdiv_register(pll_14nm, clk_name, parent,
+				       CLK_SET_RATE_PARENT, 0);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
+	hws[num++] = hw;
+
+	snprintf(clk_name, 32, "dsi%dpllbyte", pll_14nm->id);
+	snprintf(parent, 32, "dsi%dn1_postdiv_clk", pll_14nm->id);
+
+	/* DSI Byte clock = VCO_CLK / N1 / 8 */
+	hw = clk_hw_register_fixed_factor(dev, clk_name, parent,
+					  CLK_SET_RATE_PARENT, 1, 8);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
+	hws[num++] = hw;
+	hw_data->hws[DSI_BYTE_PLL_CLK] = hw;
+
+	snprintf(clk_name, 32, "dsi%dn1_postdivby2_clk", pll_14nm->id);
+	snprintf(parent, 32, "dsi%dn1_postdiv_clk", pll_14nm->id);
+
+	/*
+	 * Skip the mux for now, force DSICLK_SEL to 1, Add a /2 divider
+	 * on the way. Don't let it set parent.
+	 */
+	hw = clk_hw_register_fixed_factor(dev, clk_name, parent, 0, 1, 2);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
+	hws[num++] = hw;
+
+	snprintf(clk_name, 32, "dsi%dpll", pll_14nm->id);
+	snprintf(parent, 32, "dsi%dn1_postdivby2_clk", pll_14nm->id);
+
+	/* DSI pixel clock = VCO_CLK / N1 / 2 / N2
+	 * This is the output of N2 post-divider, bits 4-7 in
+	 * REG_DSI_14nm_PHY_CMN_CLK_CFG0. Don't let it set parent.
+	 */
+	hw = pll_14nm_postdiv_register(pll_14nm, clk_name, parent, 0, 4);
+	if (IS_ERR(hw))
+		return PTR_ERR(hw);
+
+	hws[num++] = hw;
+	hw_data->hws[DSI_PIXEL_PLL_CLK]	= hw;
+
+	pll_14nm->num_hws = num;
+
+	hw_data->num = NUM_PROVIDED_CLKS;
+	pll_14nm->hw_data = hw_data;
+
+	ret = of_clk_add_hw_provider(dev->of_node, of_clk_hw_onecell_get,
+				     pll_14nm->hw_data);
+	if (ret) {
+		dev_err(dev, "failed to register clk provider: %d\n", ret);
+		return ret;
+	}
+
+	return 0;
+}
+
+struct msm_dsi_pll *msm_dsi_pll_14nm_init(struct platform_device *pdev, int id)
+{
+	struct dsi_pll_14nm *pll_14nm;
+	struct msm_dsi_pll *pll;
+	int ret;
+
+	if (!pdev)
+		return ERR_PTR(-ENODEV);
+
+	pll_14nm = devm_kzalloc(&pdev->dev, sizeof(*pll_14nm), GFP_KERNEL);
+	if (!pll_14nm)
+		return ERR_PTR(-ENOMEM);
+
+	DBG("PLL%d", id);
+
+	pll_14nm->pdev = pdev;
+	pll_14nm->id = id;
+	pll_14nm_list[id] = pll_14nm;
+
+	pll_14nm->phy_cmn_mmio = msm_ioremap(pdev, "dsi_phy", "DSI_PHY");
+	if (IS_ERR_OR_NULL(pll_14nm->phy_cmn_mmio)) {
+		dev_err(&pdev->dev, "failed to map CMN PHY base\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	pll_14nm->mmio = msm_ioremap(pdev, "dsi_pll", "DSI_PLL");
+	if (IS_ERR_OR_NULL(pll_14nm->mmio)) {
+		dev_err(&pdev->dev, "failed to map PLL base\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	spin_lock_init(&pll_14nm->postdiv_lock);
+
+	pll = &pll_14nm->base;
+	pll->min_rate = VCO_MIN_RATE;
+	pll->max_rate = VCO_MAX_RATE;
+	pll->get_provider = dsi_pll_14nm_get_provider;
+	pll->destroy = dsi_pll_14nm_destroy;
+	pll->disable_seq = dsi_pll_14nm_disable_seq;
+	pll->save_state = dsi_pll_14nm_save_state;
+	pll->restore_state = dsi_pll_14nm_restore_state;
+	pll->set_usecase = dsi_pll_14nm_set_usecase;
+
+	pll_14nm->vco_delay = 1;
+
+	pll->en_seq_cnt = 1;
+	pll->enable_seqs[0] = dsi_pll_14nm_enable_seq;
+
+	ret = pll_14nm_register(pll_14nm);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to register PLL: %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	return pll;
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index b782efd4b95f..94ea963519b2 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -260,8 +260,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
 	struct drm_encoder *encoder;
 	struct drm_connector *connector;
 	struct device_node *panel_node;
-	struct drm_encoder *dsi_encs[MSM_DSI_ENCODER_NUM];
-	int i, dsi_id;
+	int dsi_id;
 	int ret;
 
 	switch (intf_type) {
@@ -322,22 +321,19 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
 		if (!priv->dsi[dsi_id])
 			break;
 
-		for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
-			dsi_encs[i] = mdp4_dsi_encoder_init(dev);
-			if (IS_ERR(dsi_encs[i])) {
-				ret = PTR_ERR(dsi_encs[i]);
-				dev_err(dev->dev,
-					"failed to construct DSI encoder: %d\n",
-					ret);
-				return ret;
-			}
-
-			/* TODO: Add DMA_S later? */
-			dsi_encs[i]->possible_crtcs = 1 << DMA_P;
-			priv->encoders[priv->num_encoders++] = dsi_encs[i];
+		encoder = mdp4_dsi_encoder_init(dev);
+		if (IS_ERR(encoder)) {
+			ret = PTR_ERR(encoder);
+			dev_err(dev->dev,
+				"failed to construct DSI encoder: %d\n", ret);
+			return ret;
 		}
 
-		ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, dsi_encs);
+		/* TODO: Add DMA_S later? */
+		encoder->possible_crtcs = 1 << DMA_P;
+		priv->encoders[priv->num_encoders++] = encoder;
+
+		ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, encoder);
 		if (ret) {
 			dev_err(dev->dev, "failed to initialize DSI: %d\n",
 				ret);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
index 27d5371acee0..e6dfc518d4db 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5.xml.h
@@ -8,19 +8,11 @@ http://github.com/freedreno/envytools/
 git clone https://github.com/freedreno/envytools.git
 
 The rules-ng-ng source files this header was generated from are:
-- /home/robclark/src/freedreno/envytools/rnndb/msm.xml                 (    676 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-02-10 17:07:21)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp4.xml            (  20915 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2015-09-18 12:07:28)
-- /home/robclark/src/freedreno/envytools/rnndb/mdp/mdp5.xml            (  36965 bytes, from 2016-11-26 23:01:08)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/dsi.xml             (  27887 bytes, from 2015-10-22 16:34:52)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/sfpb.xml            (    602 bytes, from 2015-10-22 16:35:02)
-- /home/robclark/src/freedreno/envytools/rnndb/dsi/mmss_cc.xml         (   1686 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/qfprom.xml         (    600 bytes, from 2015-05-20 20:03:07)
-- /home/robclark/src/freedreno/envytools/rnndb/hdmi/hdmi.xml           (  41472 bytes, from 2016-01-22 18:18:18)
-- /home/robclark/src/freedreno/envytools/rnndb/edp/edp.xml             (  10416 bytes, from 2015-05-20 20:03:14)
-
-Copyright (C) 2013-2016 by the following authors:
+- /local/mnt/workspace/source_trees/envytools/rnndb/../rnndb/mdp/mdp5.xml   (  37411 bytes, from 2017-01-11 05:19:19)
+- /local/mnt/workspace/source_trees/envytools/rnndb/freedreno_copyright.xml (   1572 bytes, from 2016-05-09 06:32:54)
+- /local/mnt/workspace/source_trees/envytools/rnndb/mdp/mdp_common.xml      (   2849 bytes, from 2016-01-07 08:45:55)
+
+Copyright (C) 2013-2017 by the following authors:
 - Rob Clark <robdclark@gmail.com> (robclark)
 - Ilia Mirkin <imirkin@alum.mit.edu> (imirkin)
 
@@ -65,16 +57,19 @@ enum mdp5_intfnum {
 };
 
 enum mdp5_pipe {
-	SSPP_VIG0 = 0,
-	SSPP_VIG1 = 1,
-	SSPP_VIG2 = 2,
-	SSPP_RGB0 = 3,
-	SSPP_RGB1 = 4,
-	SSPP_RGB2 = 5,
-	SSPP_DMA0 = 6,
-	SSPP_DMA1 = 7,
-	SSPP_VIG3 = 8,
-	SSPP_RGB3 = 9,
+	SSPP_NONE = 0,
+	SSPP_VIG0 = 1,
+	SSPP_VIG1 = 2,
+	SSPP_VIG2 = 3,
+	SSPP_RGB0 = 4,
+	SSPP_RGB1 = 5,
+	SSPP_RGB2 = 6,
+	SSPP_DMA0 = 7,
+	SSPP_DMA1 = 8,
+	SSPP_VIG3 = 9,
+	SSPP_RGB3 = 10,
+	SSPP_CURSOR0 = 11,
+	SSPP_CURSOR1 = 12,
 };
 
 enum mdp5_ctl_mode {
@@ -532,6 +527,7 @@ static inline uint32_t MDP5_CTL_LAYER_EXT_REG_CURSOR1(enum mdp_mixer_stage_id va
 static inline uint32_t __offset_PIPE(enum mdp5_pipe idx)
 {
 	switch (idx) {
+		case SSPP_NONE: return (INVALID_IDX(idx));
 		case SSPP_VIG0: return (mdp5_cfg->pipe_vig.base[0]);
 		case SSPP_VIG1: return (mdp5_cfg->pipe_vig.base[1]);
 		case SSPP_VIG2: return (mdp5_cfg->pipe_vig.base[2]);
@@ -542,6 +538,8 @@ static inline uint32_t __offset_PIPE(enum mdp5_pipe idx)
 		case SSPP_DMA1: return (mdp5_cfg->pipe_dma.base[1]);
 		case SSPP_VIG3: return (mdp5_cfg->pipe_vig.base[3]);
 		case SSPP_RGB3: return (mdp5_cfg->pipe_rgb.base[3]);
+		case SSPP_CURSOR0: return (mdp5_cfg->pipe_cursor.base[0]);
+		case SSPP_CURSOR1: return (mdp5_cfg->pipe_cursor.base[1]);
 		default: return INVALID_IDX(idx);
 	}
 }
@@ -1073,6 +1071,10 @@ static inline uint32_t REG_MDP5_LM_BLEND_COLOR_OUT(uint32_t i0) { return 0x00000
 #define MDP5_LM_BLEND_COLOR_OUT_STAGE1_FG_ALPHA			0x00000004
 #define MDP5_LM_BLEND_COLOR_OUT_STAGE2_FG_ALPHA			0x00000008
 #define MDP5_LM_BLEND_COLOR_OUT_STAGE3_FG_ALPHA			0x00000010
+#define MDP5_LM_BLEND_COLOR_OUT_STAGE4_FG_ALPHA			0x00000020
+#define MDP5_LM_BLEND_COLOR_OUT_STAGE5_FG_ALPHA			0x00000040
+#define MDP5_LM_BLEND_COLOR_OUT_STAGE6_FG_ALPHA			0x00000080
+#define MDP5_LM_BLEND_COLOR_OUT_SPLIT_LEFT_RIGHT		0x80000000
 
 static inline uint32_t REG_MDP5_LM_OUT_SIZE(uint32_t i0) { return 0x00000004 + __offset_LM(i0); }
 #define MDP5_LM_OUT_SIZE_HEIGHT__MASK				0xffff0000
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
index 618b2ffed9b4..34ab553f6897 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
@@ -421,6 +421,16 @@ const struct mdp5_cfg_hw msm8x96_config = {
 			MDP_PIPE_CAP_SW_PIX_EXT	|
 			0,
 	},
+	.pipe_cursor = {
+		.count = 2,
+		.base = { 0x34000, 0x36000 },
+		.caps = MDP_PIPE_CAP_HFLIP	|
+			MDP_PIPE_CAP_VFLIP	|
+			MDP_PIPE_CAP_SW_PIX_EXT	|
+			MDP_PIPE_CAP_CURSOR	|
+			0,
+	},
+
 	.lm = {
 		.count = 6,
 		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
index 050e1618c836..b1c7daaede86 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
@@ -32,7 +32,7 @@ extern const struct mdp5_cfg_hw *mdp5_cfg;
 typedef DECLARE_BITMAP(mdp5_smp_state_t, MAX_SMP_BLOCKS);
 
 #define MDP5_SUB_BLOCK_DEFINITION \
-	int count; \
+	unsigned int count; \
 	uint32_t base[MAX_BASES]
 
 struct mdp5_sub_block {
@@ -85,6 +85,7 @@ struct mdp5_cfg_hw {
 	struct mdp5_pipe_block pipe_vig;
 	struct mdp5_pipe_block pipe_rgb;
 	struct mdp5_pipe_block pipe_dma;
+	struct mdp5_pipe_block pipe_cursor;
 	struct mdp5_lm_block  lm;
 	struct mdp5_sub_block dspp;
 	struct mdp5_sub_block ad;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index c627ab6d0061..df1c8adec3f3 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -16,16 +16,6 @@
 #include "drm_crtc.h"
 #include "drm_crtc_helper.h"
 
-struct mdp5_cmd_encoder {
-	struct drm_encoder base;
-	struct mdp5_interface intf;
-	bool enabled;
-	uint32_t bsc;
-
-	struct mdp5_ctl *ctl;
-};
-#define to_mdp5_cmd_encoder(x) container_of(x, struct mdp5_cmd_encoder, base)
-
 static struct mdp5_kms *get_kms(struct drm_encoder *encoder)
 {
 	struct msm_drm_private *priv = encoder->dev->dev_private;
@@ -36,47 +26,8 @@ static struct mdp5_kms *get_kms(struct drm_encoder *encoder)
 #include <mach/board.h>
 #include <linux/msm-bus.h>
 #include <linux/msm-bus-board.h>
-#define MDP_BUS_VECTOR_ENTRY(ab_val, ib_val)		\
-	{						\
-		.src = MSM_BUS_MASTER_MDP_PORT0,	\
-		.dst = MSM_BUS_SLAVE_EBI_CH0,		\
-		.ab = (ab_val),				\
-		.ib = (ib_val),				\
-	}
-
-static struct msm_bus_vectors mdp_bus_vectors[] = {
-	MDP_BUS_VECTOR_ENTRY(0, 0),
-	MDP_BUS_VECTOR_ENTRY(2000000000, 2000000000),
-};
-static struct msm_bus_paths mdp_bus_usecases[] = { {
-		.num_paths = 1,
-		.vectors = &mdp_bus_vectors[0],
-}, {
-		.num_paths = 1,
-		.vectors = &mdp_bus_vectors[1],
-} };
-static struct msm_bus_scale_pdata mdp_bus_scale_table = {
-	.usecase = mdp_bus_usecases,
-	.num_usecases = ARRAY_SIZE(mdp_bus_usecases),
-	.name = "mdss_mdp",
-};
-
-static void bs_init(struct mdp5_cmd_encoder *mdp5_cmd_enc)
-{
-	mdp5_cmd_enc->bsc = msm_bus_scale_register_client(
-			&mdp_bus_scale_table);
-	DBG("bus scale client: %08x", mdp5_cmd_enc->bsc);
-}
-
-static void bs_fini(struct mdp5_cmd_encoder *mdp5_cmd_enc)
-{
-	if (mdp5_cmd_enc->bsc) {
-		msm_bus_scale_unregister_client(mdp5_cmd_enc->bsc);
-		mdp5_cmd_enc->bsc = 0;
-	}
-}
 
-static void bs_set(struct mdp5_cmd_encoder *mdp5_cmd_enc, int idx)
+static void bs_set(struct mdp5_encoder *mdp5_cmd_enc, int idx)
 {
 	if (mdp5_cmd_enc->bsc) {
 		DBG("set bus scaling: %d", idx);
@@ -89,14 +40,12 @@ static void bs_set(struct mdp5_cmd_encoder *mdp5_cmd_enc, int idx)
 	}
 }
 #else
-static void bs_init(struct mdp5_cmd_encoder *mdp5_cmd_enc) {}
-static void bs_fini(struct mdp5_cmd_encoder *mdp5_cmd_enc) {}
-static void bs_set(struct mdp5_cmd_encoder *mdp5_cmd_enc, int idx) {}
+static void bs_set(struct mdp5_encoder *mdp5_cmd_enc, int idx) {}
 #endif
 
 #define VSYNC_CLK_RATE 19200000
 static int pingpong_tearcheck_setup(struct drm_encoder *encoder,
-					struct drm_display_mode *mode)
+				    struct drm_display_mode *mode)
 {
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
 	struct device *dev = encoder->dev->dev;
@@ -176,23 +125,11 @@ static void pingpong_tearcheck_disable(struct drm_encoder *encoder)
 	clk_disable_unprepare(mdp5_kms->vsync_clk);
 }
 
-static void mdp5_cmd_encoder_destroy(struct drm_encoder *encoder)
-{
-	struct mdp5_cmd_encoder *mdp5_cmd_enc = to_mdp5_cmd_encoder(encoder);
-	bs_fini(mdp5_cmd_enc);
-	drm_encoder_cleanup(encoder);
-	kfree(mdp5_cmd_enc);
-}
-
-static const struct drm_encoder_funcs mdp5_cmd_encoder_funcs = {
-	.destroy = mdp5_cmd_encoder_destroy,
-};
-
-static void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
-		struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
+void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode)
 {
-	struct mdp5_cmd_encoder *mdp5_cmd_enc = to_mdp5_cmd_encoder(encoder);
+	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 
 	mode = adjusted_mode;
 
@@ -209,9 +146,9 @@ static void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
 				mdp5_cmd_enc->ctl);
 }
 
-static void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
+void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
 {
-	struct mdp5_cmd_encoder *mdp5_cmd_enc = to_mdp5_cmd_encoder(encoder);
+	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 	struct mdp5_ctl *ctl = mdp5_cmd_enc->ctl;
 	struct mdp5_interface *intf = &mdp5_cmd_enc->intf;
 
@@ -228,9 +165,9 @@ static void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
 	mdp5_cmd_enc->enabled = false;
 }
 
-static void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
+void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 {
-	struct mdp5_cmd_encoder *mdp5_cmd_enc = to_mdp5_cmd_encoder(encoder);
+	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 	struct mdp5_ctl *ctl = mdp5_cmd_enc->ctl;
 	struct mdp5_interface *intf = &mdp5_cmd_enc->intf;
 
@@ -248,16 +185,10 @@ static void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 	mdp5_cmd_enc->enabled = true;
 }
 
-static const struct drm_encoder_helper_funcs mdp5_cmd_encoder_helper_funcs = {
-	.mode_set = mdp5_cmd_encoder_mode_set,
-	.disable = mdp5_cmd_encoder_disable,
-	.enable = mdp5_cmd_encoder_enable,
-};
-
 int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
-					struct drm_encoder *slave_encoder)
+				       struct drm_encoder *slave_encoder)
 {
-	struct mdp5_cmd_encoder *mdp5_cmd_enc = to_mdp5_cmd_encoder(encoder);
+	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms;
 	int intf_num;
 	u32 data = 0;
@@ -292,43 +223,3 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
 
 	return 0;
 }
-
-/* initialize command mode encoder */
-struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev,
-			struct mdp5_interface *intf, struct mdp5_ctl *ctl)
-{
-	struct drm_encoder *encoder = NULL;
-	struct mdp5_cmd_encoder *mdp5_cmd_enc;
-	int ret;
-
-	if (WARN_ON((intf->type != INTF_DSI) &&
-		(intf->mode != MDP5_INTF_DSI_MODE_COMMAND))) {
-		ret = -EINVAL;
-		goto fail;
-	}
-
-	mdp5_cmd_enc = kzalloc(sizeof(*mdp5_cmd_enc), GFP_KERNEL);
-	if (!mdp5_cmd_enc) {
-		ret = -ENOMEM;
-		goto fail;
-	}
-
-	memcpy(&mdp5_cmd_enc->intf, intf, sizeof(mdp5_cmd_enc->intf));
-	encoder = &mdp5_cmd_enc->base;
-	mdp5_cmd_enc->ctl = ctl;
-
-	drm_encoder_init(dev, encoder, &mdp5_cmd_encoder_funcs,
-			DRM_MODE_ENCODER_DSI, NULL);
-
-	drm_encoder_helper_add(encoder, &mdp5_cmd_encoder_helper_funcs);
-
-	bs_init(mdp5_cmd_enc);
-
-	return encoder;
-
-fail:
-	if (encoder)
-		mdp5_cmd_encoder_destroy(encoder);
-
-	return ERR_PTR(ret);
-}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 1ce8a01a5a28..d0c8b38b96ce 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -177,6 +177,21 @@ static void mdp5_crtc_destroy(struct drm_crtc *crtc)
 	kfree(mdp5_crtc);
 }
 
+static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage)
+{
+	switch (stage) {
+	case STAGE0: return MDP5_LM_BLEND_COLOR_OUT_STAGE0_FG_ALPHA;
+	case STAGE1: return MDP5_LM_BLEND_COLOR_OUT_STAGE1_FG_ALPHA;
+	case STAGE2: return MDP5_LM_BLEND_COLOR_OUT_STAGE2_FG_ALPHA;
+	case STAGE3: return MDP5_LM_BLEND_COLOR_OUT_STAGE3_FG_ALPHA;
+	case STAGE4: return MDP5_LM_BLEND_COLOR_OUT_STAGE4_FG_ALPHA;
+	case STAGE5: return MDP5_LM_BLEND_COLOR_OUT_STAGE5_FG_ALPHA;
+	case STAGE6: return MDP5_LM_BLEND_COLOR_OUT_STAGE6_FG_ALPHA;
+	default:
+		return 0;
+	}
+}
+
 /*
  * blend_setup() - blend all the planes of a CRTC
  *
@@ -195,8 +210,10 @@ static void blend_setup(struct drm_crtc *crtc)
 	uint32_t lm = mdp5_crtc->lm;
 	uint32_t blend_op, fg_alpha, bg_alpha, ctl_blend_flags = 0;
 	unsigned long flags;
-	uint8_t stage[STAGE_MAX + 1];
+	enum mdp5_pipe stage[STAGE_MAX + 1] = { SSPP_NONE };
 	int i, plane_cnt = 0;
+	bool bg_alpha_enabled = false;
+	u32 mixer_op_mode = 0;
 #define blender(stage)	((stage) - STAGE0)
 
 	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
@@ -218,6 +235,11 @@ static void blend_setup(struct drm_crtc *crtc)
 	if (!pstates[STAGE_BASE]) {
 		ctl_blend_flags |= MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT;
 		DBG("Border Color is enabled");
+	} else if (plane_cnt) {
+		format = to_mdp_format(msm_framebuffer_format(pstates[STAGE_BASE]->base.fb));
+
+		if (format->alpha_enable)
+			bg_alpha_enabled = true;
 	}
 
 	/* The reset for blending */
@@ -232,6 +254,12 @@ static void blend_setup(struct drm_crtc *crtc)
 			MDP5_LM_BLEND_OP_MODE_BG_ALPHA(BG_CONST);
 		fg_alpha = pstates[i]->alpha;
 		bg_alpha = 0xFF - pstates[i]->alpha;
+
+		if (!format->alpha_enable && bg_alpha_enabled)
+			mixer_op_mode = 0;
+		else
+			mixer_op_mode |= mdp5_lm_use_fg_alpha_mask(i);
+
 		DBG("Stage %d fg_alpha %x bg_alpha %x", i, fg_alpha, bg_alpha);
 
 		if (format->alpha_enable && pstates[i]->premultiplied) {
@@ -268,6 +296,8 @@ static void blend_setup(struct drm_crtc *crtc)
 				blender(i)), bg_alpha);
 	}
 
+	mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm), mixer_op_mode);
+
 	mdp5_ctl_blend(mdp5_crtc->ctl, stage, plane_cnt, ctl_blend_flags);
 
 out:
@@ -370,6 +400,7 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 	struct plane_state pstates[STAGE_MAX + 1];
 	const struct mdp5_cfg_hw *hw_cfg;
 	const struct drm_plane_state *pstate;
+	bool cursor_plane = false;
 	int cnt = 0, base = 0, i;
 
 	DBG("%s: check", crtc->name);
@@ -379,6 +410,9 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 		pstates[cnt].state = to_mdp5_plane_state(pstate);
 
 		cnt++;
+
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			cursor_plane = true;
 	}
 
 	/* assign a stage based on sorted zpos property */
@@ -390,6 +424,10 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 	if ((cnt > 0) && !is_fullscreen(state, &pstates[0].state->base))
 		base++;
 
+	/* trigger a warning if cursor isn't the highest zorder */
+	WARN_ON(cursor_plane &&
+		(pstates[cnt - 1].plane->type != DRM_PLANE_TYPE_CURSOR));
+
 	/* verify that there are not too many planes attached to crtc
 	 * and that we don't have conflicting mixer stages:
 	 */
@@ -401,7 +439,10 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 	}
 
 	for (i = 0; i < cnt; i++) {
-		pstates[i].state->stage = STAGE_BASE + i + base;
+		if (cursor_plane && (i == (cnt - 1)))
+			pstates[i].state->stage = hw_cfg->lm.nb_stages;
+		else
+			pstates[i].state->stage = STAGE_BASE + i + base;
 		DBG("%s: assign pipe %s on stage=%d", crtc->name,
 				pstates[i].plane->name,
 				pstates[i].state->stage);
@@ -612,6 +653,16 @@ static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.cursor_move = mdp5_crtc_cursor_move,
 };
 
+static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
+	.set_config = drm_atomic_helper_set_config,
+	.destroy = mdp5_crtc_destroy,
+	.page_flip = drm_atomic_helper_page_flip,
+	.set_property = drm_atomic_helper_crtc_set_property,
+	.reset = drm_atomic_helper_crtc_reset,
+	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+};
+
 static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = {
 	.mode_set_nofb = mdp5_crtc_mode_set_nofb,
 	.disable = mdp5_crtc_disable,
@@ -727,6 +778,13 @@ void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
 	mdp5_ctl_set_pipeline(ctl, intf, lm);
 }
 
+struct mdp5_ctl *mdp5_crtc_get_ctl(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+
+	return mdp5_crtc->ctl;
+}
+
 int mdp5_crtc_get_lm(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
@@ -745,7 +803,8 @@ void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc)
 
 /* initialize crtc */
 struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
-		struct drm_plane *plane, int id)
+				struct drm_plane *plane,
+				struct drm_plane *cursor_plane, int id)
 {
 	struct drm_crtc *crtc = NULL;
 	struct mdp5_crtc *mdp5_crtc;
@@ -766,8 +825,12 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 	mdp5_crtc->vblank.irq = mdp5_crtc_vblank_irq;
 	mdp5_crtc->err.irq = mdp5_crtc_err_irq;
 
-	drm_crtc_init_with_planes(dev, crtc, plane, NULL, &mdp5_crtc_funcs,
-				  NULL);
+	if (cursor_plane)
+		drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane,
+					  &mdp5_crtc_no_lm_cursor_funcs, NULL);
+	else
+		drm_crtc_init_with_planes(dev, crtc, plane, NULL,
+					  &mdp5_crtc_funcs, NULL);
 
 	drm_flip_work_init(&mdp5_crtc->unref_cursor_work,
 			"unref cursor", unref_cursor_worker);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
index d021edc3b307..8b93f7e13200 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
@@ -326,6 +326,8 @@ static u32 mdp_ctl_blend_mask(enum mdp5_pipe pipe,
 	case SSPP_DMA1: return MDP5_CTL_LAYER_REG_DMA1(stage);
 	case SSPP_VIG3: return MDP5_CTL_LAYER_REG_VIG3(stage);
 	case SSPP_RGB3: return MDP5_CTL_LAYER_REG_RGB3(stage);
+	case SSPP_CURSOR0:
+	case SSPP_CURSOR1:
 	default:	return 0;
 	}
 }
@@ -333,7 +335,7 @@ static u32 mdp_ctl_blend_mask(enum mdp5_pipe pipe,
 static u32 mdp_ctl_blend_ext_mask(enum mdp5_pipe pipe,
 		enum mdp_mixer_stage_id stage)
 {
-	if (stage < STAGE6)
+	if (stage < STAGE6 && (pipe != SSPP_CURSOR0 && pipe != SSPP_CURSOR1))
 		return 0;
 
 	switch (pipe) {
@@ -347,12 +349,14 @@ static u32 mdp_ctl_blend_ext_mask(enum mdp5_pipe pipe,
 	case SSPP_DMA1: return MDP5_CTL_LAYER_EXT_REG_DMA1_BIT3;
 	case SSPP_VIG3: return MDP5_CTL_LAYER_EXT_REG_VIG3_BIT3;
 	case SSPP_RGB3: return MDP5_CTL_LAYER_EXT_REG_RGB3_BIT3;
+	case SSPP_CURSOR0: return MDP5_CTL_LAYER_EXT_REG_CURSOR0(stage);
+	case SSPP_CURSOR1: return MDP5_CTL_LAYER_EXT_REG_CURSOR1(stage);
 	default:	return 0;
 	}
 }
 
-int mdp5_ctl_blend(struct mdp5_ctl *ctl, u8 *stage, u32 stage_cnt,
-	u32 ctl_blend_op_flags)
+int mdp5_ctl_blend(struct mdp5_ctl *ctl, enum mdp5_pipe *stage, u32 stage_cnt,
+		   u32 ctl_blend_op_flags)
 {
 	unsigned long flags;
 	u32 blend_cfg = 0, blend_ext_cfg = 0;
@@ -365,7 +369,7 @@ int mdp5_ctl_blend(struct mdp5_ctl *ctl, u8 *stage, u32 stage_cnt,
 		start_stage = STAGE_BASE;
 	}
 
-	for (i = start_stage; i < start_stage + stage_cnt; i++) {
+	for (i = start_stage; stage_cnt && i <= STAGE_MAX; i++) {
 		blend_cfg |= mdp_ctl_blend_mask(stage[i], i);
 		blend_ext_cfg |= mdp_ctl_blend_ext_mask(stage[i], i);
 	}
@@ -422,6 +426,8 @@ u32 mdp_ctl_flush_mask_pipe(enum mdp5_pipe pipe)
 	case SSPP_DMA1: return MDP5_CTL_FLUSH_DMA1;
 	case SSPP_VIG3: return MDP5_CTL_FLUSH_VIG3;
 	case SSPP_RGB3: return MDP5_CTL_FLUSH_RGB3;
+	case SSPP_CURSOR0: return MDP5_CTL_FLUSH_CURSOR_0;
+	case SSPP_CURSOR1: return MDP5_CTL_FLUSH_CURSOR_1;
 	default:        return 0;
 	}
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
index 96148c6f863c..fda00d33e4db 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
@@ -56,8 +56,8 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable);
  * (call mdp5_ctl_commit() with mdp_ctl_flush_mask_ctl() mask)
  */
 #define MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT	BIT(0)
-int mdp5_ctl_blend(struct mdp5_ctl *ctl, u8 *stage, u32 stage_cnt,
-	u32 ctl_blend_op_flags);
+int mdp5_ctl_blend(struct mdp5_ctl *ctl, enum mdp5_pipe *stage, u32 stage_cnt,
+		   u32 ctl_blend_op_flags);
 
 /**
  * mdp_ctl_flush_mask...() - Register FLUSH masks
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index fe0c22230883..80fa482ae8ed 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -21,17 +21,6 @@
 #include "drm_crtc.h"
 #include "drm_crtc_helper.h"
 
-struct mdp5_encoder {
-	struct drm_encoder base;
-	struct mdp5_interface intf;
-	spinlock_t intf_lock;	/* protect REG_MDP5_INTF_* registers */
-	bool enabled;
-	uint32_t bsc;
-
-	struct mdp5_ctl *ctl;
-};
-#define to_mdp5_encoder(x) container_of(x, struct mdp5_encoder, base)
-
 static struct mdp5_kms *get_kms(struct drm_encoder *encoder)
 {
 	struct msm_drm_private *priv = encoder->dev->dev_private;
@@ -112,9 +101,9 @@ static const struct drm_encoder_funcs mdp5_encoder_funcs = {
 	.destroy = mdp5_encoder_destroy,
 };
 
-static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
-		struct drm_display_mode *mode,
-		struct drm_display_mode *adjusted_mode)
+static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
+				      struct drm_display_mode *mode,
+				      struct drm_display_mode *adjusted_mode)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
@@ -221,7 +210,7 @@ static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
 				mdp5_encoder->ctl);
 }
 
-static void mdp5_encoder_disable(struct drm_encoder *encoder)
+static void mdp5_vid_encoder_disable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
@@ -256,7 +245,7 @@ static void mdp5_encoder_disable(struct drm_encoder *encoder)
 	mdp5_encoder->enabled = false;
 }
 
-static void mdp5_encoder_enable(struct drm_encoder *encoder)
+static void mdp5_vid_encoder_enable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
@@ -279,6 +268,41 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder)
 	mdp5_encoder->enabled = true;
 }
 
+static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_interface *intf = &mdp5_encoder->intf;
+
+	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
+		mdp5_cmd_encoder_mode_set(encoder, mode, adjusted_mode);
+	else
+		mdp5_vid_encoder_mode_set(encoder, mode, adjusted_mode);
+}
+
+static void mdp5_encoder_disable(struct drm_encoder *encoder)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_interface *intf = &mdp5_encoder->intf;
+
+	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
+		mdp5_cmd_encoder_disable(encoder);
+	else
+		mdp5_vid_encoder_disable(encoder);
+}
+
+static void mdp5_encoder_enable(struct drm_encoder *encoder)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_interface *intf = &mdp5_encoder->intf;
+
+	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
+		mdp5_cmd_encoder_disable(encoder);
+	else
+		mdp5_vid_encoder_enable(encoder);
+}
+
 static const struct drm_encoder_helper_funcs mdp5_encoder_helper_funcs = {
 	.mode_set = mdp5_encoder_mode_set,
 	.disable = mdp5_encoder_disable,
@@ -303,8 +327,8 @@ u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder)
 	return mdp5_read(mdp5_kms, REG_MDP5_INTF_FRAME_COUNT(intf));
 }
 
-int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
-					struct drm_encoder *slave_encoder)
+int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder,
+				       struct drm_encoder *slave_encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_encoder *mdp5_slave_enc = to_mdp5_encoder(slave_encoder);
@@ -342,6 +366,23 @@ int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
 	return 0;
 }
 
+void mdp5_encoder_set_intf_mode(struct drm_encoder *encoder, bool cmd_mode)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_interface *intf = &mdp5_encoder->intf;
+
+	/* TODO: Expand this to set writeback modes too */
+	if (cmd_mode) {
+		WARN_ON(intf->type != INTF_DSI);
+		intf->mode = MDP5_INTF_DSI_MODE_COMMAND;
+	} else {
+		if (intf->type == INTF_DSI)
+			intf->mode = MDP5_INTF_DSI_MODE_VIDEO;
+		else
+			intf->mode = MDP5_INTF_MODE_NONE;
+	}
+}
+
 /* initialize encoder */
 struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 			struct mdp5_interface *intf, struct mdp5_ctl *ctl)
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index c396d459a9d0..3eb0749223d9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -148,7 +148,15 @@ static int mdp5_set_split_display(struct msm_kms *kms,
 		return mdp5_cmd_encoder_set_split_display(encoder,
 							slave_encoder);
 	else
-		return mdp5_encoder_set_split_display(encoder, slave_encoder);
+		return mdp5_vid_encoder_set_split_display(encoder,
+							  slave_encoder);
+}
+
+static void mdp5_set_encoder_mode(struct msm_kms *kms,
+				  struct drm_encoder *encoder,
+				  bool cmd_mode)
+{
+	mdp5_encoder_set_intf_mode(encoder, cmd_mode);
 }
 
 static void mdp5_kms_destroy(struct msm_kms *kms)
@@ -230,6 +238,7 @@ static const struct mdp_kms_funcs kms_funcs = {
 		.get_format      = mdp_get_format,
 		.round_pixclk    = mdp5_round_pixclk,
 		.set_split_display = mdp5_set_split_display,
+		.set_encoder_mode = mdp5_set_encoder_mode,
 		.destroy         = mdp5_kms_destroy,
 #ifdef CONFIG_DEBUG_FS
 		.debugfs_init    = mdp5_kms_debugfs_init,
@@ -267,7 +276,7 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms)
 
 static struct drm_encoder *construct_encoder(struct mdp5_kms *mdp5_kms,
 		enum mdp5_intf_type intf_type, int intf_num,
-		enum mdp5_intf_mode intf_mode, struct mdp5_ctl *ctl)
+		struct mdp5_ctl *ctl)
 {
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
@@ -275,21 +284,15 @@ static struct drm_encoder *construct_encoder(struct mdp5_kms *mdp5_kms,
 	struct mdp5_interface intf = {
 			.num	= intf_num,
 			.type	= intf_type,
-			.mode	= intf_mode,
+			.mode	= MDP5_INTF_MODE_NONE,
 	};
 
-	if ((intf_type == INTF_DSI) &&
-		(intf_mode == MDP5_INTF_DSI_MODE_COMMAND))
-		encoder = mdp5_cmd_encoder_init(dev, &intf, ctl);
-	else
-		encoder = mdp5_encoder_init(dev, &intf, ctl);
-
+	encoder = mdp5_encoder_init(dev, &intf, ctl);
 	if (IS_ERR(encoder)) {
 		dev_err(dev->dev, "failed to construct encoder\n");
 		return encoder;
 	}
 
-	encoder->possible_crtcs = (1 << priv->num_crtcs) - 1;
 	priv->encoders[priv->num_encoders++] = encoder;
 
 	return encoder;
@@ -338,8 +341,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 			break;
 		}
 
-		encoder = construct_encoder(mdp5_kms, INTF_eDP, intf_num,
-					MDP5_INTF_MODE_NONE, ctl);
+		encoder = construct_encoder(mdp5_kms, INTF_eDP, intf_num, ctl);
 		if (IS_ERR(encoder)) {
 			ret = PTR_ERR(encoder);
 			break;
@@ -357,8 +359,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 			break;
 		}
 
-		encoder = construct_encoder(mdp5_kms, INTF_HDMI, intf_num,
-					MDP5_INTF_MODE_NONE, ctl);
+		encoder = construct_encoder(mdp5_kms, INTF_HDMI, intf_num, ctl);
 		if (IS_ERR(encoder)) {
 			ret = PTR_ERR(encoder);
 			break;
@@ -369,9 +370,6 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 	case INTF_DSI:
 	{
 		int dsi_id = get_dsi_id_from_intf(hw_cfg, intf_num);
-		struct drm_encoder *dsi_encs[MSM_DSI_ENCODER_NUM];
-		enum mdp5_intf_mode mode;
-		int i;
 
 		if ((dsi_id >= ARRAY_SIZE(priv->dsi)) || (dsi_id < 0)) {
 			dev_err(dev->dev, "failed to find dsi from intf %d\n",
@@ -389,19 +387,13 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 			break;
 		}
 
-		for (i = 0; i < MSM_DSI_ENCODER_NUM; i++) {
-			mode = (i == MSM_DSI_CMD_ENCODER_ID) ?
-				MDP5_INTF_DSI_MODE_COMMAND :
-				MDP5_INTF_DSI_MODE_VIDEO;
-			dsi_encs[i] = construct_encoder(mdp5_kms, INTF_DSI,
-							intf_num, mode, ctl);
-			if (IS_ERR(dsi_encs[i])) {
-				ret = PTR_ERR(dsi_encs[i]);
-				break;
-			}
+		encoder = construct_encoder(mdp5_kms, INTF_DSI, intf_num, ctl);
+		if (IS_ERR(encoder)) {
+			ret = PTR_ERR(encoder);
+			break;
 		}
 
-		ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, dsi_encs);
+		ret = msm_dsi_modeset_init(priv->dsi[dsi_id], dev, encoder);
 		break;
 	}
 	default:
@@ -418,20 +410,48 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	const struct mdp5_cfg_hw *hw_cfg;
-	int i, ret;
+	unsigned int num_crtcs;
+	int i, ret, pi = 0, ci = 0;
+	struct drm_plane *primary[MAX_BASES] = { NULL };
+	struct drm_plane *cursor[MAX_BASES] = { NULL };
 
 	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
 
-	/* Construct planes equaling the number of hw pipes, and CRTCs
-	 * for the N layer-mixers (LM).  The first N planes become primary
+	/*
+	 * Construct encoders and modeset initialize connector devices
+	 * for each external display interface.
+	 */
+	for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) {
+		ret = modeset_init_intf(mdp5_kms, i);
+		if (ret)
+			goto fail;
+	}
+
+	/*
+	 * We should ideally have less number of encoders (set up by parsing
+	 * the MDP5 interfaces) than the number of layer mixers present in HW,
+	 * but let's be safe here anyway
+	 */
+	num_crtcs = min(priv->num_encoders, mdp5_cfg->lm.count);
+
+	/*
+	 * Construct planes equaling the number of hw pipes, and CRTCs for the
+	 * N encoders set up by the driver. The first N planes become primary
 	 * planes for the CRTCs, with the remainder as overlay planes:
 	 */
 	for (i = 0; i < mdp5_kms->num_hwpipes; i++) {
-		bool primary = i < mdp5_cfg->lm.count;
+		struct mdp5_hw_pipe *hwpipe = mdp5_kms->hwpipes[i];
 		struct drm_plane *plane;
-		struct drm_crtc *crtc;
+		enum drm_plane_type type;
 
-		plane = mdp5_plane_init(dev, primary);
+		if (i < num_crtcs)
+			type = DRM_PLANE_TYPE_PRIMARY;
+		else if (hwpipe->caps & MDP_PIPE_CAP_CURSOR)
+			type = DRM_PLANE_TYPE_CURSOR;
+		else
+			type = DRM_PLANE_TYPE_OVERLAY;
+
+		plane = mdp5_plane_init(dev, type);
 		if (IS_ERR(plane)) {
 			ret = PTR_ERR(plane);
 			dev_err(dev->dev, "failed to construct plane %d (%d)\n", i, ret);
@@ -439,10 +459,16 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 		}
 		priv->planes[priv->num_planes++] = plane;
 
-		if (!primary)
-			continue;
+		if (type == DRM_PLANE_TYPE_PRIMARY)
+			primary[pi++] = plane;
+		if (type == DRM_PLANE_TYPE_CURSOR)
+			cursor[ci++] = plane;
+	}
+
+	for (i = 0; i < num_crtcs; i++) {
+		struct drm_crtc *crtc;
 
-		crtc  = mdp5_crtc_init(dev, plane, i);
+		crtc  = mdp5_crtc_init(dev, primary[i], cursor[i], i);
 		if (IS_ERR(crtc)) {
 			ret = PTR_ERR(crtc);
 			dev_err(dev->dev, "failed to construct crtc %d (%d)\n", i, ret);
@@ -451,13 +477,14 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 		priv->crtcs[priv->num_crtcs++] = crtc;
 	}
 
-	/* Construct encoders and modeset initialize connector devices
-	 * for each external display interface.
+	/*
+	 * Now that we know the number of crtcs we've created, set the possible
+	 * crtcs for the encoders
 	 */
-	for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) {
-		ret = modeset_init_intf(mdp5_kms, i);
-		if (ret)
-			goto fail;
+	for (i = 0; i < priv->num_encoders; i++) {
+		struct drm_encoder *encoder = priv->encoders[i];
+
+		encoder->possible_crtcs = (1 << priv->num_crtcs) - 1;
 	}
 
 	return 0;
@@ -773,6 +800,9 @@ static int hwpipe_init(struct mdp5_kms *mdp5_kms)
 	static const enum mdp5_pipe dma_planes[] = {
 			SSPP_DMA0, SSPP_DMA1,
 	};
+	static const enum mdp5_pipe cursor_planes[] = {
+			SSPP_CURSOR0, SSPP_CURSOR1,
+	};
 	const struct mdp5_cfg_hw *hw_cfg;
 	int ret;
 
@@ -796,6 +826,13 @@ static int hwpipe_init(struct mdp5_kms *mdp5_kms)
 	if (ret)
 		return ret;
 
+	/* Construct cursor pipes: */
+	ret = construct_pipes(mdp5_kms, hw_cfg->pipe_cursor.count,
+			cursor_planes, hw_cfg->pipe_cursor.base,
+			hw_cfg->pipe_cursor.caps);
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index cdfc63d90c7b..9de471191eba 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -126,6 +126,17 @@ struct mdp5_interface {
 	enum mdp5_intf_mode mode;
 };
 
+struct mdp5_encoder {
+	struct drm_encoder base;
+	struct mdp5_interface intf;
+	spinlock_t intf_lock;	/* protect REG_MDP5_INTF_* registers */
+	bool enabled;
+	uint32_t bsc;
+
+	struct mdp5_ctl *ctl;
+};
+#define to_mdp5_encoder(x) container_of(x, struct mdp5_encoder, base)
+
 static inline void mdp5_write(struct mdp5_kms *mdp5_kms, u32 reg, u32 data)
 {
 	msm_writel(data, mdp5_kms->mmio + reg);
@@ -156,6 +167,7 @@ static inline const char *pipe2name(enum mdp5_pipe pipe)
 		NAME(RGB0), NAME(RGB1), NAME(RGB2),
 		NAME(DMA0), NAME(DMA1),
 		NAME(VIG3), NAME(RGB3),
+		NAME(CURSOR0), NAME(CURSOR1),
 #undef NAME
 	};
 	return names[pipe];
@@ -231,8 +243,10 @@ void mdp5_irq_domain_fini(struct mdp5_kms *mdp5_kms);
 
 uint32_t mdp5_plane_get_flush(struct drm_plane *plane);
 enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane);
-struct drm_plane *mdp5_plane_init(struct drm_device *dev, bool primary);
+struct drm_plane *mdp5_plane_init(struct drm_device *dev,
+				  enum drm_plane_type type);
 
+struct mdp5_ctl *mdp5_crtc_get_ctl(struct drm_crtc *crtc);
 uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc);
 
 int mdp5_crtc_get_lm(struct drm_crtc *crtc);
@@ -240,25 +254,36 @@ void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
 		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
 void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc);
 struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
-		struct drm_plane *plane, int id);
+				struct drm_plane *plane,
+				struct drm_plane *cursor_plane, int id);
 
 struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
-int mdp5_encoder_set_split_display(struct drm_encoder *encoder,
-					struct drm_encoder *slave_encoder);
+int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder,
+				       struct drm_encoder *slave_encoder);
+void mdp5_encoder_set_intf_mode(struct drm_encoder *encoder, bool cmd_mode);
 int mdp5_encoder_get_linecount(struct drm_encoder *encoder);
 u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder);
 
 #ifdef CONFIG_DRM_MSM_DSI
-struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev,
-		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
+void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode);
+void mdp5_cmd_encoder_disable(struct drm_encoder *encoder);
+void mdp5_cmd_encoder_enable(struct drm_encoder *encoder);
 int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
-					struct drm_encoder *slave_encoder);
+				       struct drm_encoder *slave_encoder);
 #else
-static inline struct drm_encoder *mdp5_cmd_encoder_init(struct drm_device *dev,
-		struct mdp5_interface *intf, struct mdp5_ctl *ctl)
+static inline void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
+					     struct drm_display_mode *mode,
+					     struct drm_display_mode *adjusted_mode)
+{
+}
+static inline void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
+{
+}
+static inline void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 {
-	return ERR_PTR(-EINVAL);
 }
 static inline int mdp5_cmd_encoder_set_split_display(
 	struct drm_encoder *encoder, struct drm_encoder *slave_encoder)
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
index 1ae9dc8d260d..35c4dabb0c0c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
@@ -53,6 +53,14 @@ struct mdp5_hw_pipe *mdp5_pipe_assign(struct drm_atomic_state *s,
 		if (caps & ~cur->caps)
 			continue;
 
+		/*
+		 * don't assign a cursor pipe to a plane that isn't going to
+		 * be used as a cursor
+		 */
+		if (cur->caps & MDP_PIPE_CAP_CURSOR &&
+				plane->type != DRM_PLANE_TYPE_CURSOR)
+			continue;
+
 		/* possible candidate, take the one with the
 		 * fewest unneeded caps bits set:
 		 */
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index b9fb111d3428..0ffb8affef35 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -29,6 +29,11 @@ struct mdp5_plane {
 
 static int mdp5_plane_mode_set(struct drm_plane *plane,
 		struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		struct drm_rect *src, struct drm_rect *dest);
+
+static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane,
+		struct drm_crtc *crtc,
+		struct drm_framebuffer *fb,
 		int crtc_x, int crtc_y,
 		unsigned int crtc_w, unsigned int crtc_h,
 		uint32_t src_x, uint32_t src_y,
@@ -45,7 +50,7 @@ static struct mdp5_kms *get_kms(struct drm_plane *plane)
 
 static bool plane_enabled(struct drm_plane_state *state)
 {
-	return state->fb && state->crtc;
+	return state->visible;
 }
 
 static void mdp5_plane_destroy(struct drm_plane *plane)
@@ -246,6 +251,19 @@ static const struct drm_plane_funcs mdp5_plane_funcs = {
 		.atomic_print_state = mdp5_plane_atomic_print_state,
 };
 
+static const struct drm_plane_funcs mdp5_cursor_plane_funcs = {
+		.update_plane = mdp5_update_cursor_plane_legacy,
+		.disable_plane = drm_atomic_helper_disable_plane,
+		.destroy = mdp5_plane_destroy,
+		.set_property = drm_atomic_helper_plane_set_property,
+		.atomic_set_property = mdp5_plane_atomic_set_property,
+		.atomic_get_property = mdp5_plane_atomic_get_property,
+		.reset = mdp5_plane_reset,
+		.atomic_duplicate_state = mdp5_plane_duplicate_state,
+		.atomic_destroy_state = mdp5_plane_destroy_state,
+		.atomic_print_state = mdp5_plane_atomic_print_state,
+};
+
 static int mdp5_plane_prepare_fb(struct drm_plane *plane,
 				 struct drm_plane_state *new_state)
 {
@@ -272,15 +290,20 @@ static void mdp5_plane_cleanup_fb(struct drm_plane *plane,
 	msm_framebuffer_cleanup(fb, mdp5_kms->id);
 }
 
-static int mdp5_plane_atomic_check(struct drm_plane *plane,
-		struct drm_plane_state *state)
+#define FRAC_16_16(mult, div)    (((mult) << 16) / (div))
+static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
+					      struct drm_plane_state *state)
 {
 	struct mdp5_plane_state *mdp5_state = to_mdp5_plane_state(state);
+	struct drm_plane *plane = state->plane;
 	struct drm_plane_state *old_state = plane->state;
 	struct mdp5_cfg *config = mdp5_cfg_get_config(get_kms(plane)->cfg);
 	bool new_hwpipe = false;
 	uint32_t max_width, max_height;
 	uint32_t caps = 0;
+	struct drm_rect clip;
+	int min_scale, max_scale;
+	int ret;
 
 	DBG("%s: check (%d -> %d)", plane->name,
 			plane_enabled(old_state), plane_enabled(state));
@@ -296,6 +319,18 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
 		return -ERANGE;
 	}
 
+	clip.x1 = 0;
+	clip.y1 = 0;
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
+	min_scale = FRAC_16_16(1, 8);
+	max_scale = FRAC_16_16(8, 1);
+
+	ret = drm_plane_helper_check_state(state, &clip, min_scale,
+					   max_scale, true, true);
+	if (ret)
+		return ret;
+
 	if (plane_enabled(state)) {
 		unsigned int rotation;
 		const struct mdp_format *format;
@@ -321,6 +356,9 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
 		if (rotation & DRM_REFLECT_Y)
 			caps |= MDP_PIPE_CAP_VFLIP;
 
+		if (plane->type == DRM_PLANE_TYPE_CURSOR)
+			caps |= MDP_PIPE_CAP_CURSOR;
+
 		/* (re)allocate hw pipe if we don't have one or caps-mismatch: */
 		if (!mdp5_state->hwpipe || (caps & ~mdp5_state->hwpipe->caps))
 			new_hwpipe = true;
@@ -356,6 +394,23 @@ static int mdp5_plane_atomic_check(struct drm_plane *plane,
 	return 0;
 }
 
+static int mdp5_plane_atomic_check(struct drm_plane *plane,
+				   struct drm_plane_state *state)
+{
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+
+	crtc = state->crtc ? state->crtc : plane->state->crtc;
+	if (!crtc)
+		return 0;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(state->state, crtc);
+	if (WARN_ON(!crtc_state))
+		return -EINVAL;
+
+	return mdp5_plane_atomic_check_with_state(crtc_state, state);
+}
+
 static void mdp5_plane_atomic_update(struct drm_plane *plane,
 				     struct drm_plane_state *old_state)
 {
@@ -368,10 +423,7 @@ static void mdp5_plane_atomic_update(struct drm_plane *plane,
 
 		ret = mdp5_plane_mode_set(plane,
 				state->crtc, state->fb,
-				state->crtc_x, state->crtc_y,
-				state->crtc_w, state->crtc_h,
-				state->src_x,  state->src_y,
-				state->src_w, state->src_h);
+				&state->src, &state->dst);
 		/* atomic_check should have ensured that this doesn't fail */
 		WARN_ON(ret < 0);
 	}
@@ -664,10 +716,7 @@ static void mdp5_write_pixel_ext(struct mdp5_kms *mdp5_kms, enum mdp5_pipe pipe,
 
 static int mdp5_plane_mode_set(struct drm_plane *plane,
 		struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		int crtc_x, int crtc_y,
-		unsigned int crtc_w, unsigned int crtc_h,
-		uint32_t src_x, uint32_t src_y,
-		uint32_t src_w, uint32_t src_h)
+		struct drm_rect *src, struct drm_rect *dest)
 {
 	struct drm_plane_state *pstate = plane->state;
 	struct mdp5_hw_pipe *hwpipe = to_mdp5_plane_state(pstate)->hwpipe;
@@ -683,6 +732,10 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	uint32_t pix_format;
 	unsigned int rotation;
 	bool vflip, hflip;
+	int crtc_x, crtc_y;
+	unsigned int crtc_w, crtc_h;
+	uint32_t src_x, src_y;
+	uint32_t src_w, src_h;
 	unsigned long flags;
 	int ret;
 
@@ -695,6 +748,16 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	format = to_mdp_format(msm_framebuffer_format(fb));
 	pix_format = format->base.pixel_format;
 
+	src_x = src->x1;
+	src_y = src->y1;
+	src_w = drm_rect_width(src);
+	src_h = drm_rect_height(src);
+
+	crtc_x = dest->x1;
+	crtc_y = dest->y1;
+	crtc_w = drm_rect_width(dest);
+	crtc_h = drm_rect_height(dest);
+
 	/* src values are in Q16 fixed point, convert to integer: */
 	src_x = src_x >> 16;
 	src_y = src_y >> 16;
@@ -818,12 +881,88 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	return ret;
 }
 
+static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane,
+			struct drm_crtc *crtc, struct drm_framebuffer *fb,
+			int crtc_x, int crtc_y,
+			unsigned int crtc_w, unsigned int crtc_h,
+			uint32_t src_x, uint32_t src_y,
+			uint32_t src_w, uint32_t src_h)
+{
+	struct drm_plane_state *plane_state, *new_plane_state;
+	struct mdp5_plane_state *mdp5_pstate;
+	struct drm_crtc_state *crtc_state = crtc->state;
+	int ret;
+
+	if (!crtc_state->active || drm_atomic_crtc_needs_modeset(crtc_state))
+		goto slow;
+
+	plane_state = plane->state;
+	mdp5_pstate = to_mdp5_plane_state(plane_state);
+
+	/* don't use fast path if we don't have a hwpipe allocated yet */
+	if (!mdp5_pstate->hwpipe)
+		goto slow;
+
+	/* only allow changing of position(crtc x/y or src x/y) in fast path */
+	if (plane_state->crtc != crtc ||
+	    plane_state->src_w != src_w ||
+	    plane_state->src_h != src_h ||
+	    plane_state->crtc_w != crtc_w ||
+	    plane_state->crtc_h != crtc_h ||
+	    !plane_state->fb ||
+	    plane_state->fb != fb)
+		goto slow;
+
+	new_plane_state = mdp5_plane_duplicate_state(plane);
+	if (!new_plane_state)
+		return -ENOMEM;
+
+	new_plane_state->src_x = src_x;
+	new_plane_state->src_y = src_y;
+	new_plane_state->src_w = src_w;
+	new_plane_state->src_h = src_h;
+	new_plane_state->crtc_x = crtc_x;
+	new_plane_state->crtc_y = crtc_y;
+	new_plane_state->crtc_w = crtc_w;
+	new_plane_state->crtc_h = crtc_h;
+
+	ret = mdp5_plane_atomic_check_with_state(crtc_state, new_plane_state);
+	if (ret)
+		goto slow_free;
+
+	if (new_plane_state->visible) {
+		struct mdp5_ctl *ctl;
+
+		ret = mdp5_plane_mode_set(plane, crtc, fb,
+					  &new_plane_state->src,
+					  &new_plane_state->dst);
+		WARN_ON(ret < 0);
+
+		ctl = mdp5_crtc_get_ctl(crtc);
+
+		mdp5_ctl_commit(ctl, mdp5_plane_get_flush(plane));
+	}
+
+	*to_mdp5_plane_state(plane_state) =
+		*to_mdp5_plane_state(new_plane_state);
+
+	mdp5_plane_destroy_state(plane, new_plane_state);
+
+	return 0;
+slow_free:
+	mdp5_plane_destroy_state(plane, new_plane_state);
+slow:
+	return drm_atomic_helper_update_plane(plane, crtc, fb,
+					      crtc_x, crtc_y, crtc_w, crtc_h,
+					      src_x, src_y, src_w, src_h);
+}
+
 enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane)
 {
 	struct mdp5_plane_state *pstate = to_mdp5_plane_state(plane->state);
 
 	if (WARN_ON(!pstate->hwpipe))
-		return 0;
+		return SSPP_NONE;
 
 	return pstate->hwpipe->pipe;
 }
@@ -839,12 +978,12 @@ uint32_t mdp5_plane_get_flush(struct drm_plane *plane)
 }
 
 /* initialize plane */
-struct drm_plane *mdp5_plane_init(struct drm_device *dev, bool primary)
+struct drm_plane *mdp5_plane_init(struct drm_device *dev,
+				  enum drm_plane_type type)
 {
 	struct drm_plane *plane = NULL;
 	struct mdp5_plane *mdp5_plane;
 	int ret;
-	enum drm_plane_type type;
 
 	mdp5_plane = kzalloc(sizeof(*mdp5_plane), GFP_KERNEL);
 	if (!mdp5_plane) {
@@ -857,10 +996,16 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev, bool primary)
 	mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats,
 		ARRAY_SIZE(mdp5_plane->formats), false);
 
-	type = primary ? DRM_PLANE_TYPE_PRIMARY : DRM_PLANE_TYPE_OVERLAY;
-	ret = drm_universal_plane_init(dev, plane, 0xff, &mdp5_plane_funcs,
-				 mdp5_plane->formats, mdp5_plane->nformats,
-				 type, NULL);
+	if (type == DRM_PLANE_TYPE_CURSOR)
+		ret = drm_universal_plane_init(dev, plane, 0xff,
+				&mdp5_cursor_plane_funcs,
+				mdp5_plane->formats, mdp5_plane->nformats,
+				type, NULL);
+	else
+		ret = drm_universal_plane_init(dev, plane, 0xff,
+				&mdp5_plane_funcs,
+				mdp5_plane->formats, mdp5_plane->nformats,
+				type, NULL);
 	if (ret)
 		goto fail;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.h b/drivers/gpu/drm/msm/mdp/mdp_kms.h
index 303130320748..7574cdfef418 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp_kms.h
@@ -112,6 +112,7 @@ const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format);
 #define MDP_PIPE_CAP_CSC			BIT(3)
 #define MDP_PIPE_CAP_DECIMATION			BIT(4)
 #define MDP_PIPE_CAP_SW_PIX_EXT			BIT(5)
+#define MDP_PIPE_CAP_CURSOR			BIT(6)
 
 static inline bool pipe_supports_yuv(uint32_t pipe_caps)
 {
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index 30b5d23e53b4..9633a68b14d7 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -93,11 +93,6 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev,
 		if (!crtc->state->enable)
 			continue;
 
-		/* Legacy cursor ioctls are completely unsynced, and userspace
-		 * relies on that (by doing tons of cursor updates). */
-		if (old_state->legacy_cursor_update)
-			continue;
-
 		kms->funcs->wait_for_crtc_commit_done(kms, crtc);
 	}
 }
@@ -151,20 +146,29 @@ static void commit_worker(struct work_struct *work)
 	complete_commit(container_of(work, struct msm_commit, work), true);
 }
 
+/*
+ * this func is identical to the drm_atomic_helper_check, but we keep this
+ * because we might eventually need to have a more finegrained check
+ * sequence without using the atomic helpers.
+ *
+ * In the past, we first called drm_atomic_helper_check_planes, and then
+ * drm_atomic_helper_check_modeset. We needed this because the MDP5 plane's
+ * ->atomic_check could update ->mode_changed for pixel format changes.
+ * This, however isn't needed now because if there is a pixel format change,
+ * we just assign a new hwpipe for it with a new SMP allocation. We might
+ * eventually hit a condition where we would need to do a full modeset if
+ * we run out of planes. There, we'd probably need to set mode_changed.
+ */
 int msm_atomic_check(struct drm_device *dev,
 		     struct drm_atomic_state *state)
 {
 	int ret;
 
-	/*
-	 * msm ->atomic_check can update ->mode_changed for pixel format
-	 * changes, hence must be run before we check the modeset changes.
-	 */
-	ret = drm_atomic_helper_check_planes(dev, state);
+	ret = drm_atomic_helper_check_modeset(dev, state);
 	if (ret)
 		return ret;
 
-	ret = drm_atomic_helper_check_modeset(dev, state);
+	ret = drm_atomic_helper_check_planes(dev, state);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index e29bb66f55b1..70226eaa5cac 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -91,6 +91,25 @@ module_param(dumpstate, bool, 0600);
  * Util/helpers:
  */
 
+struct clk *msm_clk_get(struct platform_device *pdev, const char *name)
+{
+	struct clk *clk;
+	char name2[32];
+
+	clk = devm_clk_get(&pdev->dev, name);
+	if (!IS_ERR(clk) || PTR_ERR(clk) == -EPROBE_DEFER)
+		return clk;
+
+	snprintf(name2, sizeof(name2), "%s_clk", name);
+
+	clk = devm_clk_get(&pdev->dev, name2);
+	if (!IS_ERR(clk))
+		dev_warn(&pdev->dev, "Using legacy clk name binding.  Use "
+				"\"%s\" instead of \"%s\"\n", name, name2);
+
+	return clk;
+}
+
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 		const char *dbgname)
 {
@@ -985,6 +1004,7 @@ static int add_display_components(struct device *dev,
  * as components.
  */
 static const struct of_device_id msm_gpu_match[] = {
+	{ .compatible = "qcom,adreno" },
 	{ .compatible = "qcom,adreno-3xx" },
 	{ .compatible = "qcom,kgsl-3d0" },
 	{ },
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index ed4dad3ca133..c3b14876edaa 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -206,7 +206,7 @@ void msm_gem_shrinker_cleanup(struct drm_device *dev);
 int msm_gem_mmap_obj(struct drm_gem_object *obj,
 			struct vm_area_struct *vma);
 int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int msm_gem_fault(struct vm_fault *vmf);
 uint64_t msm_gem_mmap_offset(struct drm_gem_object *obj);
 int msm_gem_get_iova_locked(struct drm_gem_object *obj, int id,
 		uint64_t *iova);
@@ -275,16 +275,11 @@ int msm_edp_modeset_init(struct msm_edp *edp, struct drm_device *dev,
 		struct drm_encoder *encoder);
 
 struct msm_dsi;
-enum msm_dsi_encoder_id {
-	MSM_DSI_VIDEO_ENCODER_ID = 0,
-	MSM_DSI_CMD_ENCODER_ID = 1,
-	MSM_DSI_ENCODER_NUM = 2
-};
 #ifdef CONFIG_DRM_MSM_DSI
 void __init msm_dsi_register(void);
 void __exit msm_dsi_unregister(void);
 int msm_dsi_modeset_init(struct msm_dsi *msm_dsi, struct drm_device *dev,
-		struct drm_encoder *encoders[MSM_DSI_ENCODER_NUM]);
+			 struct drm_encoder *encoder);
 #else
 static inline void __init msm_dsi_register(void)
 {
@@ -293,8 +288,8 @@ static inline void __exit msm_dsi_unregister(void)
 {
 }
 static inline int msm_dsi_modeset_init(struct msm_dsi *msm_dsi,
-		struct drm_device *dev,
-		struct drm_encoder *encoders[MSM_DSI_ENCODER_NUM])
+				       struct drm_device *dev,
+				       struct drm_encoder *encoder)
 {
 	return -EINVAL;
 }
@@ -318,6 +313,7 @@ static inline int msm_debugfs_late_init(struct drm_device *dev) { return 0; }
 static inline void msm_rd_dump_submit(struct msm_gem_submit *submit) {}
 #endif
 
+struct clk *msm_clk_get(struct platform_device *pdev, const char *name);
 void __iomem *msm_ioremap(struct platform_device *pdev, const char *name,
 		const char *dbgname);
 void msm_writel(u32 data, void __iomem *addr);
diff --git a/drivers/gpu/drm/msm/msm_fbdev.c b/drivers/gpu/drm/msm/msm_fbdev.c
index f8a587eac6b8..6b1b375653f7 100644
--- a/drivers/gpu/drm/msm/msm_fbdev.c
+++ b/drivers/gpu/drm/msm/msm_fbdev.c
@@ -201,8 +201,7 @@ struct drm_fb_helper *msm_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, helper, &msm_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, helper,
-			priv->num_crtcs, priv->num_connectors);
+	ret = drm_fb_helper_init(dev, helper, priv->num_connectors);
 	if (ret) {
 		dev_err(dev->dev, "could not init fbdev: ret=%d\n", ret);
 		goto fail;
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 8098677a3916..59811f29607d 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -54,8 +54,7 @@ static struct page **get_pages_vram(struct drm_gem_object *obj,
 	if (!p)
 		return ERR_PTR(-ENOMEM);
 
-	ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node,
-			npages, 0, DRM_MM_SEARCH_DEFAULT);
+	ret = drm_mm_insert_node(&priv->vram.mm, msm_obj->vram_node, npages);
 	if (ret) {
 		drm_free_large(p);
 		return ERR_PTR(ret);
@@ -192,8 +191,9 @@ int msm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	return msm_gem_mmap_obj(vma->vm_private_data, vma);
 }
 
-int msm_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int msm_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct drm_device *dev = obj->dev;
 	struct msm_drm_private *priv = dev->dev_private;
@@ -642,7 +642,7 @@ void msm_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 
 	seq_printf(m, "%08x: %c %2d (%2d) %08llx %p\t",
 			msm_obj->flags, is_active(msm_obj) ? 'A' : 'I',
-			obj->name, obj->refcount.refcount.counter,
+			obj->name, kref_read(&obj->refcount),
 			off, msm_obj->vaddr);
 
 	for (id = 0; id < priv->num_aspaces; id++)
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 489676568a10..1172fe7a9252 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -95,13 +95,13 @@ static int submit_lookup_objects(struct msm_gem_submit *submit,
 		 */
 		submit->bos[i].flags = 0;
 
-		ret = copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo));
-		if (unlikely(ret)) {
+		if (copy_from_user_inatomic(&submit_bo, userptr, sizeof(submit_bo))) {
 			pagefault_enable();
 			spin_unlock(&file->table_lock);
-			ret = copy_from_user(&submit_bo, userptr, sizeof(submit_bo));
-			if (ret)
+			if (copy_from_user(&submit_bo, userptr, sizeof(submit_bo))) {
+				ret = -EFAULT;
 				goto out;
+			}
 			spin_lock(&file->table_lock);
 			pagefault_disable();
 		}
@@ -317,9 +317,10 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 		uint64_t iova;
 		bool valid;
 
-		ret = copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc));
-		if (ret)
+		if (copy_from_user(&submit_reloc, userptr, sizeof(submit_reloc))) {
+			ret = -EFAULT;
 			goto out;
+		}
 
 		if (submit_reloc.submit_offset % 4) {
 			DRM_ERROR("non-aligned reloc offset: %u\n",
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index a311d26ccb21..b654eca7636a 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -45,8 +45,7 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace,
 	if (WARN_ON(drm_mm_node_allocated(&vma->node)))
 		return 0;
 
-	ret = drm_mm_insert_node(&aspace->mm, &vma->node, npages,
-			0, DRM_MM_SEARCH_DEFAULT);
+	ret = drm_mm_insert_node(&aspace->mm, &vma->node, npages);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index b28527a65d09..99e05aacbee1 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -560,8 +560,7 @@ static irqreturn_t irq_handler(int irq, void *data)
 }
 
 static const char *clk_names[] = {
-		"core_clk", "iface_clk", "rbbmtimer_clk", "mem_clk",
-		"mem_iface_clk", "alt_mem_iface_clk",
+	"core", "iface", "rbbmtimer", "mem", "mem_iface", "alt_mem_iface",
 };
 
 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
@@ -625,13 +624,13 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 
 	/* Acquire clocks: */
 	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
-		gpu->grp_clks[i] = devm_clk_get(&pdev->dev, clk_names[i]);
+		gpu->grp_clks[i] = msm_clk_get(pdev, clk_names[i]);
 		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
 		if (IS_ERR(gpu->grp_clks[i]))
 			gpu->grp_clks[i] = NULL;
 	}
 
-	gpu->ebi1_clk = devm_clk_get(&pdev->dev, "bus_clk");
+	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
 	DBG("ebi1_clk: %p", gpu->ebi1_clk);
 	if (IS_ERR(gpu->ebi1_clk))
 		gpu->ebi1_clk = NULL;
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 61aaaa1de6eb..7f5779daf5c8 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -24,9 +24,12 @@ struct msm_iommu {
 };
 #define to_msm_iommu(x) container_of(x, struct msm_iommu, base)
 
-static int msm_fault_handler(struct iommu_domain *iommu, struct device *dev,
+static int msm_fault_handler(struct iommu_domain *domain, struct device *dev,
 		unsigned long iova, int flags, void *arg)
 {
+	struct msm_iommu *iommu = arg;
+	if (iommu->base.handler)
+		return iommu->base.handler(iommu->base.arg, iova, flags);
 	pr_warn_ratelimited("*** fault: iova=%08lx, flags=%d\n", iova, flags);
 	return 0;
 }
@@ -136,7 +139,7 @@ struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain)
 
 	iommu->domain = domain;
 	msm_mmu_init(&iommu->base, dev, &funcs);
-	iommu_set_fault_handler(domain, msm_fault_handler, dev);
+	iommu_set_fault_handler(domain, msm_fault_handler, iommu);
 
 	return &iommu->base;
 }
diff --git a/drivers/gpu/drm/msm/msm_kms.h b/drivers/gpu/drm/msm/msm_kms.h
index e470f4cf8f76..117635d2b8c5 100644
--- a/drivers/gpu/drm/msm/msm_kms.h
+++ b/drivers/gpu/drm/msm/msm_kms.h
@@ -56,6 +56,9 @@ struct msm_kms_funcs {
 			struct drm_encoder *encoder,
 			struct drm_encoder *slave_encoder,
 			bool is_cmd_mode);
+	void (*set_encoder_mode)(struct msm_kms *kms,
+				 struct drm_encoder *encoder,
+				 bool cmd_mode);
 	/* cleanup: */
 	void (*destroy)(struct msm_kms *kms);
 #ifdef CONFIG_DEBUG_FS
diff --git a/drivers/gpu/drm/msm/msm_mmu.h b/drivers/gpu/drm/msm/msm_mmu.h
index f85c879e68d2..aa2c5d4580c8 100644
--- a/drivers/gpu/drm/msm/msm_mmu.h
+++ b/drivers/gpu/drm/msm/msm_mmu.h
@@ -33,6 +33,8 @@ struct msm_mmu_funcs {
 struct msm_mmu {
 	const struct msm_mmu_funcs *funcs;
 	struct device *dev;
+	int (*handler)(void *arg, unsigned long iova, int flags);
+	void *arg;
 };
 
 static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
@@ -45,4 +47,11 @@ static inline void msm_mmu_init(struct msm_mmu *mmu, struct device *dev,
 struct msm_mmu *msm_iommu_new(struct device *dev, struct iommu_domain *domain);
 struct msm_mmu *msm_gpummu_new(struct device *dev, struct msm_gpu *gpu);
 
+static inline void msm_mmu_set_fault_handler(struct msm_mmu *mmu, void *arg,
+		int (*handler)(void *arg, unsigned long iova, int flags))
+{
+	mmu->arg = arg;
+	mmu->handler = handler;
+}
+
 #endif /* __MSM_MMU_H__ */
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_drv.c b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
index 955441f71500..cdfbe0284635 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_drv.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_drv.c
@@ -218,7 +218,7 @@ static int mxsfb_load(struct drm_device *drm, unsigned long flags)
 
 	drm_kms_helper_poll_init(drm);
 
-	mxsfb->fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+	mxsfb->fbdev = drm_fbdev_cma_init(drm, 32,
 					  drm->mode_config.num_connector);
 	if (IS_ERR(mxsfb->fbdev)) {
 		mxsfb->fbdev = NULL;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/arb.c b/drivers/gpu/drm/nouveau/dispnv04/arb.c
index a555681c3096..90075b676256 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/arb.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/arb.c
@@ -198,7 +198,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		int *burst, int *lwm)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nv_fifo_info fifo_data;
 	struct nv_sim_state sim_data;
 	int MClk = nouveau_hw_get_clock(dev, PLL_MEMORY);
@@ -227,7 +227,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		sim_data.mem_page_miss = ((cfg1 >> 4) & 0xf) + ((cfg1 >> 31) & 0x1);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		nv04_calc_arb(&fifo_data, &sim_data);
 	else
 		nv10_calc_arb(&fifo_data, &sim_data);
@@ -254,7 +254,7 @@ nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN)
 		nv04_update_arb(dev, vclk, bpp, burst, lwm);
 	else if ((dev->pdev->device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
 		 (dev->pdev->device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index a72754d73c84..ab7b69c11d40 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -113,8 +113,8 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 {
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv04_mode_state *state = &nv04_display(dev)->mode_reg;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[nv_crtc->index];
@@ -138,7 +138,7 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	 * has yet been observed in allowing the use a single stage pll on all
 	 * nv43 however.  the behaviour of single stage use is untested on nv40
 	 */
-	if (drm->device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
+	if (drm->client.device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
 		memset(&pll_lim.vco2, 0, sizeof(pll_lim.vco2));
 
 
@@ -148,10 +148,10 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	state->pllsel &= PLLSEL_VPLL1_MASK | PLLSEL_VPLL2_MASK | PLLSEL_TV_MASK;
 
 	/* The blob uses this always, so let's do the same */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_USE_VPLL2_TRUE;
 	/* again nv40 and some nv43 act more like nv3x as described above */
-	if (drm->device.info.chipset < 0x41)
+	if (drm->client.device.info.chipset < 0x41)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_MPLL |
 				 NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_NVPLL;
 	state->pllsel |= nv_crtc->index ? PLLSEL_VPLL2_MASK : PLLSEL_VPLL1_MASK;
@@ -270,7 +270,7 @@ nv_crtc_mode_set_vga(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		horizEnd = horizTotal - 2;
 		horizBlankEnd = horizTotal + 4;
 #if 0
-		if (dev->overlayAdaptor && drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+		if (dev->overlayAdaptor && drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 			/* This reportedly works around some video overlay bandwidth problems */
 			horizTotal += 2;
 #endif
@@ -505,7 +505,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	regp->cursor_cfg = NV_PCRTC_CURSOR_CONFIG_CUR_LINES_64 |
 			     NV_PCRTC_CURSOR_CONFIG_CUR_PIXELS_64 |
 			     NV_PCRTC_CURSOR_CONFIG_ADDRESS_SPACE_PNVM;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_CUR_BPP_32;
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_DOUBLE_SCAN_ENABLE;
@@ -546,26 +546,26 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	 * 1 << 30 on 0x60.830), for no apparent reason */
 	regp->CRTC[NV_CIO_CRE_59] = off_chip_digital;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->CRTC[0x9f] = off_chip_digital ? 0x11 : 0x1;
 
 	regp->crtc_830 = mode->crtc_vdisplay - 3;
 	regp->crtc_834 = mode->crtc_vdisplay - 1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		/* This is what the blob does */
 		regp->crtc_850 = NVReadCRTC(dev, 0, NV_PCRTC_850);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->gpio_ext = NVReadCRTC(dev, 0, NV_PCRTC_GPIO_EXT);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->crtc_cfg = NV10_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 	else
 		regp->crtc_cfg = NV04_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 
 	/* Some misc regs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->CRTC[NV_CIO_CRE_85] = 0xFF;
 		regp->CRTC[NV_CIO_CRE_86] = 0x1;
 	}
@@ -577,7 +577,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 
 	/* Generic PRAMDAC regs */
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		/* Only bit that bios and blob set. */
 		regp->nv10_cursync = (1 << 25);
 
@@ -586,7 +586,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 				NV_PRAMDAC_GENERAL_CONTROL_PIXMIX_ON;
 	if (fb->format->depth == 16)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_PIPE_LONG;
 
 	regp->ramdac_630 = 0; /* turn off green mode (tv test pattern?) */
@@ -649,7 +649,7 @@ nv_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
 
 	nv_crtc_mode_set_vga(crtc, adjusted_mode);
 	/* calculated in nv04_dfp_prepare, nv40 needs it written before calculating PLLs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, nv04_display(dev)->mode_reg.sel_clk);
 	nv_crtc_mode_set_regs(crtc, adjusted_mode);
 	nv_crtc_calc_state_ext(crtc, mode, adjusted_mode->clock);
@@ -710,7 +710,7 @@ static void nv_crtc_prepare(struct drm_crtc *crtc)
 
 	/* Some more preparation. */
 	NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_CONFIG, NV_PCRTC_CONFIG_START_ADDRESS_NON_VGA);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		uint32_t reg900 = NVReadRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900);
 		NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900, reg900 & ~0x10000);
 	}
@@ -886,7 +886,7 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
 		regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8;
 		crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47);
 	}
@@ -967,7 +967,7 @@ static void nv11_cursor_upload(struct drm_device *dev, struct nouveau_bo *src,
 		{
 			struct nouveau_drm *drm = nouveau_drm(dev);
 
-			if (drm->device.info.chipset == 0x11) {
+			if (drm->client.device.info.chipset == 0x11) {
 				pixel = ((pixel & 0x000000ff) << 24) |
 					((pixel & 0x0000ff00) << 8) |
 					((pixel & 0x00ff0000) >> 8) |
@@ -1008,7 +1008,7 @@ nv04_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	if (ret)
 		goto out;
 
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		nv11_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
 	else
 		nv04_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
@@ -1124,8 +1124,9 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
 	drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs);
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
-	ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, NULL, &nv_crtc->cursor.nvbo);
+	ret = nouveau_bo_new(&nouveau_drm(dev)->client, 64*64*4, 0x100,
+			     TTM_PL_FLAG_VRAM, 0, 0x0000, NULL, NULL,
+			     &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, false);
 		if (!ret) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/cursor.c b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
index c83116a308a4..f26e44ea7389 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/cursor.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
@@ -55,7 +55,7 @@ nv04_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, nv_crtc->index);
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c
index b6cc7766e6f7..4feab0a5419d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dac.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c
@@ -66,7 +66,7 @@ int nv04_dac_output_offset(struct drm_encoder *encoder)
 static int sample_load_twice(struct drm_device *dev, bool sense[2])
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int i;
 
 	for (i = 0; i < 2; i++) {
@@ -80,19 +80,19 @@ static int sample_load_twice(struct drm_device *dev, bool sense[2])
 		 * use a 10ms timeout (guards against crtc being inactive, in
 		 * which case blank state would never change)
 		 */
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
@@ -133,7 +133,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
 						 struct drm_connector *connector)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode;
 	uint8_t saved_palette0[3], saved_palette_mask;
@@ -236,8 +236,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	uint32_t sample, testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t saved_powerctrl_2 = 0, saved_powerctrl_4 = 0, saved_routput,
@@ -288,7 +288,7 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 	/* nv driver and nv31 use 0xfffffeee, nv34 and 6600 use 0xfffffece */
 	routput = (saved_routput & 0xfffffece) | head << 8;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
 		if (dcb->type == DCB_OUTPUT_TV)
 			routput |= 0x1a << 16;
 		else
@@ -403,7 +403,7 @@ static void nv04_dac_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
index 2e5bb2afda7c..9805d2cdc1a1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
@@ -281,7 +281,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			      struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nv04_crtc_reg *regp = &nv04_display(dev)->mode_reg.crtc_reg[nv_crtc->index];
@@ -417,7 +417,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 	if ((nv_connector->dithering_mode == DITHERING_MODE_ON) ||
 	    (nv_connector->dithering_mode == DITHERING_MODE_AUTO &&
 	     fb->format->depth > connector->display_info.bpc * 3)) {
-		if (drm->device.info.chipset == 0x11)
+		if (drm->client.device.info.chipset == 0x11)
 			regp->dither = savep->dither | 0x00010000;
 		else {
 			int i;
@@ -428,7 +428,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			}
 		}
 	} else {
-		if (drm->device.info.chipset != 0x11) {
+		if (drm->client.device.info.chipset != 0x11) {
 			/* reset them */
 			int i;
 			for (i = 0; i < 3; i++) {
@@ -464,7 +464,7 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
 		NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL);
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
@@ -486,7 +486,7 @@ static void nv04_dfp_update_backlight(struct drm_encoder *encoder, int mode)
 {
 #ifdef __powerpc__
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 
 	/* BIOS scripts usually take care of the backlight, thanks
 	 * Apple for your consistency.
@@ -624,7 +624,7 @@ static void nv04_tmds_slave_init(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_PRI);
 	struct nvkm_i2c_bus_probe info[] = {
 		{
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 34c0f2f67548..5b9d549aa791 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -35,7 +35,7 @@ int
 nv04_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *ct;
 	struct drm_encoder *encoder;
@@ -48,7 +48,7 @@ nv04_display_create(struct drm_device *dev)
 	if (!disp)
 		return -ENOMEM;
 
-	nvif_object_map(&drm->device.object);
+	nvif_object_map(&drm->client.device.object);
 
 	nouveau_display(dev)->priv = disp;
 	nouveau_display(dev)->dtor = nv04_display_destroy;
@@ -139,7 +139,7 @@ nv04_display_destroy(struct drm_device *dev)
 	nouveau_display(dev)->priv = NULL;
 	kfree(disp);
 
-	nvif_object_unmap(&drm->device.object);
+	nvif_object_unmap(&drm->client.device.object);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h
index 7030307d2d48..bea4543554ba 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h
@@ -129,7 +129,7 @@ nv_two_heads(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
 	    impl != 0x0150 && impl != 0x01a0 && impl != 0x0200)
 		return true;
 
@@ -148,7 +148,7 @@ nv_two_reg_pll(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (impl == 0x0310 || impl == 0x0340 || drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE)
+	if (impl == 0x0310 || impl == 0x0340 || drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE)
 		return true;
 	return false;
 }
@@ -170,7 +170,7 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table,
 			    struct dcb_output *outp, int crtc)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios_init init = {
 		.subdev = &bios->subdev,
 		.bios = bios,
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c
index 74856a8b8f35..b98599002831 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c
@@ -89,7 +89,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	if (owner == 1)
 		owner *= 3;
 
-	if (drm->device.info.chipset == 0x11) {
+	if (drm->client.device.info.chipset == 0x11) {
 		/* This might seem stupid, but the blob does it and
 		 * omitting it often locks the system up.
 		 */
@@ -100,7 +100,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	/* CR44 is always changed on CRTC0 */
 	NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_44, owner);
 
-	if (drm->device.info.chipset == 0x11) {	/* set me harder */
+	if (drm->client.device.info.chipset == 0x11) {	/* set me harder */
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 	}
@@ -149,7 +149,7 @@ nouveau_hw_decode_pll(struct drm_device *dev, uint32_t reg1, uint32_t pll1,
 		pllvals->NM1 = pll1 & 0xffff;
 		if (nv_two_reg_pll(dev) && pll2 & NV31_RAMDAC_ENABLE_VCO2)
 			pllvals->NM2 = pll2 & 0xffff;
-		else if (drm->device.info.chipset == 0x30 || drm->device.info.chipset == 0x35) {
+		else if (drm->client.device.info.chipset == 0x30 || drm->client.device.info.chipset == 0x35) {
 			pllvals->M1 &= 0xf; /* only 4 bits */
 			if (pll1 & NV30_RAMDAC_ENABLE_VCO2) {
 				pllvals->M2 = (pll1 >> 4) & 0x7;
@@ -165,8 +165,8 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		       struct nvkm_pll_vals *pllvals)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvif_object *device = &drm->client.device.object;
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	uint32_t reg1, pll1, pll2 = 0;
 	struct nvbios_pll pll_lim;
 	int ret;
@@ -184,7 +184,7 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		pll2 = nvif_rd32(device, reg2);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
 		uint32_t ramdac580 = NVReadRAMDAC(dev, 0, NV_PRAMDAC_580);
 
 		/* check whether vpll has been forced into single stage mode */
@@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t mpllP;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP);
+		mpllP = (mpllP >> 8) & 0xf;
 		if (!mpllP)
 			mpllP = 4;
 
@@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t clock;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock);
-		return clock;
+		return clock / 1000;
 	}
 
 	ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals);
@@ -252,7 +253,7 @@ nouveau_hw_fix_bad_vpll(struct drm_device *dev, int head)
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_clk *clk = nvxx_clk(device);
 	struct nvkm_bios *bios = nvxx_bios(device);
 	struct nvbios_pll pll_lim;
@@ -391,21 +392,21 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->nv10_cursync = NVReadRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC);
 
 	nouveau_hw_get_pllvals(dev, head ? PLL_VPLL1 : PLL_VPLL0, &regp->pllvals);
 	state->pllsel = NVReadRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT);
 	if (nv_two_heads(dev))
 		state->sel_clk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		regp->dither = NVReadRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11);
 
 	regp->ramdac_gen_ctrl = NVReadRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL);
 
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_630 = NVReadRAMDAC(dev, head, NV_PRAMDAC_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		regp->ramdac_634 = NVReadRAMDAC(dev, head, NV_PRAMDAC_634);
 
 	regp->tv_setup = NVReadRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP);
@@ -447,7 +448,7 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_8c0 = NVReadRAMDAC(dev, head, NV_PRAMDAC_8C0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->ramdac_a20 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A20);
 		regp->ramdac_a24 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A24);
 		regp->ramdac_a34 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A34);
@@ -463,26 +464,26 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 		     struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t pllreg = head ? NV_RAMDAC_VPLL2 : NV_PRAMDAC_VPLL_COEFF;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC, regp->nv10_cursync);
 
 	clk->pll_prog(clk, pllreg, &regp->pllvals);
 	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT, state->pllsel);
 	if (nv_two_heads(dev))
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, state->sel_clk);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11, regp->dither);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL, regp->ramdac_gen_ctrl);
 
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_630, regp->ramdac_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_634, regp->ramdac_634);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP, regp->tv_setup);
@@ -519,7 +520,7 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_8C0, regp->ramdac_8c0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A20, regp->ramdac_a20);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A24, regp->ramdac_a24);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A34, regp->ramdac_a34);
@@ -600,10 +601,10 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_21);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		rd_cio_state(dev, head, regp, 0x9f);
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_49);
@@ -612,14 +613,14 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		regp->crtc_830 = NVReadCRTC(dev, head, NV_PCRTC_830);
 		regp->crtc_834 = NVReadCRTC(dev, head, NV_PCRTC_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			regp->gpio_ext = NVReadCRTC(dev, head, NV_PCRTC_GPIO_EXT);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 			regp->crtc_850 = NVReadCRTC(dev, head, NV_PCRTC_850);
 
 		if (nv_two_heads(dev))
@@ -631,7 +632,7 @@ nv_save_state_ext(struct drm_device *dev, int head,
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -660,12 +661,12 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		  struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t reg900;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		if (nv_two_heads(dev))
 			/* setting ENGINE_CTRL (EC) *must* come before
 			 * CIO_CRE_LCD, as writing CRE_LCD sets bits 16 & 17 in
@@ -677,20 +678,20 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		nvif_wr32(device, NV_PVIDEO_INTR_EN, 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(0), 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(1), 0);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->client.device.info.ram_size - 1);
 		nvif_wr32(device, NV_PBUS_POWERCTRL_2, 0);
 
 		NVWriteCRTC(dev, head, NV_PCRTC_CURSOR_CONFIG, regp->cursor_cfg);
 		NVWriteCRTC(dev, head, NV_PCRTC_830, regp->crtc_830);
 		NVWriteCRTC(dev, head, NV_PCRTC_834, regp->crtc_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			NVWriteCRTC(dev, head, NV_PCRTC_GPIO_EXT, regp->gpio_ext);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 			NVWriteCRTC(dev, head, NV_PCRTC_850, regp->crtc_850);
 
 			reg900 = NVReadRAMDAC(dev, head, NV_PRAMDAC_900);
@@ -713,23 +714,23 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		wr_cio_state(dev, head, regp, 0x9f);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_49);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -737,14 +738,14 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	}
 	/* NV11 and NV20 stop at 0x52. */
 	if (nv_gf4_disp_arch(dev)) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
 			/* Not waiting for vertical retrace before modifying
 			   CRE_53/CRE_54 causes lockups. */
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
@@ -770,7 +771,7 @@ static void
 nv_save_state_palette(struct drm_device *dev, int head,
 		      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -789,7 +790,7 @@ void
 nouveau_hw_load_state_palette(struct drm_device *dev, int head,
 			      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -809,7 +810,7 @@ void nouveau_hw_save_state(struct drm_device *dev, int head,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		/* NB: no attempt is made to restore the bad pll later on */
 		nouveau_hw_fix_bad_vpll(dev, head);
 	nv_save_state_ramdac(dev, head, state);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.h b/drivers/gpu/drm/nouveau/dispnv04/hw.h
index 3bded60c5596..3a2be47fb4f1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.h
@@ -60,7 +60,7 @@ extern void nouveau_calc_arb(struct drm_device *, int vclk, int bpp,
 static inline uint32_t NVReadCRTC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
@@ -71,7 +71,7 @@ static inline uint32_t NVReadCRTC(struct drm_device *dev,
 static inline void NVWriteCRTC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -80,7 +80,7 @@ static inline void NVWriteCRTC(struct drm_device *dev,
 static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
@@ -91,7 +91,7 @@ static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 static inline void NVWriteRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -120,7 +120,7 @@ static inline void nv_write_tmds(struct drm_device *dev,
 static inline void NVWriteVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	nvif_wr08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE, value);
 }
@@ -128,7 +128,7 @@ static inline void NVWriteVgaCrtc(struct drm_device *dev,
 static inline uint8_t NVReadVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	val = nvif_rd08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE);
@@ -165,13 +165,13 @@ static inline uint8_t NVReadVgaCrtc5758(struct drm_device *dev, int head, uint8_
 static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t val;
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	val = nvif_rd08(device, reg);
@@ -181,12 +181,12 @@ static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 static inline void NVWritePRMVIO(struct drm_device *dev,
 					int head, uint32_t reg, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	nvif_wr08(device, reg, value);
@@ -194,14 +194,14 @@ static inline void NVWritePRMVIO(struct drm_device *dev,
 
 static inline void NVSetEnablePalette(struct drm_device *dev, int head, bool enable)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	nvif_wr08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE, enable ? 0 : 0x20);
 }
 
 static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	return !(nvif_rd08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE) & 0x20);
 }
@@ -209,7 +209,7 @@ static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 static inline void NVWriteVgaAttr(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
 	else
@@ -223,7 +223,7 @@ static inline void NVWriteVgaAttr(struct drm_device *dev,
 static inline uint8_t NVReadVgaAttr(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
@@ -259,10 +259,10 @@ static inline void NVVgaProtect(struct drm_device *dev, int head, bool protect)
 static inline bool
 nv_heads_tied(struct drm_device *dev)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		return !!(nvif_rd32(device, NV_PBUS_DEBUG_1) & (1 << 28));
 
 	return NVReadVgaCrtc(dev, 0, NV_CIO_CRE_44) & 0x4;
@@ -318,7 +318,7 @@ NVLockVgaCrtcs(struct drm_device *dev, bool lock)
 	NVWriteVgaCrtc(dev, 0, NV_CIO_SR_LOCK_INDEX,
 		       lock ? NV_CIO_SR_LOCK_VALUE : NV_CIO_SR_UNLOCK_RW_VALUE);
 	/* NV11 has independently lockable extended crtcs, except when tied */
-	if (drm->device.info.chipset == 0x11 && !nv_heads_tied(dev))
+	if (drm->client.device.info.chipset == 0x11 && !nv_heads_tied(dev))
 		NVWriteVgaCrtc(dev, 1, NV_CIO_SR_LOCK_INDEX,
 			       lock ? NV_CIO_SR_LOCK_VALUE :
 				      NV_CIO_SR_UNLOCK_RW_VALUE);
@@ -335,7 +335,7 @@ static inline int nv_cursor_width(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	return drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
+	return drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
 }
 
 static inline void
@@ -357,7 +357,7 @@ nv_set_crtc_base(struct drm_device *dev, int head, uint32_t offset)
 
 	NVWriteCRTC(dev, head, NV_PCRTC_START, offset);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT) {
 		/*
 		 * Hilarious, the 24th bit doesn't want to stick to
 		 * PCRTC_START...
@@ -382,7 +382,7 @@ nv_show_cursor(struct drm_device *dev, int head, bool show)
 		*curctl1 &= ~MASK(NV_CIO_CRE_HCUR_ADDR1_ENABLE);
 	NVWriteVgaCrtc(dev, head, NV_CIO_CRE_HCUR_ADDR1_INDEX, *curctl1);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 }
 
@@ -398,7 +398,7 @@ nv_pitch_align(struct drm_device *dev, uint32_t width, int bpp)
 		bpp = 8;
 
 	/* Alignment requirements taken from the Haiku driver */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		mask = 128 / bpp - 1;
 	else
 		mask = 512 / bpp - 1;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/overlay.c b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
index 6275c270df25..5319f2a7f24d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/overlay.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
@@ -97,7 +97,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_w, uint32_t src_h)
 {
 	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-	struct nvif_object *dev = &drm->device.object;
+	struct nvif_object *dev = &drm->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -119,7 +119,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	if (format > 0xffff)
 		return -ERANGE;
 
-	if (drm->device.info.chipset >= 0x30) {
+	if (drm->client.device.info.chipset >= 0x30) {
 		if (crtc_w < (src_w >> 1) || crtc_h < (src_h >> 1))
 			return -ERANGE;
 	} else {
@@ -174,7 +174,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv10_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -198,7 +198,7 @@ nv_destroy_plane(struct drm_plane *plane)
 static void
 nv10_set_params(struct nouveau_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->client.device.object;
 	u32 luma = (plane->brightness - 512) << 16 | plane->contrast;
 	u32 chroma = ((sin_mul(plane->hue, plane->saturation) & 0xffff) << 16) |
 		(cos_mul(plane->hue, plane->saturation) & 0xffff);
@@ -268,7 +268,7 @@ nv10_overlay_init(struct drm_device *device)
 	if (!plane)
 		return;
 
-	switch (drm->device.info.chipset) {
+	switch (drm->client.device.info.chipset) {
 	case 0x10:
 	case 0x11:
 	case 0x15:
@@ -347,7 +347,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_x, uint32_t src_y,
 		  uint32_t src_w, uint32_t src_h)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -427,7 +427,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv04_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -495,7 +495,7 @@ err:
 void
 nouveau_overlay_init(struct drm_device *device)
 {
-	struct nvif_device *dev = &nouveau_drm(device)->device;
+	struct nvif_device *dev = &nouveau_drm(device)->client.device;
 	if (dev->info.chipset < 0x10)
 		nv04_overlay_init(device);
 	else if (dev->info.chipset <= 0x40)
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
index 477a8d072af4..01664357d3e1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
@@ -54,7 +54,7 @@ static struct nvkm_i2c_bus_probe nv04_tv_encoder_info[] = {
 int nv04_tv_identify(struct drm_device *dev, int i2c_index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, i2c_index);
 	if (bus) {
 		return nvkm_i2c_bus_probe(bus, "TV encoder",
@@ -206,7 +206,7 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry)
 	struct drm_encoder *encoder;
 	struct drm_device *dev = connector->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, entry->i2c_index);
 	int type, ret;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index 434d1e29f279..6d99f11fee4e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -46,7 +46,7 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	uint32_t testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t gpio0, gpio1, fp_htotal, fp_hsync_start, fp_hsync_end,
 		fp_control, test_ctrl, dacclk, ctv_14, ctv_1c, ctv_6c;
@@ -130,7 +130,7 @@ static bool
 get_tv_detect_quirks(struct drm_device *dev, uint32_t *pin_mask)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	if (device->quirk && device->quirk->tv_pin_mask) {
 		*pin_mask = device->quirk->tv_pin_mask;
@@ -154,8 +154,8 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 		return connector_status_disconnected;
 
 	if (reliable) {
-		if (drm->device.info.chipset == 0x42 ||
-		    drm->device.info.chipset == 0x43)
+		if (drm->client.device.info.chipset == 0x42 ||
+		    drm->client.device.info.chipset == 0x43)
 			tv_enc->pin_mask =
 				nv42_tv_sample_load(encoder) >> 28 & 0xe;
 		else
@@ -362,7 +362,7 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nv17_tv_state *regs = &to_tv_enc(encoder)->state;
 	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
 
@@ -435,7 +435,7 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
 	/* Set the DACCLK register */
 	dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		dacclk |= 0x1a << 16;
 
 	if (tv_norm->kind == CTV_ENC_MODE) {
@@ -492,7 +492,7 @@ static void nv17_tv_mode_set(struct drm_encoder *encoder,
 			tv_regs->ptv_614 = 0x13;
 		}
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
 			tv_regs->ptv_500 = 0xe8e0;
 			tv_regs->ptv_504 = 0x1710;
 			tv_regs->ptv_604 = 0x0;
@@ -587,7 +587,7 @@ static void nv17_tv_commit(struct drm_encoder *encoder)
 	nv17_tv_state_load(dev, &to_tv_enc(encoder)->state);
 
 	/* This could use refinement for flatpanels, but it should work */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL +
 					nv04_dac_output_offset(encoder),
 					0xf0000000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
index 1b07521cde0d..29773b325bd9 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
@@ -130,13 +130,13 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder);
 static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg,
 				uint32_t val)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	nvif_wr32(&device->object, reg, val);
 }
 
 static inline uint32_t nv_read_ptv(struct drm_device *dev, uint32_t reg)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	return nvif_rd32(&device->object, reg);
 }
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
index 05e6ef7cd190..91e33db21a2f 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
@@ -10,5 +10,5 @@ struct g82_channel_dma_v0 {
 	__u64 offset;
 };
 
-#define G82_CHANNEL_DMA_V0_NTFY_UEVENT                                     0x00
+#define NV826E_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
index cecafcb1e954..e34efd4ec537 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
@@ -11,5 +11,5 @@ struct g82_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define G82_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                  0x00
+#define NV826F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
index 2caf0838fcfd..a2d5410a491b 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
@@ -10,5 +10,6 @@ struct fermi_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define FERMI_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                0x00
+#define NV906F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NV906F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index 46301ec018ce..2efa3d048bb9 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -25,5 +25,6 @@ struct kepler_channel_gpfifo_a_v0 {
 	__u64 vm;
 };
 
-#define NVA06F_V0_NTFY_UEVENT                                              0x00
+#define NVA06F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NVA06F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 82235f30277c..3a2c0137d4b4 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -2,23 +2,31 @@
 #define __NVIF_CLASS_H__
 
 /* these class numbers are made up by us, and not nvidia-assigned */
-#define NVIF_CLASS_CONTROL                                    /* if0001.h */ -1
-#define NVIF_CLASS_PERFMON                                    /* if0002.h */ -2
-#define NVIF_CLASS_PERFDOM                                    /* if0003.h */ -3
-#define NVIF_CLASS_SW_NV04                                    /* if0004.h */ -4
-#define NVIF_CLASS_SW_NV10                                    /* if0005.h */ -5
-#define NVIF_CLASS_SW_NV50                                    /* if0005.h */ -6
-#define NVIF_CLASS_SW_GF100                                   /* if0005.h */ -7
+#define NVIF_CLASS_CLIENT                            /* if0000.h */ -0x00000000
+
+#define NVIF_CLASS_CONTROL                           /* if0001.h */ -0x00000001
+
+#define NVIF_CLASS_PERFMON                           /* if0002.h */ -0x00000002
+#define NVIF_CLASS_PERFDOM                           /* if0003.h */ -0x00000003
+
+#define NVIF_CLASS_SW_NV04                           /* if0004.h */ -0x00000004
+#define NVIF_CLASS_SW_NV10                           /* if0005.h */ -0x00000005
+#define NVIF_CLASS_SW_NV50                           /* if0005.h */ -0x00000006
+#define NVIF_CLASS_SW_GF100                          /* if0005.h */ -0x00000007
 
 /* the below match nvidia-assigned (either in hw, or sw) class numbers */
+#define NV_NULL_CLASS                                                0x00000030
+
 #define NV_DEVICE                                     /* cl0080.h */ 0x00000080
 
 #define NV_DMA_FROM_MEMORY                            /* cl0002.h */ 0x00000002
 #define NV_DMA_TO_MEMORY                              /* cl0002.h */ 0x00000003
 #define NV_DMA_IN_MEMORY                              /* cl0002.h */ 0x0000003d
 
+#define NV50_TWOD                                                    0x0000502d
 #define FERMI_TWOD_A                                                 0x0000902d
 
+#define NV50_MEMORY_TO_MEMORY_FORMAT                                 0x00005039
 #define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x00009039
 
 #define KEPLER_INLINE_TO_MEMORY_A                                    0x0000a040
@@ -99,6 +107,12 @@
 #define GF110_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000907e
 #define GK104_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000917e
 
+#define NV50_TESLA                                                   0x00005097
+#define G82_TESLA                                                    0x00008297
+#define GT200_TESLA                                                  0x00008397
+#define GT214_TESLA                                                  0x00008597
+#define GT21A_TESLA                                                  0x00008697
+
 #define FERMI_A                                       /* cl9097.h */ 0x00009097
 #define FERMI_B                                       /* cl9097.h */ 0x00009197
 #define FERMI_C                                       /* cl9097.h */ 0x00009297
@@ -140,6 +154,8 @@
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
+#define NV50_COMPUTE                                                 0x000050c0
+#define GT214_COMPUTE                                                0x000085c0
 #define FERMI_COMPUTE_A                                              0x000090c0
 #define FERMI_COMPUTE_B                                              0x000091c0
 #define KEPLER_COMPUTE_A                                             0x0000a0c0
diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h
index 4a7f6f7b836d..b52a8eadce01 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/client.h
@@ -11,8 +11,7 @@ struct nvif_client {
 	bool super;
 };
 
-int  nvif_client_init(const char *drv, const char *name, u64 device,
-		      const char *cfg, const char *dbg,
+int  nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
 		      struct nvif_client *);
 void nvif_client_fini(struct nvif_client *);
 int  nvif_client_ioctl(struct nvif_client *, void *, u32);
diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h
index 8bd39e69229c..0c6f48d8140a 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h
@@ -1,5 +1,7 @@
 #ifndef __NVIF_DRIVER_H__
 #define __NVIF_DRIVER_H__
+#include <nvif/os.h>
+struct nvif_client;
 
 struct nvif_driver {
 	const char *name;
@@ -14,9 +16,11 @@ struct nvif_driver {
 	bool keep;
 };
 
+int nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		     const char *name, u64 device, struct nvif_client *);
+
 extern const struct nvif_driver nvif_driver_nvkm;
 extern const struct nvif_driver nvif_driver_drm;
 extern const struct nvif_driver nvif_driver_lib;
 extern const struct nvif_driver nvif_driver_null;
-
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0000.h b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
index 85c44e8a1201..c2c0fc41e017 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0000.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
@@ -1,9 +1,16 @@
 #ifndef __NVIF_IF0000_H__
 #define __NVIF_IF0000_H__
 
-#define NV_CLIENT_DEVLIST                                                  0x00
+struct nvif_client_v0 {
+	__u8  version;
+	__u8  pad01[7];
+	__u64 device;
+	char  name[32];
+};
+
+#define NVIF_CLIENT_V0_DEVLIST                                             0x00
 
-struct nv_client_devlist_v0 {
+struct nvif_client_devlist_v0 {
 	__u8  version;
 	__u8  count;
 	__u8  pad02[6];
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index eaf5905a87a3..e876634da10a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -1,5 +1,6 @@
 #ifndef __NVKM_CLIENT_H__
 #define __NVKM_CLIENT_H__
+#define nvkm_client(p) container_of((p), struct nvkm_client, object)
 #include <core/object.h>
 
 struct nvkm_client {
@@ -8,9 +9,8 @@ struct nvkm_client {
 	u64 device;
 	u32 debug;
 
-	struct nvkm_client_notify *notify[16];
+	struct nvkm_client_notify *notify[32];
 	struct rb_root objroot;
-	struct rb_root dmaroot;
 
 	bool super;
 	void *data;
@@ -19,15 +19,11 @@ struct nvkm_client {
 	struct nvkm_vm *vm;
 };
 
-bool nvkm_client_insert(struct nvkm_client *, struct nvkm_object *);
-void nvkm_client_remove(struct nvkm_client *, struct nvkm_object *);
-struct nvkm_object *nvkm_client_search(struct nvkm_client *, u64 object);
-
 int  nvkm_client_new(const char *name, u64 device, const char *cfg,
-		     const char *dbg, struct nvkm_client **);
-void nvkm_client_del(struct nvkm_client **);
-int  nvkm_client_init(struct nvkm_client *);
-int  nvkm_client_fini(struct nvkm_client *, bool suspend);
+		     const char *dbg,
+		     int (*)(const void *, u32, const void *, u32),
+		     struct nvkm_client **);
+struct nvkm_client *nvkm_client_search(struct nvkm_client *, u64 handle);
 
 int nvkm_client_notify_new(struct nvkm_object *, struct nvkm_event *,
 			   void *data, u32 size);
@@ -37,8 +33,8 @@ int nvkm_client_notify_put(struct nvkm_client *, int index);
 
 /* logging for client-facing objects */
 #define nvif_printk(o,l,p,f,a...) do {                                         \
-	struct nvkm_object *_object = (o);                                     \
-	struct nvkm_client *_client = _object->client;                         \
+	const struct nvkm_object *_object = (o);                               \
+	const struct nvkm_client *_client = _object->client;                   \
 	if (_client->debug >= NV_DBG_##l)                                      \
 		printk(KERN_##p "nouveau: %s:%08x:%08x: "f, _client->name,     \
 		       _object->handle, _object->oclass, ##a);                 \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 6bc712f32c8b..d426b86e2712 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -262,7 +262,7 @@ extern const struct nvkm_sclass nvkm_udevice_sclass;
 
 /* device logging */
 #define nvdev_printk_(d,l,p,f,a...) do {                                       \
-	struct nvkm_device *_device = (d);                                     \
+	const struct nvkm_device *_device = (d);                               \
 	if (_device->debug >= (l))                                             \
 		dev_##p(_device->dev, f, ##a);                                 \
 } while(0)
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index 9ebfd8782366..d4cd2fbfde88 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -20,6 +20,7 @@ struct nvkm_engine_func {
 	int (*fini)(struct nvkm_engine *, bool suspend);
 	void (*intr)(struct nvkm_engine *);
 	void (*tile)(struct nvkm_engine *, int region, struct nvkm_fb_tile *);
+	bool (*chsw_load)(struct nvkm_engine *);
 
 	struct {
 		int (*sclass)(struct nvkm_oclass *, int index,
@@ -44,4 +45,5 @@ int nvkm_engine_new_(const struct nvkm_engine_func *, struct nvkm_device *,
 struct nvkm_engine *nvkm_engine_ref(struct nvkm_engine *);
 void nvkm_engine_unref(struct nvkm_engine **);
 void nvkm_engine_tile(struct nvkm_engine *, int region);
+bool nvkm_engine_chsw_load(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
index 9363b839a9da..33ca6769266a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
@@ -6,9 +6,10 @@ struct nvkm_vma;
 struct nvkm_vm;
 
 enum nvkm_memory_target {
-	NVKM_MEM_TARGET_INST,
-	NVKM_MEM_TARGET_VRAM,
-	NVKM_MEM_TARGET_HOST,
+	NVKM_MEM_TARGET_INST, /* instance memory */
+	NVKM_MEM_TARGET_VRAM, /* video memory */
+	NVKM_MEM_TARGET_HOST, /* coherent system memory */
+	NVKM_MEM_TARGET_NCOH, /* non-coherent system memory */
 };
 
 struct nvkm_memory {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
index d92fd41e4056..7bd4897a8a2a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
@@ -5,7 +5,7 @@
 struct nvkm_mm_node {
 	struct list_head nl_entry;
 	struct list_head fl_entry;
-	struct list_head rl_entry;
+	struct nvkm_mm_node *next;
 
 #define NVKM_MM_HEAP_ANY 0x00
 	u8  heap;
@@ -38,4 +38,10 @@ int  nvkm_mm_tail(struct nvkm_mm *, u8 heap, u8 type, u32 size_max,
 		  u32 size_min, u32 align, struct nvkm_mm_node **);
 void nvkm_mm_free(struct nvkm_mm *, struct nvkm_mm_node **);
 void nvkm_mm_dump(struct nvkm_mm *, const char *);
+
+static inline bool
+nvkm_mm_contiguous(struct nvkm_mm_node *node)
+{
+	return !node->next;
+}
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
index dcd048b91fac..96dda350ada3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
@@ -62,6 +62,11 @@ int nvkm_object_wr32(struct nvkm_object *, u64 addr, u32  data);
 int nvkm_object_bind(struct nvkm_object *, struct nvkm_gpuobj *, int align,
 		     struct nvkm_gpuobj **);
 
+bool nvkm_object_insert(struct nvkm_object *);
+void nvkm_object_remove(struct nvkm_object *);
+struct nvkm_object *nvkm_object_search(struct nvkm_client *, u64 object,
+				       const struct nvkm_object_func *);
+
 struct nvkm_sclass {
 	int minver;
 	int maxver;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 57adefa8b08e..ca9ed3d68f44 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -32,7 +32,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
 
 /* subdev logging */
 #define nvkm_printk_(s,l,p,f,a...) do {                                        \
-	struct nvkm_subdev *_subdev = (s);                                     \
+	const struct nvkm_subdev *_subdev = (s);                               \
 	if (_subdev->debug >= (l)) {                                           \
 		dev_##p(_subdev->device->dev, "%s: "f,                         \
 			nvkm_subdev_name[_subdev->index], ##a);                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
index 114bfb737a81..d2a6532ce3b9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
@@ -12,9 +12,6 @@ struct nvkm_dmaobj {
 	u32 access;
 	u64 start;
 	u64 limit;
-
-	struct rb_node rb;
-	u64 handle; /*XXX HANDLE MERGE */
 };
 
 struct nvkm_dma {
@@ -22,8 +19,7 @@ struct nvkm_dma {
 	struct nvkm_engine engine;
 };
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *, struct nvkm_client *, u64 object);
+struct nvkm_dmaobj *nvkm_dmaobj_search(struct nvkm_client *, u64 object);
 
 int nv04_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 int nv50_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
index e6baf039c269..7e498e65b1e8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
@@ -4,13 +4,26 @@
 #include <core/engine.h>
 struct nvkm_fifo_chan;
 
+enum nvkm_falcon_dmaidx {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
 struct nvkm_falcon {
 	const struct nvkm_falcon_func *func;
-	struct nvkm_engine engine;
-
+	const struct nvkm_subdev *owner;
+	const char *name;
 	u32 addr;
-	u8  version;
-	u8  secret;
+
+	struct mutex mutex;
+	const struct nvkm_subdev *user;
+
+	u8 version;
+	u8 secret;
+	bool debug;
 
 	struct nvkm_memory *core;
 	bool external;
@@ -19,15 +32,25 @@ struct nvkm_falcon {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} code;
 
 	struct {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} data;
+
+	struct nvkm_engine engine;
 };
 
+int nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		       struct nvkm_falcon **);
+void nvkm_falcon_del(struct nvkm_falcon **);
+int nvkm_falcon_get(struct nvkm_falcon *, const struct nvkm_subdev *);
+void nvkm_falcon_put(struct nvkm_falcon *, const struct nvkm_subdev *);
+
 int nvkm_falcon_new_(const struct nvkm_falcon_func *, struct nvkm_device *,
 		     int index, bool enable, u32 addr, struct nvkm_engine **);
 
@@ -42,6 +65,51 @@ struct nvkm_falcon_func {
 	} data;
 	void (*init)(struct nvkm_falcon *);
 	void (*intr)(struct nvkm_falcon *, struct nvkm_fifo_chan *);
+	void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool);
+	void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8);
+	void (*read_dmem)(struct nvkm_falcon *, u32, u32, u8, void *);
+	void (*bind_context)(struct nvkm_falcon *, struct nvkm_gpuobj *);
+	int (*wait_for_halt)(struct nvkm_falcon *, u32);
+	int (*clear_interrupt)(struct nvkm_falcon *, u32);
+	void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr);
+	void (*start)(struct nvkm_falcon *);
+	int (*enable)(struct nvkm_falcon *falcon);
+	void (*disable)(struct nvkm_falcon *falcon);
+
 	struct nvkm_sclass sclass[];
 };
+
+static inline u32
+nvkm_falcon_rd32(struct nvkm_falcon *falcon, u32 addr)
+{
+	return nvkm_rd32(falcon->owner->device, falcon->addr + addr);
+}
+
+static inline void
+nvkm_falcon_wr32(struct nvkm_falcon *falcon, u32 addr, u32 data)
+{
+	nvkm_wr32(falcon->owner->device, falcon->addr + addr, data);
+}
+
+static inline u32
+nvkm_falcon_mask(struct nvkm_falcon *falcon, u32 addr, u32 mask, u32 val)
+{
+	struct nvkm_device *device = falcon->owner->device;
+
+	return nvkm_mask(device, falcon->addr + addr, mask, val);
+}
+
+void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8,
+			   bool);
+void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8);
+void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *);
+void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_gpuobj *);
+void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32);
+void nvkm_falcon_start(struct nvkm_falcon *);
+int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32);
+int nvkm_falcon_clear_interrupt(struct nvkm_falcon *, u32);
+int nvkm_falcon_enable(struct nvkm_falcon *);
+void nvkm_falcon_disable(struct nvkm_falcon *);
+int nvkm_falcon_reset(struct nvkm_falcon *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index ed92fec5292c..24efa900d8ca 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -40,6 +40,7 @@ struct nvkm_fifo {
 
 	struct nvkm_event uevent; /* async user trigger */
 	struct nvkm_event cevent; /* channel creation event */
+	struct nvkm_event kevent; /* channel killed */
 };
 
 void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
new file mode 100644
index 000000000000..f5f4a14c4030
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
@@ -0,0 +1,26 @@
+#ifndef __NVBIOS_POWER_BUDGET_H__
+#define __NVBIOS_POWER_BUDGET_H__
+
+#include <nvkm/subdev/bios.h>
+
+struct nvbios_power_budget_entry {
+	u32 min_w;
+	u32 avg_w;
+	u32 max_w;
+};
+
+struct nvbios_power_budget {
+	u32 offset;
+	u8  ver;
+	u8  hlen;
+	u8  elen;
+	u8  ecount;
+	u8  cap_entry;
+};
+
+int nvbios_power_budget_header(struct nvkm_bios *,
+                               struct nvbios_power_budget *);
+int nvbios_power_budget_entry(struct nvkm_bios *, struct nvbios_power_budget *,
+                              u8 idx, struct nvbios_power_budget_entry *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 794e432578b2..0b26a4c860ec 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -29,7 +29,7 @@ struct nvkm_mem {
 	u8  page_shift;
 
 	struct nvkm_mm_node *tag;
-	struct list_head regions;
+	struct nvkm_mm_node *mem;
 	dma_addr_t *pages;
 	u32 memtype;
 	u64 offset;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
index 3c2ddd975273..b7a9b041e130 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
@@ -8,6 +8,9 @@ struct nvkm_iccsense {
 	bool data_valid;
 	struct list_head sensors;
 	struct list_head rails;
+
+	u32 power_w_max;
+	u32 power_w_crit;
 };
 
 int gf100_iccsense_new(struct nvkm_device *, int index, struct nvkm_iccsense **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 27d25b18d85c..e68ba636741b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -9,6 +9,7 @@ struct nvkm_mc {
 
 void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+bool nvkm_mc_enabled(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_intr(struct nvkm_device *, bool *handled);
 void nvkm_mc_intr_unarm(struct nvkm_device *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index e6523e2cea9f..ac2a695963c1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -43,6 +43,7 @@ int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv46_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int g92_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
index f37538eb1fe5..179b6ed3f595 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
@@ -1,10 +1,12 @@
 #ifndef __NVKM_PMU_H__
 #define __NVKM_PMU_H__
 #include <core/subdev.h>
+#include <engine/falcon.h>
 
 struct nvkm_pmu {
 	const struct nvkm_pmu_func *func;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *falcon;
 
 	struct {
 		u32 base;
@@ -35,6 +37,7 @@ int gk110_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk208_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk20a_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gm107_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
+int gm20b_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gp100_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gp102_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index b04c38c07761..5dbd8aa4f8c2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -26,7 +26,7 @@
 #include <core/subdev.h>
 
 enum nvkm_secboot_falcon {
-	NVKM_SECBOOT_FALCON_PMU	= 0,
+	NVKM_SECBOOT_FALCON_PMU = 0,
 	NVKM_SECBOOT_FALCON_RESERVED = 1,
 	NVKM_SECBOOT_FALCON_FECS = 2,
 	NVKM_SECBOOT_FALCON_GPCCS = 3,
@@ -35,22 +35,23 @@ enum nvkm_secboot_falcon {
 };
 
 /**
- * @base:		base IO address of the falcon performing secure boot
- * @irq_mask:		IRQ mask of the falcon performing secure boot
- * @enable_mask:	enable mask of the falcon performing secure boot
+ * @wpr_set: whether the WPR region is currently set
 */
 struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
+	struct nvkm_acr *acr;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *boot_falcon;
 
-	enum nvkm_devidx devidx;
-	u32 base;
+	u64 wpr_addr;
+	u32 wpr_size;
+
+	bool wpr_set;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
 bool nvkm_secboot_is_managed(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-int nvkm_secboot_reset(struct nvkm_secboot *, u32 falcon);
-int nvkm_secboot_start(struct nvkm_secboot *, u32 falcon);
+int nvkm_secboot_reset(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
 int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
 int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 82d3e28918fd..6a567fe347b3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -48,10 +48,8 @@ void nvkm_timer_alarm_cancel(struct nvkm_timer *, struct nvkm_alarm *);
 	} while (_taken = nvkm_timer_read(_tmr) - _time0, _taken < _nsecs);    \
                                                                                \
 	if (_taken >= _nsecs) {                                                \
-		if (_warn) {                                                   \
-			dev_warn(_device->dev, "timeout at %s:%d/%s()!\n",     \
-				 __FILE__, __LINE__, __func__);                \
-		}                                                              \
+		if (_warn)                                                     \
+			dev_WARN(_device->dev, "timeout\n");                   \
 		_taken = -ETIMEDOUT;                                           \
 	}                                                                      \
 	_taken;                                                                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 71ebbfd4484f..d23209b62c25 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -11,6 +11,7 @@ struct nvkm_top {
 u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
 u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
 u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+int nvkm_top_fault_id(struct nvkm_device *, enum nvkm_devidx);
 enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
 enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 4df4f6ed4886..f98f800cc011 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -87,7 +87,7 @@ nouveau_abi16_put(struct nouveau_abi16 *abi16, int ret)
 s32
 nouveau_abi16_swclass(struct nouveau_drm *drm)
 {
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 		return NVIF_CLASS_SW_NV04;
 	case NV_DEVICE_INFO_V0_CELSIUS:
@@ -175,7 +175,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_gr *gr = nvxx_gr(device);
 	struct drm_nouveau_getparam *getparam = data;
 
@@ -321,7 +321,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	}
 
 	/* Named memory object area */
-	ret = nouveau_gem_new(dev, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
+	ret = nouveau_gem_new(cli, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
 			      0, 0, &chan->ntfy);
 	if (ret == 0)
 		ret = nouveau_bo_pin(chan->ntfy, TTM_PL_FLAG_TT, false);
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 8b1ca4add2ed..380f340204e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -65,7 +65,7 @@ static int
 nv40_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = (nvif_rd32(device, NV40_PMC_BACKLIGHT) &
 				   NV40_PMC_BACKLIGHT_MASK) >> 16;
 
@@ -76,7 +76,7 @@ static int
 nv40_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = bd->props.brightness;
 	int reg = nvif_rd32(device, NV40_PMC_BACKLIGHT);
 
@@ -96,7 +96,7 @@ static int
 nv40_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct backlight_properties props;
 	struct backlight_device *bd;
 	struct backlight_connector bl_connector;
@@ -133,7 +133,7 @@ nv50_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val;
@@ -148,7 +148,7 @@ nv50_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val = (bd->props.brightness * div) / 100;
@@ -169,7 +169,7 @@ nva3_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -187,7 +187,7 @@ nva3_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -213,7 +213,7 @@ static int
 nv50_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nouveau_encoder *nv_encoder;
 	struct backlight_properties props;
 	struct backlight_device *bd;
@@ -231,9 +231,9 @@ nv50_backlight_init(struct drm_connector *connector)
 	if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
 		return 0;
 
-	if (drm->device.info.chipset <= 0xa0 ||
-	    drm->device.info.chipset == 0xaa ||
-	    drm->device.info.chipset == 0xac)
+	if (drm->client.device.info.chipset <= 0xa0 ||
+	    drm->client.device.info.chipset == 0xaa ||
+	    drm->client.device.info.chipset == 0xac)
 		ops = &nv50_bl_ops;
 	else
 		ops = &nva3_bl_ops;
@@ -265,7 +265,7 @@ int
 nouveau_backlight_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct drm_connector *connector;
 
 	if (apple_gmux_present()) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 23ffe8571a99..9a0772ad495a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -215,7 +215,7 @@ int call_lvds_script(struct drm_device *dev, struct dcb_output *dcbent, int head
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	uint8_t lvds_ver = bios->data[bios->fp.lvdsmanufacturerpointer];
 	uint32_t sel_clk_binding, sel_clk;
@@ -319,7 +319,7 @@ static int
 get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
 	/*
 	 * The fp strap is normally dictated by the "User Strap" in
@@ -333,10 +333,10 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 	if (bios->major_version < 5 && bios->data[0x48] & 0x4)
 		return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_MAXWELL)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_MAXWELL)
 		return nvif_rd32(device, 0x001800) & 0x0000000f;
 	else
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf;
 	else
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 16) & 0xf;
@@ -638,7 +638,7 @@ int run_tmds_table(struct drm_device *dev, struct dcb_output *dcbent, int head,
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	int cv = bios->chip_version;
 	uint16_t clktable = 0, scriptptr;
@@ -1255,7 +1255,7 @@ olddcb_table(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	u8 *dcb = NULL;
 
-	if (drm->device.info.family > NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_TNT)
 		dcb = ROMPTR(dev, drm->vbios.data[0x36]);
 	if (!dcb) {
 		NV_WARN(drm, "No DCB data found in VBIOS\n");
@@ -1918,7 +1918,7 @@ static int load_nv17_hwsq_ucode_entry(struct drm_device *dev, struct nvbios *bio
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	uint8_t bytes_to_write;
 	uint16_t hwsq_entry_offset;
 	int i;
@@ -2012,7 +2012,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios *legacy = &drm->vbios;
 
 	memset(legacy, 0, sizeof(struct nvbios));
@@ -2064,7 +2064,7 @@ nouveau_bios_posted(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	unsigned htotal;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return true;
 
 	htotal  = NVReadVgaCrtc(dev, 0, 0x06);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 8a528ebe30f3..548f36d33924 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -48,7 +48,7 @@ nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	int i = reg - drm->tile.reg;
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_fb *fb = device->fb;
 	struct nvkm_fb_tile *tile = &fb->tile.region[i];
 
@@ -100,7 +100,7 @@ nv10_bo_set_tiling(struct drm_device *dev, u32 addr,
 		   u32 size, u32 pitch, u32 flags)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct nouveau_drm_tile *tile, *found = NULL;
 	int i;
 
@@ -139,60 +139,62 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 	kfree(nvbo);
 }
 
+static inline u64
+roundup_64(u64 x, u32 y)
+{
+	x += y - 1;
+	do_div(x, y);
+	return x * y;
+}
+
 static void
 nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
-		       int *align, int *size)
+		       int *align, u64 *size)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 
 	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (nvbo->tile_mode) {
 			if (device->info.chipset >= 0x40) {
 				*align = 65536;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x30) {
 				*align = 32768;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x20) {
 				*align = 16384;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x10) {
 				*align = 16384;
-				*size = roundup(*size, 32 * nvbo->tile_mode);
+				*size = roundup_64(*size, 32 * nvbo->tile_mode);
 			}
 		}
 	} else {
-		*size = roundup(*size, (1 << nvbo->page_shift));
+		*size = roundup_64(*size, (1 << nvbo->page_shift));
 		*align = max((1 <<  nvbo->page_shift), *align);
 	}
 
-	*size = roundup(*size, PAGE_SIZE);
+	*size = roundup_64(*size, PAGE_SIZE);
 }
 
 int
-nouveau_bo_new(struct drm_device *dev, int size, int align,
+nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 	       uint32_t flags, uint32_t tile_mode, uint32_t tile_flags,
 	       struct sg_table *sg, struct reservation_object *robj,
 	       struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	size_t acc_size;
 	int ret;
 	int type = ttm_bo_type_device;
-	int lpg_shift = 12;
-	int max_size;
-
-	if (drm->client.vm)
-		lpg_shift = drm->client.vm->mmu->lpg_shift;
-	max_size = INT_MAX & ~((1 << lpg_shift) - 1);
 
-	if (size <= 0 || size > max_size) {
-		NV_WARN(drm, "skipped size %x\n", (u32)size);
+	if (!size) {
+		NV_WARN(drm, "skipped size %016llx\n", size);
 		return -EINVAL;
 	}
 
@@ -208,8 +210,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
+	nvbo->cli = cli;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
+	if (!nvxx_device(&drm->client.device)->func->cpu_coherent)
 		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
@@ -255,10 +258,10 @@ static void
 set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT;
+	u32 vram_pages = drm->client.device.info.ram_size >> PAGE_SHIFT;
 	unsigned i, fpfn, lpfn;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
 	    nvbo->bo.mem.num_pages < vram_pages / 4) {
 		/*
@@ -316,12 +319,12 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
 	if (ret)
 		return ret;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
 	    memtype == TTM_PL_FLAG_VRAM && contig) {
 		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) {
 			if (bo->mem.mem_type == TTM_PL_VRAM) {
 				struct nvkm_mem *mem = bo->mem.mm_node;
-				if (!list_is_singular(&mem->regions))
+				if (!nvkm_mm_contiguous(mem->mem))
 					evict = true;
 			}
 			nvbo->tile_flags &= ~NOUVEAU_GEM_TILE_NONCONTIG;
@@ -443,7 +446,7 @@ void
 nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -463,7 +466,7 @@ void
 nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -579,9 +582,9 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 					 TTM_PL_FLAG_WC;
 		man->default_caching = TTM_PL_FLAG_WC;
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 			/* Some BARs do not support being ioremapped WC */
-			if (nvxx_bar(&drm->device)->iomap_uncached) {
+			if (nvxx_bar(&drm->client.device)->iomap_uncached) {
 				man->available_caching = TTM_PL_FLAG_UNCACHED;
 				man->default_caching = TTM_PL_FLAG_UNCACHED;
 			}
@@ -594,7 +597,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		}
 		break;
 	case TTM_PL_TT:
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 			man->func = &nouveau_gart_manager;
 		else
 		if (!drm->agp.bridge)
@@ -654,20 +657,20 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 10);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
-		OUT_RING  (chan, new_mem->num_pages);
+		OUT_RING  (chan, new_reg->num_pages);
 		BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386);
 	}
 	return ret;
@@ -686,15 +689,15 @@ nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -724,15 +727,15 @@ nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -763,15 +766,15 @@ nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -801,35 +804,35 @@ nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0320, 6);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* COPY */);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
 	}
 	return ret;
 }
 
 static int
 nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0304, 6);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */);
 	}
 	return ret;
@@ -853,14 +856,14 @@ nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 length = (new_mem->num_pages << PAGE_SHIFT);
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	int src_tiled = !!node->memtype;
-	int dst_tiled = !!((struct nvkm_mem *)new_mem->mm_node)->memtype;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 length = (new_reg->num_pages << PAGE_SHIFT);
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	int src_tiled = !!mem->memtype;
+	int dst_tiled = !!((struct nvkm_mem *)new_reg->mm_node)->memtype;
 	int ret;
 
 	while (length) {
@@ -940,20 +943,20 @@ nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static inline uint32_t
 nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
-		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+		      struct nouveau_channel *chan, struct ttm_mem_reg *reg)
 {
-	if (mem->mem_type == TTM_PL_TT)
+	if (reg->mem_type == TTM_PL_TT)
 		return NvDmaTT;
 	return chan->vram.handle;
 }
 
 static int
 nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	u32 src_offset = old_mem->start << PAGE_SHIFT;
-	u32 dst_offset = new_mem->start << PAGE_SHIFT;
-	u32 page_count = new_mem->num_pages;
+	u32 src_offset = old_reg->start << PAGE_SHIFT;
+	u32 dst_offset = new_reg->start << PAGE_SHIFT;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
 	ret = RING_SPACE(chan, 3);
@@ -961,10 +964,10 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		return ret;
 
 	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_reg));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_reg));
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -995,33 +998,33 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo,
-		     struct ttm_mem_reg *mem)
+		     struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *old_node = bo->mem.mm_node;
-	struct nvkm_mem *new_node = mem->mm_node;
-	u64 size = (u64)mem->num_pages << PAGE_SHIFT;
+	struct nvkm_mem *old_mem = bo->mem.mm_node;
+	struct nvkm_mem *new_mem = reg->mm_node;
+	u64 size = (u64)reg->num_pages << PAGE_SHIFT;
 	int ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, old_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[0]);
+	ret = nvkm_vm_get(drm->client.vm, size, old_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[0]);
 	if (ret)
 		return ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, new_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[1]);
+	ret = nvkm_vm_get(drm->client.vm, size, new_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[1]);
 	if (ret) {
-		nvkm_vm_put(&old_node->vma[0]);
+		nvkm_vm_put(&old_mem->vma[0]);
 		return ret;
 	}
 
-	nvkm_vm_map(&old_node->vma[0], old_node);
-	nvkm_vm_map(&old_node->vma[1], new_node);
+	nvkm_vm_map(&old_mem->vma[0], old_mem);
+	nvkm_vm_map(&old_mem->vma[1], new_mem);
 	return 0;
 }
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-		     bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		     bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_channel *chan = drm->ttm.chan;
@@ -1033,8 +1036,8 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	 * old nvkm_mem node, these will get cleaned up after ttm has
 	 * destroyed the ttm_mem_reg
 	 */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_move_prep(drm, bo, new_mem);
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_move_prep(drm, bo, new_reg);
 		if (ret)
 			return ret;
 	}
@@ -1042,14 +1045,14 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
 	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
 	if (ret == 0) {
-		ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
+		ret = drm->ttm.move(chan, bo, &bo->mem, new_reg);
 		if (ret == 0) {
 			ret = nouveau_fence_new(chan, false, &fence);
 			if (ret == 0) {
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								new_mem);
+								new_reg);
 				nouveau_fence_unref(&fence);
 			}
 		}
@@ -1124,7 +1127,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1132,35 +1135,35 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_tt_bind(bo->ttm, &tmp_mem);
+	ret = ttm_tt_bind(bo->ttm, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_reg);
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1168,34 +1171,34 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_reg);
 	if (ret)
 		goto out;
 
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static void
 nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
-		     struct ttm_mem_reg *new_mem)
+		     struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_vma *vma;
@@ -1205,10 +1208,10 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
 		return;
 
 	list_for_each_entry(vma, &nvbo->vma_list, head) {
-		if (new_mem && new_mem->mem_type != TTM_PL_SYSTEM &&
-			      (new_mem->mem_type == TTM_PL_VRAM ||
+		if (new_reg && new_reg->mem_type != TTM_PL_SYSTEM &&
+			      (new_reg->mem_type == TTM_PL_VRAM ||
 			       nvbo->page_shift != vma->vm->mmu->lpg_shift)) {
-			nvkm_vm_map(vma, new_mem->mm_node);
+			nvkm_vm_map(vma, new_reg->mm_node);
 		} else {
 			WARN_ON(ttm_bo_wait(bo, false, false));
 			nvkm_vm_unmap(vma);
@@ -1217,20 +1220,20 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
 }
 
 static int
-nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg,
 		   struct nouveau_drm_tile **new_tile)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	u64 offset = new_mem->start << PAGE_SHIFT;
+	u64 offset = new_reg->start << PAGE_SHIFT;
 
 	*new_tile = NULL;
-	if (new_mem->mem_type != TTM_PL_VRAM)
+	if (new_reg->mem_type != TTM_PL_VRAM)
 		return 0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
-		*new_tile = nv10_bo_set_tiling(dev, offset, new_mem->size,
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+		*new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size,
 						nvbo->tile_mode,
 						nvbo->tile_flags);
 	}
@@ -1253,11 +1256,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg *old_reg = &bo->mem;
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
@@ -1268,31 +1271,31 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
 		if (ret)
 			return ret;
 	}
 
 	/* Fake bo copy. */
-	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
+	if (old_reg->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
 		BUG_ON(bo->mem.mm_node != NULL);
-		bo->mem = *new_mem;
-		new_mem->mm_node = NULL;
+		bo->mem = *new_reg;
+		new_reg->mm_node = NULL;
 		goto out;
 	}
 
 	/* Hardware assisted copy. */
 	if (drm->ttm.move) {
-		if (new_mem->mem_type == TTM_PL_SYSTEM)
+		if (new_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flipd(bo, evict, intr,
-						    no_wait_gpu, new_mem);
-		else if (old_mem->mem_type == TTM_PL_SYSTEM)
+						    no_wait_gpu, new_reg);
+		else if (old_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flips(bo, evict, intr,
-						    no_wait_gpu, new_mem);
+						    no_wait_gpu, new_reg);
 		else
 			ret = nouveau_bo_move_m2mf(bo, evict, intr,
-						   no_wait_gpu, new_mem);
+						   no_wait_gpu, new_reg);
 		if (!ret)
 			goto out;
 	}
@@ -1300,10 +1303,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_reg);
 
 out:
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (ret)
 			nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
 		else
@@ -1323,54 +1326,54 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 }
 
 static int
-nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type];
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
+	struct nvkm_mem *mem = reg->mm_node;
 	int ret;
 
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
+	reg->bus.addr = NULL;
+	reg->bus.offset = 0;
+	reg->bus.size = reg->num_pages << PAGE_SHIFT;
+	reg->bus.base = 0;
+	reg->bus.is_iomem = false;
 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
 		return -EINVAL;
-	switch (mem->mem_type) {
+	switch (reg->mem_type) {
 	case TTM_PL_SYSTEM:
 		/* System memory */
 		return 0;
 	case TTM_PL_TT:
 #if IS_ENABLED(CONFIG_AGP)
 		if (drm->agp.bridge) {
-			mem->bus.offset = mem->start << PAGE_SHIFT;
-			mem->bus.base = drm->agp.base;
-			mem->bus.is_iomem = !drm->agp.cma;
+			reg->bus.offset = reg->start << PAGE_SHIFT;
+			reg->bus.base = drm->agp.base;
+			reg->bus.is_iomem = !drm->agp.cma;
 		}
 #endif
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA || !node->memtype)
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || !mem->memtype)
 			/* untiled */
 			break;
 		/* fallthrough, tiled memory */
 	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = device->func->resource_addr(device, 1);
-		mem->bus.is_iomem = true;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-			struct nvkm_bar *bar = nvxx_bar(&drm->device);
+		reg->bus.offset = reg->start << PAGE_SHIFT;
+		reg->bus.base = device->func->resource_addr(device, 1);
+		reg->bus.is_iomem = true;
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+			struct nvkm_bar *bar = nvxx_bar(&drm->client.device);
 			int page_shift = 12;
-			if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
-				page_shift = node->page_shift;
+			if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+				page_shift = mem->page_shift;
 
-			ret = nvkm_bar_umap(bar, node->size << 12, page_shift,
-					    &node->bar_vma);
+			ret = nvkm_bar_umap(bar, mem->size << 12, page_shift,
+					    &mem->bar_vma);
 			if (ret)
 				return ret;
 
-			nvkm_vm_map(&node->bar_vma, node);
-			mem->bus.offset = node->bar_vma.offset;
+			nvkm_vm_map(&mem->bar_vma, mem);
+			reg->bus.offset = mem->bar_vma.offset;
 		}
 		break;
 	default:
@@ -1380,15 +1383,15 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 }
 
 static void
-nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *mem = reg->mm_node;
 
-	if (!node->bar_vma.node)
+	if (!mem->bar_vma.node)
 		return;
 
-	nvkm_vm_unmap(&node->bar_vma);
-	nvkm_vm_put(&node->bar_vma);
+	nvkm_vm_unmap(&mem->bar_vma);
+	nvkm_vm_put(&mem->bar_vma);
 }
 
 static int
@@ -1396,7 +1399,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	u32 mappable = device->func->resource_size(device, 1) >> PAGE_SHIFT;
 	int i, ret;
 
@@ -1404,7 +1407,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	 * nothing to do here.
 	 */
 	if (bo->mem.mem_type != TTM_PL_VRAM) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA ||
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA ||
 		    !nouveau_bo_tile_layout(nvbo))
 			return 0;
 
@@ -1419,7 +1422,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 
 	/* make sure bo is in mappable vram */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
 	    bo->mem.start + bo->mem.num_pages < mappable)
 		return 0;
 
@@ -1461,7 +1464,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	}
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
@@ -1518,7 +1521,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index e42360983229..b06a5385d6dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,8 @@ struct nouveau_bo {
 	struct list_head vma_list;
 	unsigned page_shift;
 
+	struct nouveau_cli *cli;
+
 	u32 tile_mode;
 	u32 tile_flags;
 	struct nouveau_drm_tile *tile;
@@ -69,7 +71,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
 extern struct ttm_bo_driver nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
-int  nouveau_bo_new(struct drm_device *, int size, int align, u32 flags,
+int  nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 flags,
 		    u32 tile_mode, u32 tile_flags, struct sg_table *sg,
 		    struct reservation_object *robj,
 		    struct nouveau_bo **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index f9b3c811187e..dbc41fa86ee8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -45,10 +45,20 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+static int
+nouveau_channel_killed(struct nvif_notify *ntfy)
+{
+	struct nouveau_channel *chan = container_of(ntfy, typeof(*chan), kill);
+	struct nouveau_cli *cli = (void *)chan->user.client;
+	NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
+	atomic_set(&chan->killed, 1);
+	return NVIF_NOTIFY_DROP;
+}
+
 int
 nouveau_channel_idle(struct nouveau_channel *chan)
 {
-	if (likely(chan && chan->fence)) {
+	if (likely(chan && chan->fence && !atomic_read(&chan->killed))) {
 		struct nouveau_cli *cli = (void *)chan->user.client;
 		struct nouveau_fence *fence = NULL;
 		int ret;
@@ -78,6 +88,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 		nvif_object_fini(&chan->nvsw);
 		nvif_object_fini(&chan->gart);
 		nvif_object_fini(&chan->vram);
+		nvif_notify_fini(&chan->kill);
 		nvif_object_fini(&chan->user);
 		nvif_object_fini(&chan->push.ctxdma);
 		nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma);
@@ -107,13 +118,14 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 	chan->device = device;
 	chan->drm = drm;
+	atomic_set(&chan->killed, 0);
 
 	/* allocate memory for dma push buffer */
 	target = TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
 	if (nouveau_vram_pushbuf)
 		target = TTM_PL_FLAG_VRAM;
 
-	ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL, NULL,
+	ret = nouveau_bo_new(cli, size, 0, target, 0, 0, NULL, NULL,
 			    &chan->push.buffer);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(chan->push.buffer, target, false);
@@ -301,12 +313,26 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 {
 	struct nvif_device *device = chan->device;
 	struct nouveau_cli *cli = (void *)chan->user.client;
+	struct nouveau_drm *drm = chan->drm;
 	struct nvkm_mmu *mmu = nvxx_mmu(device);
 	struct nv_dma_v0 args = {};
 	int ret, i;
 
 	nvif_object_map(&chan->user);
 
+	if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
+		ret = nvif_notify_init(&chan->user, nouveau_channel_killed,
+				       true, NV906F_V0_NTFY_KILLED,
+				       NULL, 0, 0, &chan->kill);
+		if (ret == 0)
+			ret = nvif_notify_get(&chan->kill);
+		if (ret) {
+			NV_ERROR(drm, "Failed to request channel kill "
+				      "notification: %d\n", ret);
+			return ret;
+		}
+	}
+
 	/* allocate dma objects to cover all allowed vram, and gart */
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
 		if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 48062c94f36d..46b947ba1cf4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -1,7 +1,7 @@
 #ifndef __NOUVEAU_CHAN_H__
 #define __NOUVEAU_CHAN_H__
-
 #include <nvif/object.h>
+#include <nvif/notify.h>
 struct nvif_device;
 
 struct nouveau_channel {
@@ -38,6 +38,9 @@ struct nouveau_channel {
 	u32 user_put;
 
 	struct nvif_object user;
+
+	struct nvif_notify kill;
+	atomic_t killed;
 };
 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 966d20ab4de4..f5add64c093f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -419,7 +419,7 @@ nouveau_connector_ddc_detect(struct drm_connector *connector)
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 	int i, panel = -ENODEV;
@@ -521,7 +521,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		return;
 	nv_connector->detected_encoder = nv_encoder;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 	} else
@@ -531,8 +531,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		connector->interlace_allowed = false;
 	} else {
 		connector->doublescan_allowed = true;
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
-		    (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
+		    (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 		     (dev->pdev->device & 0x0ff0) != 0x0100 &&
 		     (dev->pdev->device & 0x0ff0) != 0x0150))
 			/* HW is broken */
@@ -984,17 +984,17 @@ get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi)
 		/* Note: these limits are conservative, some Fermi's
 		 * can do 297 MHz. Unclear how this can be determined.
 		 */
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
 			return 297000;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
 			return 225000;
 	}
 	if (dcb->location != DCB_LOC_ON_CHIP ||
-	    drm->device.info.chipset >= 0x46)
+	    drm->client.device.info.chipset >= 0x46)
 		return 165000;
-	else if (drm->device.info.chipset >= 0x40)
+	else if (drm->client.device.info.chipset >= 0x40)
 		return 155000;
-	else if (drm->device.info.chipset >= 0x18)
+	else if (drm->client.device.info.chipset >= 0x18)
 		return 135000;
 	else
 		return 112000;
@@ -1041,7 +1041,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 		clock = clock * (connector->display_info.bpc * 3) / 10;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		return MODE_BAD;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 411c12cdb249..fd64dfdc7d4f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -259,8 +259,9 @@ nouveau_debugfs_init(struct nouveau_drm *drm)
 	if (!drm->debugfs)
 		return -ENOMEM;
 
-	ret = nvif_object_init(&drm->device.object, 0, NVIF_CLASS_CONTROL,
-			       NULL, 0, &drm->debugfs->ctrl);
+	ret = nvif_object_init(&drm->client.device.object, 0,
+			       NVIF_CLASS_CONTROL, NULL, 0,
+			       &drm->debugfs->ctrl);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 6b570079d185..72fdba1a1c5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -414,7 +414,8 @@ nouveau_display_init(struct drm_device *dev)
 		return ret;
 
 	/* enable polling for external displays */
-	drm_kms_helper_poll_enable(dev);
+	if (!dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(dev);
 
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -495,7 +496,7 @@ int
 nouveau_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nouveau_display *disp;
 	int ret;
 
@@ -512,15 +513,15 @@ nouveau_display_create(struct drm_device *dev)
 
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
 		dev->mode_config.max_width = 2048;
 		dev->mode_config.max_height = 2048;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		dev->mode_config.max_width = 4096;
 		dev->mode_config.max_height = 4096;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI) {
 		dev->mode_config.max_width = 8192;
 		dev->mode_config.max_height = 8192;
 	} else {
@@ -531,7 +532,7 @@ nouveau_display_create(struct drm_device *dev)
 	dev->mode_config.preferred_depth = 24;
 	dev->mode_config.prefer_shadow = 1;
 
-	if (drm->device.info.chipset < 0x11)
+	if (drm->client.device.info.chipset < 0x11)
 		dev->mode_config.async_page_flip = false;
 	else
 		dev->mode_config.async_page_flip = true;
@@ -558,7 +559,7 @@ nouveau_display_create(struct drm_device *dev)
 		int i;
 
 		for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) {
-			ret = nvif_object_init(&drm->device.object, 0,
+			ret = nvif_object_init(&drm->client.device.object, 0,
 					       oclass[i], NULL, 0, &disp->disp);
 		}
 
@@ -1057,6 +1058,7 @@ int
 nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 			    struct drm_mode_create_dumb *args)
 {
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_bo *bo;
 	uint32_t domain;
 	int ret;
@@ -1066,12 +1068,12 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 	args->size = roundup(args->size, PAGE_SIZE);
 
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	if (nouveau_drm(dev)->device.info.ram_size != 0)
+	if (nouveau_drm(dev)->client.device.info.ram_size != 0)
 		domain = NOUVEAU_GEM_DOMAIN_VRAM;
 	else
 		domain = NOUVEAU_GEM_DOMAIN_GART;
 
-	ret = nouveau_gem_new(dev, args->size, 0, domain, 0, 0, &bo);
+	ret = nouveau_gem_new(cli, args->size, 0, domain, 0, 0, &bo);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index dd7b52ab505a..468ed1d3bb26 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -37,6 +37,8 @@
 #include <core/pci.h>
 #include <core/tegra.h>
 
+#include <nvif/driver.h>
+
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
 #include <nvif/cla06f.h>
@@ -109,35 +111,53 @@ nouveau_name(struct drm_device *dev)
 		return nouveau_platform_name(dev->platformdev);
 }
 
+static void
+nouveau_cli_fini(struct nouveau_cli *cli)
+{
+	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
+	usif_client_fini(cli);
+	nvif_device_fini(&cli->device);
+	nvif_client_fini(&cli->base);
+}
+
 static int
-nouveau_cli_create(struct drm_device *dev, const char *sname,
-		   int size, void **pcli)
+nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
+		 struct nouveau_cli *cli)
 {
-	struct nouveau_cli *cli = *pcli = kzalloc(size, GFP_KERNEL);
+	u64 device = nouveau_name(drm->dev);
 	int ret;
-	if (cli) {
-		snprintf(cli->name, sizeof(cli->name), "%s", sname);
-		cli->dev = dev;
 
-		ret = nvif_client_init(NULL, cli->name, nouveau_name(dev),
-				       nouveau_config, nouveau_debug,
+	snprintf(cli->name, sizeof(cli->name), "%s", sname);
+	cli->dev = drm->dev;
+	mutex_init(&cli->mutex);
+	usif_client_init(cli);
+
+	if (cli == &drm->client) {
+		ret = nvif_driver_init(NULL, nouveau_config, nouveau_debug,
+				       cli->name, device, &cli->base);
+	} else {
+		ret = nvif_client_init(&drm->client.base, cli->name, device,
 				       &cli->base);
-		if (ret == 0) {
-			mutex_init(&cli->mutex);
-			usif_client_init(cli);
-		}
-		return ret;
 	}
-	return -ENOMEM;
-}
+	if (ret) {
+		NV_ERROR(drm, "Client allocation failed: %d\n", ret);
+		goto done;
+	}
 
-static void
-nouveau_cli_destroy(struct nouveau_cli *cli)
-{
-	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
-	nvif_client_fini(&cli->base);
-	usif_client_fini(cli);
-	kfree(cli);
+	ret = nvif_device_init(&cli->base.object, 0, NV_DEVICE,
+			       &(struct nv_device_v0) {
+					.device = ~0,
+			       }, sizeof(struct nv_device_v0),
+			       &cli->device);
+	if (ret) {
+		NV_ERROR(drm, "Device allocation failed: %d\n", ret);
+		goto done;
+	}
+
+done:
+	if (ret)
+		nouveau_cli_fini(cli);
+	return ret;
 }
 
 static void
@@ -161,7 +181,7 @@ nouveau_accel_fini(struct nouveau_drm *drm)
 static void
 nouveau_accel_init(struct nouveau_drm *drm)
 {
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvif_sclass *sclass;
 	u32 arg0, arg1;
 	int ret, i, n;
@@ -215,7 +235,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NVA06F_V0_ENGINE_CE0 |
 					  NVA06F_V0_ENGINE_CE1,
 					  0, &drm->cechan);
@@ -228,7 +248,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	if (device->info.chipset >= 0xa3 &&
 	    device->info.chipset != 0xaa &&
 	    device->info.chipset != 0xac) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NvDmaFB, NvDmaTT, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
@@ -240,7 +260,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		arg1 = NvDmaTT;
 	}
 
-	ret = nouveau_channel_new(drm, &drm->device, arg0, arg1, &drm->channel);
+	ret = nouveau_channel_new(drm, &drm->client.device,
+				  arg0, arg1, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -280,8 +301,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
-		ret = nvkm_gpuobj_new(nvxx_device(&drm->device), 32, 0, false,
-				      NULL, &drm->notify);
+		ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0,
+				      false, NULL, &drm->notify);
 		if (ret) {
 			NV_ERROR(drm, "failed to allocate notifier, %d\n", ret);
 			nouveau_accel_fini(drm);
@@ -407,12 +428,17 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	struct nouveau_drm *drm;
 	int ret;
 
-	ret = nouveau_cli_create(dev, "DRM", sizeof(*drm), (void **)&drm);
+	if (!(drm = kzalloc(sizeof(*drm), GFP_KERNEL)))
+		return -ENOMEM;
+	dev->dev_private = drm;
+	drm->dev = dev;
+
+	ret = nouveau_cli_init(drm, "DRM", &drm->client);
 	if (ret)
 		return ret;
 
-	dev->dev_private = drm;
-	drm->dev = dev;
+	dev->irq_enabled = true;
+
 	nvxx_client(&drm->client.base)->debug =
 		nvkm_dbgopt(nouveau_debug, "DRM");
 
@@ -421,33 +447,24 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 
 	nouveau_get_hdmi_dev(drm);
 
-	ret = nvif_device_init(&drm->client.base.object, 0, NV_DEVICE,
-			       &(struct nv_device_v0) {
-					.device = ~0,
-			       }, sizeof(struct nv_device_v0),
-			       &drm->device);
-	if (ret)
-		goto fail_device;
-
-	dev->irq_enabled = true;
-
 	/* workaround an odd issue on nvc1 by disabling the device's
 	 * nosnoop capability.  hopefully won't cause issues until a
 	 * better fix is found - assuming there is one...
 	 */
-	if (drm->device.info.chipset == 0xc1)
-		nvif_mask(&drm->device.object, 0x00088080, 0x00000800, 0x00000000);
+	if (drm->client.device.info.chipset == 0xc1)
+		nvif_mask(&drm->client.device.object, 0x00088080, 0x00000800, 0x00000000);
 
 	nouveau_vga_init(drm);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		if (!nvxx_device(&drm->device)->mmu) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->client.device)->mmu) {
 			ret = -ENOSYS;
 			goto fail_device;
 		}
 
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &drm->client.vm);
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device),
+				  0, (1ULL << 40), 0x1000, NULL,
+				  &drm->client.vm);
 		if (ret)
 			goto fail_device;
 
@@ -497,8 +514,8 @@ fail_bios:
 fail_ttm:
 	nouveau_vga_fini(drm);
 fail_device:
-	nvif_device_fini(&drm->device);
-	nouveau_cli_destroy(&drm->client);
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 	return ret;
 }
 
@@ -527,10 +544,10 @@ nouveau_drm_unload(struct drm_device *dev)
 	nouveau_ttm_fini(drm);
 	nouveau_vga_fini(drm);
 
-	nvif_device_fini(&drm->device);
 	if (drm->hdmi_device)
 		pci_dev_put(drm->hdmi_device);
-	nouveau_cli_destroy(&drm->client);
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 }
 
 void
@@ -560,7 +577,6 @@ static int
 nouveau_do_suspend(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 	int ret;
 
 	nouveau_led_suspend(dev);
@@ -590,7 +606,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 			goto fail_display;
 	}
 
-	NV_INFO(drm, "suspending client object trees...\n");
+	NV_INFO(drm, "suspending fence...\n");
 	if (drm->fence && nouveau_fence(drm)->suspend) {
 		if (!nouveau_fence(drm)->suspend(drm)) {
 			ret = -ENOMEM;
@@ -598,13 +614,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 		}
 	}
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		ret = nvif_client_suspend(&cli->base);
-		if (ret)
-			goto fail_client;
-	}
-
-	NV_INFO(drm, "suspending kernel object tree...\n");
+	NV_INFO(drm, "suspending object tree...\n");
 	ret = nvif_client_suspend(&drm->client.base);
 	if (ret)
 		goto fail_client;
@@ -612,10 +622,6 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 	return 0;
 
 fail_client:
-	list_for_each_entry_continue_reverse(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
@@ -631,19 +637,14 @@ static int
 nouveau_do_resume(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 
-	NV_INFO(drm, "resuming kernel object tree...\n");
+	NV_INFO(drm, "resuming object tree...\n");
 	nvif_client_resume(&drm->client.base);
 
-	NV_INFO(drm, "resuming client object trees...\n");
+	NV_INFO(drm, "resuming fence...\n");
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	nouveau_run_vbios_init(dev);
 
 	if (dev->mode_config.num_crtc) {
@@ -758,7 +759,7 @@ nouveau_pmops_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	struct nvif_device *device = &nouveau_drm(drm_dev)->device;
+	struct nvif_device *device = &nouveau_drm(drm_dev)->client.device;
 	int ret;
 
 	if (nouveau_runtime_pm == 0)
@@ -772,7 +773,10 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	pci_set_master(pdev);
 
 	ret = nouveau_do_resume(drm_dev, true);
-	drm_kms_helper_poll_enable(drm_dev);
+
+	if (!drm_dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(drm_dev);
+
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
@@ -841,20 +845,20 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	get_task_comm(tmpname, current);
 	snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid));
 
-	ret = nouveau_cli_create(dev, name, sizeof(*cli), (void **)&cli);
+	if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL)))
+		return ret;
 
+	ret = nouveau_cli_init(drm, name, cli);
 	if (ret)
-		goto out_suspend;
+		goto done;
 
 	cli->base.super = false;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &cli->vm);
-		if (ret) {
-			nouveau_cli_destroy(cli);
-			goto out_suspend;
-		}
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device), 0,
+				  (1ULL << 40), 0x1000, NULL, &cli->vm);
+		if (ret)
+			goto done;
 
 		nvxx_client(&cli->base)->vm = cli->vm;
 	}
@@ -865,10 +869,14 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	list_add(&cli->head, &drm->clients);
 	mutex_unlock(&drm->client.mutex);
 
-out_suspend:
+done:
+	if (ret && cli) {
+		nouveau_cli_fini(cli);
+		kfree(cli);
+	}
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-
 	return ret;
 }
 
@@ -895,7 +903,8 @@ static void
 nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
-	nouveau_cli_destroy(cli);
+	nouveau_cli_fini(cli);
+	kfree(cli);
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 8d5ed5bfdacb..eadec2f49ad3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -86,14 +86,17 @@ enum nouveau_drm_handle {
 
 struct nouveau_cli {
 	struct nvif_client base;
+	struct drm_device *dev;
+	struct mutex mutex;
+
+	struct nvif_device device;
+
 	struct nvkm_vm *vm; /*XXX*/
 	struct list_head head;
-	struct mutex mutex;
 	void *abi16;
 	struct list_head objects;
 	struct list_head notifys;
 	char name[32];
-	struct drm_device *dev;
 };
 
 static inline struct nouveau_cli *
@@ -111,7 +114,6 @@ struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
 
-	struct nvif_device device;
 	struct list_head clients;
 
 	struct {
@@ -165,6 +167,8 @@ struct nouveau_drm {
 	struct backlight_device *backlight;
 	struct list_head bl_connectors;
 	struct work_struct hpd_work;
+	struct work_struct fbcon_work;
+	int fbcon_new_state;
 #ifdef CONFIG_ACPI
 	struct notifier_block acpi_nb;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 9de6abb65781..442e25c17383 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -60,7 +60,7 @@ nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -92,7 +92,7 @@ nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -124,7 +124,7 @@ nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
 	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -266,10 +266,10 @@ nouveau_fbcon_accel_init(struct drm_device *dev)
 	struct fb_info *info = fbcon->helper.fbdev;
 	int ret;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		ret = nv04_fbcon_accel_init(info);
 	else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
 		ret = nv50_fbcon_accel_init(info);
 	else
 		ret = nvc0_fbcon_accel_init(info);
@@ -324,7 +324,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 		container_of(helper, struct nouveau_fbdev, helper);
 	struct drm_device *dev = fbcon->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct fb_info *info;
 	struct nouveau_framebuffer *fb;
 	struct nouveau_channel *chan;
@@ -341,8 +341,9 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
-	ret = nouveau_gem_new(dev, mode_cmd.pitches[0] * mode_cmd.height,
-			      0, NOUVEAU_GEM_DOMAIN_VRAM, 0, 0x0000, &nvbo);
+	ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] *
+			      mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM,
+			      0, 0x0000, &nvbo);
 	if (ret) {
 		NV_ERROR(drm, "failed to allocate framebuffer\n");
 		goto out;
@@ -472,19 +473,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
 	.fb_probe = nouveau_fbcon_create,
 };
 
+static void
+nouveau_fbcon_set_suspend_work(struct work_struct *work)
+{
+	struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
+	int state = READ_ONCE(drm->fbcon_new_state);
+
+	if (state == FBINFO_STATE_RUNNING)
+		pm_runtime_get_sync(drm->dev->dev);
+
+	console_lock();
+	if (state == FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_restore(drm->dev);
+	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (state != FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_save_disable(drm->dev);
+	console_unlock();
+
+	if (state == FBINFO_STATE_RUNNING) {
+		pm_runtime_mark_last_busy(drm->dev->dev);
+		pm_runtime_put_sync(drm->dev->dev);
+	}
+}
+
 void
 nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon) {
-		console_lock();
-		if (state == FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_restore(dev);
-		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
-		if (state != FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_save_disable(dev);
-		console_unlock();
-	}
+
+	if (!drm->fbcon)
+		return;
+
+	drm->fbcon_new_state = state;
+	/* Since runtime resume can happen as a result of a sysfs operation,
+	 * it's possible we already have the console locked. So handle fbcon
+	 * init/deinit from a seperate work thread
+	 */
+	schedule_work(&drm->fbcon_work);
 }
 
 int
@@ -504,11 +529,11 @@ nouveau_fbcon_init(struct drm_device *dev)
 		return -ENOMEM;
 
 	drm->fbcon = fbcon;
+	INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
 
 	drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, &fbcon->helper,
-				 dev->mode_config.num_crtc, 4);
+	ret = drm_fb_helper_init(dev, &fbcon->helper, 4);
 	if (ret)
 		goto free;
 
@@ -516,10 +541,10 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	if (drm->device.info.ram_size <= 32 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 32 * 1024 * 1024)
 		preferred_bpp = 8;
 	else
-	if (drm->device.info.ram_size <= 64 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 64 * 1024 * 1024)
 		preferred_bpp = 16;
 	else
 		preferred_bpp = 32;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index a6126c93f215..99e14e3e0fe4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -190,7 +190,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 		return;
 
 	ret = nvif_notify_init(&chan->user, nouveau_fence_wait_uevent_handler,
-			       false, G82_CHANNEL_DMA_V0_NTFY_UEVENT,
+			       false, NV826E_V0_NTFY_NON_STALL_INTERRUPT,
 			       &(struct nvif_notify_uevent_req) { },
 			       sizeof(struct nvif_notify_uevent_req),
 			       sizeof(struct nvif_notify_uevent_rep),
@@ -527,7 +527,7 @@ static bool nouveau_fence_no_signaling(struct dma_fence *f)
 	 * caller should have a reference on the fence,
 	 * else fence could get freed here
 	 */
-	WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
+	WARN_ON(kref_read(&fence->base.refcount) <= 1);
 
 	/*
 	 * This needs uevents to work correctly, but dma_fence_add_callback relies on
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index ccdce1b4eec4..d5e58a38f160 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -99,6 +99,7 @@ struct nv84_fence_priv {
 	struct nouveau_bo *bo;
 	struct nouveau_bo *bo_gart;
 	u32 *suspend;
+	struct mutex mutex;
 };
 
 int  nv84_fence_context_new(struct nouveau_channel *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 201b52b750dd..ca5397beb357 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -175,11 +175,11 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv)
 }
 
 int
-nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
+nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		uint32_t tile_mode, uint32_t tile_flags,
 		struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	u32 flags = 0;
 	int ret;
@@ -194,7 +194,7 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	if (domain & NOUVEAU_GEM_DOMAIN_COHERENT)
 		flags |= TTM_PL_FLAG_UNCACHED;
 
-	ret = nouveau_bo_new(dev, size, align, flags, tile_mode,
+	ret = nouveau_bo_new(cli, size, align, flags, tile_mode,
 			     tile_flags, NULL, NULL, pnvbo);
 	if (ret)
 		return ret;
@@ -206,12 +206,12 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	 */
 	nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_VRAM |
 			      NOUVEAU_GEM_DOMAIN_GART;
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		nvbo->valid_domains &= domain;
 
 	/* Initialize the embedded gem-object. We return a single gem-reference
 	 * to the caller, instead of a normal nouveau_bo ttm reference. */
-	ret = drm_gem_object_init(dev, &nvbo->gem, nvbo->bo.mem.size);
+	ret = drm_gem_object_init(drm->dev, &nvbo->gem, nvbo->bo.mem.size);
 	if (ret) {
 		nouveau_bo_ref(NULL, pnvbo);
 		return -ENOMEM;
@@ -257,7 +257,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct drm_nouveau_gem_new *req = data;
 	struct nouveau_bo *nvbo = NULL;
 	int ret = 0;
@@ -267,7 +267,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	ret = nouveau_gem_new(dev, req->info.size, req->align,
+	ret = nouveau_gem_new(cli, req->info.size, req->align,
 			      req->info.domain, req->info.tile_mode,
 			      req->info.tile_flags, &nvbo);
 	if (ret)
@@ -496,7 +496,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 			if (nvbo->bo.offset == b->presumed.offset &&
 			    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
 			      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
@@ -767,7 +767,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 				      push[i].length);
 		}
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		ret = RING_SPACE(chan, req->nr_push * 2);
 		if (ret) {
 			NV_PRINTK(err, cli, "cal_space: %d\n", ret);
@@ -840,7 +840,7 @@ out_next:
 		req->suffix0 = 0x00000000;
 		req->suffix1 = 0x00000000;
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
 	} else {
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h
index 7e32da2e037a..8fa6ed9ddd3a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.h
@@ -16,7 +16,7 @@ nouveau_gem_object(struct drm_gem_object *gem)
 }
 
 /* nouveau_gem.c */
-extern int nouveau_gem_new(struct drm_device *, int size, int align,
+extern int nouveau_gem_new(struct nouveau_cli *, u64 size, int align,
 			   uint32_t domain, uint32_t tile_mode,
 			   uint32_t tile_flags, struct nouveau_bo **);
 extern void nouveau_gem_object_del(struct drm_gem_object *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 71f764bf4cc6..23b1670c1c2f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -43,7 +43,7 @@ nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int temp = nvkm_therm_temp_get(therm);
 
 	if (temp < 0)
@@ -69,7 +69,7 @@ nouveau_hwmon_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	      therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST) * 1000);
@@ -81,7 +81,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -102,7 +102,7 @@ nouveau_hwmon_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	 therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST_HYST) * 1000);
@@ -114,7 +114,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -134,7 +134,7 @@ nouveau_hwmon_max_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK) * 1000);
@@ -145,7 +145,7 @@ nouveau_hwmon_set_max_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -165,7 +165,7 @@ nouveau_hwmon_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK_HYST) * 1000);
@@ -176,7 +176,7 @@ nouveau_hwmon_set_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -197,7 +197,7 @@ nouveau_hwmon_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL) * 1000);
@@ -209,7 +209,7 @@ nouveau_hwmon_set_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -230,7 +230,7 @@ nouveau_hwmon_critical_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL_HYST) * 1000);
@@ -243,7 +243,7 @@ nouveau_hwmon_set_critical_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -263,7 +263,7 @@ nouveau_hwmon_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN) * 1000);
@@ -275,7 +275,7 @@ nouveau_hwmon_set_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -296,7 +296,7 @@ nouveau_hwmon_emergency_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN_HYST) * 1000);
@@ -309,7 +309,7 @@ nouveau_hwmon_set_emergency_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -349,7 +349,7 @@ nouveau_hwmon_show_fan1_input(struct device *d, struct device_attribute *attr,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", nvkm_therm_fan_sense(therm));
 }
@@ -362,7 +362,7 @@ nouveau_hwmon_get_pwm1_enable(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MODE);
@@ -378,7 +378,7 @@ nouveau_hwmon_set_pwm1_enable(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -401,7 +401,7 @@ nouveau_hwmon_get_pwm1(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->fan_get(therm);
@@ -417,7 +417,7 @@ nouveau_hwmon_set_pwm1(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret = -ENODEV;
 	long value;
 
@@ -441,7 +441,7 @@ nouveau_hwmon_get_pwm1_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MIN_DUTY);
@@ -457,7 +457,7 @@ nouveau_hwmon_set_pwm1_min(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -481,7 +481,7 @@ nouveau_hwmon_get_pwm1_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MAX_DUTY);
@@ -497,7 +497,7 @@ nouveau_hwmon_set_pwm1_max(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -521,7 +521,7 @@ nouveau_hwmon_get_in0_input(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 	int ret;
 
 	ret = nvkm_volt_get(volt);
@@ -540,7 +540,7 @@ nouveau_hwmon_get_in0_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->min_uv)
 		return -ENODEV;
@@ -557,7 +557,7 @@ nouveau_hwmon_get_in0_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->max_uv)
 		return -ENODEV;
@@ -584,7 +584,7 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	int result = nvkm_iccsense_read_all(iccsense);
 
 	if (result < 0)
@@ -596,6 +596,32 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO,
 			  nouveau_hwmon_get_power1_input, NULL, 0);
 
+static ssize_t
+nouveau_hwmon_get_power1_max(struct device *d, struct device_attribute *a,
+			     char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return sprintf(buf, "%i\n", iccsense->power_w_max);
+}
+
+static SENSOR_DEVICE_ATTR(power1_max, S_IRUGO,
+			  nouveau_hwmon_get_power1_max, NULL, 0);
+
+static ssize_t
+nouveau_hwmon_get_power1_crit(struct device *d, struct device_attribute *a,
+			      char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return sprintf(buf, "%i\n", iccsense->power_w_crit);
+}
+
+static SENSOR_DEVICE_ATTR(power1_crit, S_IRUGO,
+			  nouveau_hwmon_get_power1_crit, NULL, 0);
+
 static struct attribute *hwmon_default_attributes[] = {
 	&sensor_dev_attr_name.dev_attr.attr,
 	&sensor_dev_attr_update_rate.dev_attr.attr,
@@ -639,6 +665,12 @@ static struct attribute *hwmon_power_attributes[] = {
 	NULL
 };
 
+static struct attribute *hwmon_power_caps_attributes[] = {
+	&sensor_dev_attr_power1_max.dev_attr.attr,
+	&sensor_dev_attr_power1_crit.dev_attr.attr,
+	NULL
+};
+
 static const struct attribute_group hwmon_default_attrgroup = {
 	.attrs = hwmon_default_attributes,
 };
@@ -657,6 +689,9 @@ static const struct attribute_group hwmon_in0_attrgroup = {
 static const struct attribute_group hwmon_power_attrgroup = {
 	.attrs = hwmon_power_attributes,
 };
+static const struct attribute_group hwmon_power_caps_attrgroup = {
+	.attrs = hwmon_power_caps_attributes,
+};
 #endif
 
 int
@@ -664,9 +699,9 @@ nouveau_hwmon_init(struct drm_device *dev)
 {
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	struct nouveau_hwmon *hwmon;
 	struct device *hwmon_dev;
 	int ret = 0;
@@ -728,8 +763,16 @@ nouveau_hwmon_init(struct drm_device *dev)
 	if (iccsense && iccsense->data_valid && !list_empty(&iccsense->rails)) {
 		ret = sysfs_create_group(&hwmon_dev->kobj,
 					 &hwmon_power_attrgroup);
+
 		if (ret)
 			goto error;
+
+		if (iccsense->power_w_max && iccsense->power_w_crit) {
+			ret = sysfs_create_group(&hwmon_dev->kobj,
+						 &hwmon_power_caps_attrgroup);
+			if (ret)
+				goto error;
+		}
 	}
 
 	hwmon->hwmon = hwmon_dev;
@@ -759,6 +802,7 @@ nouveau_hwmon_fini(struct drm_device *dev)
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_fan_rpm_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_in0_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_attrgroup);
+		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_caps_attrgroup);
 
 		hwmon_device_unregister(hwmon->hwmon);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.c b/drivers/gpu/drm/nouveau/nouveau_led.c
index 3e2f1b6cd4df..2c5e0628da12 100644
--- a/drivers/gpu/drm/nouveau/nouveau_led.c
+++ b/drivers/gpu/drm/nouveau/nouveau_led.c
@@ -38,7 +38,7 @@ nouveau_led_get_brightness(struct led_classdev *led)
 {
 	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	u32 div, duty;
 
 	div =  nvif_rd32(device, 0x61c880) & 0x00ffffff;
@@ -55,7 +55,7 @@ nouveau_led_set_brightness(struct led_classdev *led, enum led_brightness value)
 {
 	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
 	struct nouveau_drm *drm = nouveau_drm(drm_dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
 	u32 input_clk = 27e6; /* PDISPLAY.SOR[1].PWM is connected to the crystal */
 	u32 freq = 100; /* this is what nvidia uses and it should be good-enough */
@@ -78,7 +78,7 @@ int
 nouveau_led_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct dcb_gpio_func logo_led;
 	int ret;
 
@@ -102,6 +102,7 @@ nouveau_led_init(struct drm_device *dev)
 	ret = led_classdev_register(dev->dev, &drm->led->led);
 	if (ret) {
 		kfree(drm->led);
+		drm->led = NULL;
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.h b/drivers/gpu/drm/nouveau/nouveau_led.h
index 187ecdb82002..21a5775028cc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_led.h
+++ b/drivers/gpu/drm/nouveau/nouveau_led.h
@@ -42,7 +42,7 @@ nouveau_led(struct drm_device *dev)
 }
 
 /* nouveau_led.c */
-#if IS_ENABLED(CONFIG_LEDS_CLASS)
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
 int  nouveau_led_init(struct drm_device *dev);
 void nouveau_led_suspend(struct drm_device *dev);
 void nouveau_led_resume(struct drm_device *dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index 15f0925ea13b..b3f29b1ce9ea 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -60,20 +60,15 @@ nvkm_client_ioctl(void *priv, bool super, void *data, u32 size, void **hack)
 static int
 nvkm_client_resume(void *priv)
 {
-	return nvkm_client_init(priv);
+	struct nvkm_client *client = priv;
+	return nvkm_object_init(&client->object);
 }
 
 static int
 nvkm_client_suspend(void *priv)
 {
-	return nvkm_client_fini(priv, true);
-}
-
-static void
-nvkm_client_driver_fini(void *priv)
-{
 	struct nvkm_client *client = priv;
-	nvkm_client_del(&client);
+	return nvkm_object_fini(&client->object, true);
 }
 
 static int
@@ -108,23 +103,14 @@ static int
 nvkm_client_driver_init(const char *name, u64 device, const char *cfg,
 			const char *dbg, void **ppriv)
 {
-	struct nvkm_client *client;
-	int ret;
-
-	ret = nvkm_client_new(name, device, cfg, dbg, &client);
-	*ppriv = client;
-	if (ret)
-		return ret;
-
-	client->ntfy = nvkm_client_ntfy;
-	return 0;
+	return nvkm_client_new(name, device, cfg, dbg, nvkm_client_ntfy,
+			       (struct nvkm_client **)ppriv);
 }
 
 const struct nvif_driver
 nvif_driver_nvkm = {
 	.name = "nvkm",
 	.init = nvkm_client_driver_init,
-	.fini = nvkm_client_driver_fini,
 	.suspend = nvkm_client_suspend,
 	.resume = nvkm_client_resume,
 	.ioctl = nvkm_client_ioctl,
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index a0a9704cfe2b..1fefc93af1d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -60,6 +60,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 							 struct dma_buf_attachment *attach,
 							 struct sg_table *sg)
 {
+	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_bo *nvbo;
 	struct reservation_object *robj = attach->dmabuf->resv;
 	u32 flags = 0;
@@ -68,7 +69,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 	flags = TTM_PL_FLAG_TT;
 
 	ww_mutex_lock(&robj->lock, NULL);
-	ret = nouveau_bo_new(dev, attach->dmabuf->size, 0, flags, 0, 0,
+	ret = nouveau_bo_new(&drm->client, attach->dmabuf->size, 0, flags, 0, 0,
 			     sg, robj, &nvbo);
 	ww_mutex_unlock(&robj->lock);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index db35ab5883ac..b7ab268f7d6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -24,10 +24,10 @@ nouveau_sgdma_destroy(struct ttm_tt *ttm)
 }
 
 static int
-nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	if (ttm->sg) {
 		node->sg    = ttm->sg;
@@ -36,7 +36,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 
 	nvkm_vm_map(&node->vma[0], node);
 	nvbe->node = node;
@@ -58,10 +58,10 @@ static struct ttm_backend_func nv04_sgdma_backend = {
 };
 
 static int
-nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	/* noop: bound in move_notify() */
 	if (ttm->sg) {
@@ -71,7 +71,7 @@ nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 	return 0;
 }
 
@@ -100,7 +100,7 @@ nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
 	if (!nvbe)
 		return NULL;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		nvbe->ttm.ttm.func = &nv04_sgdma_backend;
 	else
 		nvbe->ttm.ttm.func = &nv50_sgdma_backend;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index ec4668a41e01..13e5cc5f07fe 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -36,7 +36,7 @@ static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	man->priv = fb;
 	return 0;
 }
@@ -64,45 +64,45 @@ nvkm_mem_node_cleanup(struct nvkm_mem *node)
 
 static void
 nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
-	nvkm_mem_node_cleanup(mem->mm_node);
-	ram->func->put(ram, (struct nvkm_mem **)&mem->mm_node);
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	ram->func->put(ram, (struct nvkm_mem **)&reg->mm_node);
 }
 
 static int
 nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_mem *node;
 	u32 size_nc = 0;
 	int ret;
 
-	if (drm->device.info.ram_size == 0)
+	if (drm->client.device.info.ram_size == 0)
 		return -ENOMEM;
 
 	if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG)
 		size_nc = 1 << nvbo->page_shift;
 
-	ret = ram->func->get(ram, mem->num_pages << PAGE_SHIFT,
-			     mem->page_alignment << PAGE_SHIFT, size_nc,
+	ret = ram->func->get(ram, reg->num_pages << PAGE_SHIFT,
+			     reg->page_alignment << PAGE_SHIFT, size_nc,
 			     (nvbo->tile_flags >> 8) & 0x3ff, &node);
 	if (ret) {
-		mem->mm_node = NULL;
+		reg->mm_node = NULL;
 		return (ret == -ENOSPC) ? 0 : ret;
 	}
 
 	node->page_shift = nvbo->page_shift;
 
-	mem->mm_node = node;
-	mem->start   = node->offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->offset >> PAGE_SHIFT;
 	return 0;
 }
 
@@ -127,18 +127,18 @@ nouveau_gart_manager_fini(struct ttm_mem_type_manager *man)
 
 static void
 nouveau_gart_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
-	nvkm_mem_node_cleanup(mem->mm_node);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -150,7 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 	case NV_DEVICE_INFO_V0_CELSIUS:
 	case NV_DEVICE_INFO_V0_KELVIN:
@@ -158,7 +158,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_CURIE:
 		break;
 	case NV_DEVICE_INFO_V0_TESLA:
-		if (drm->device.info.chipset != 0x50)
+		if (drm->client.device.info.chipset != 0x50)
 			node->memtype = (nvbo->tile_flags & 0x7f00) >> 8;
 		break;
 	case NV_DEVICE_INFO_V0_FERMI:
@@ -169,12 +169,12 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 		break;
 	default:
 		NV_WARN(drm, "%s: unhandled family type %x\n", __func__,
-			drm->device.info.family);
+			drm->client.device.info.family);
 		break;
 	}
 
-	mem->mm_node = node;
-	mem->start   = 0;
+	reg->mm_node = node;
+	reg->start   = 0;
 	return 0;
 }
 
@@ -197,7 +197,7 @@ static int
 nv04_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_mmu *mmu = nvxx_mmu(&drm->device);
+	struct nvkm_mmu *mmu = nvxx_mmu(&drm->client.device);
 	struct nv04_mmu *priv = (void *)mmu;
 	struct nvkm_vm *vm = NULL;
 	nvkm_vm_ref(priv->vm, &vm, NULL);
@@ -215,20 +215,20 @@ nv04_gart_manager_fini(struct ttm_mem_type_manager *man)
 }
 
 static void
-nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem)
+nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 	if (node->vma[0].node)
 		nvkm_vm_put(&node->vma[0]);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 		      struct ttm_buffer_object *bo,
 		      const struct ttm_place *place,
-		      struct ttm_mem_reg *mem)
+		      struct ttm_mem_reg *reg)
 {
 	struct nvkm_mem *node;
 	int ret;
@@ -239,15 +239,15 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	ret = nvkm_vm_get(man->priv, mem->num_pages << 12, node->page_shift,
+	ret = nvkm_vm_get(man->priv, reg->num_pages << 12, node->page_shift,
 			  NV_MEM_ACCESS_RW, &node->vma[0]);
 	if (ret) {
 		kfree(node);
 		return ret;
 	}
 
-	mem->mm_node = node;
-	mem->start   = node->vma[0].offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->vma[0].offset >> PAGE_SHIFT;
 	return 0;
 }
 
@@ -339,7 +339,7 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
 int
 nouveau_ttm_init(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_pci *pci = device->pci;
 	struct drm_device *dev = drm->dev;
 	u8 bits;
@@ -352,8 +352,8 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 		drm->agp.cma = pci->agp.cma;
 	}
 
-	bits = nvxx_mmu(&drm->device)->dma_bits;
-	if (nvxx_device(&drm->device)->func->pci) {
+	bits = nvxx_mmu(&drm->client.device)->dma_bits;
+	if (nvxx_device(&drm->client.device)->func->pci) {
 		if (drm->agp.bridge)
 			bits = 32;
 	} else if (device->func->tegra) {
@@ -396,7 +396,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	}
 
 	/* VRAM init */
-	drm->gem.vram_available = drm->device.info.ram_user;
+	drm->gem.vram_available = drm->client.device.info.ram_user;
 
 	arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
 				   device->func->resource_size(device, 1));
@@ -413,7 +413,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 
 	/* GART init */
 	if (!drm->agp.bridge) {
-		drm->gem.gart_available = nvxx_mmu(&drm->device)->limit;
+		drm->gem.gart_available = nvxx_mmu(&drm->client.device)->limit;
 	} else {
 		drm->gem.gart_available = drm->agp.size;
 	}
@@ -433,7 +433,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 void
 nouveau_ttm_fini(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 08f9c6fa0f7f..afbdbed1a690 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -103,7 +103,7 @@ usif_notify(const void *header, u32 length, const void *data, u32 size)
 	}
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -313,7 +313,8 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	if (!(ret = nvif_unpack(-ENOSYS, &data, &size, argv->v0, 0, 0, true))) {
 		/* block access to objects not created via this interface */
 		owner = argv->v0.owner;
-		if (argv->v0.object == 0ULL)
+		if (argv->v0.object == 0ULL &&
+		    argv->v0.type != NVIF_IOCTL_V0_DEL)
 			argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
 		else
 			argv->v0.owner = NVDRM_OBJECT_USIF;
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index c6a180a0c284..eef22c6b9665 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -13,13 +13,13 @@ static unsigned int
 nouveau_vga_set_decode(void *priv, bool state)
 {
 	struct nouveau_drm *drm = nouveau_drm(priv);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE &&
-	    drm->device.info.chipset >= 0x4c)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE &&
+	    drm->client.device.info.chipset >= 0x4c)
 		nvif_wr32(device, 0x088060, state);
 	else
-	if (drm->device.info.chipset >= 0x40)
+	if (drm->client.device.info.chipset >= 0x40)
 		nvif_wr32(device, 0x088054, state);
 	else
 		nvif_wr32(device, 0x001854, state);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 6a2b187e3c3b..01731dbeb3d8 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -136,7 +136,7 @@ nv04_fbcon_accel_init(struct fb_info *info)
 	struct drm_device *dev = nfbdev->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_channel *chan = drm->channel;
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int surface_fmt, pattern_fmt, rect_fmt;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 79bc01111351..6477b7069e14 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -76,9 +76,9 @@ nv17_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
 	int ret = 0;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -129,7 +129,7 @@ nv17_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 452da483ca01..0b4440ffbeae 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -447,18 +447,18 @@ nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb)
 	args.base.target = NV_DMA_V0_TARGET_VRAM;
 	args.base.access = NV_DMA_V0_ACCESS_RDWR;
 	args.base.start  = 0;
-	args.base.limit  = drm->device.info.ram_user - 1;
+	args.base.limit  = drm->client.device.info.ram_user - 1;
 
-	if (drm->device.info.chipset < 0x80) {
+	if (drm->client.device.info.chipset < 0x80) {
 		args.nv50.part = NV50_DMA_V0_PART_256;
 		argc += sizeof(args.nv50);
 	} else
-	if (drm->device.info.chipset < 0xc0) {
+	if (drm->client.device.info.chipset < 0xc0) {
 		args.nv50.part = NV50_DMA_V0_PART_256;
 		args.nv50.kind = kind;
 		argc += sizeof(args.nv50);
 	} else
-	if (drm->device.info.chipset < 0xd0) {
+	if (drm->client.device.info.chipset < 0xd0) {
 		args.gf100.kind = kind;
 		argc += sizeof(args.gf100);
 	} else {
@@ -848,7 +848,7 @@ nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
 	asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
 	if (asyw->image.kind) {
 		asyw->image.layout = 0;
-		if (drm->device.info.chipset >= 0xc0)
+		if (drm->client.device.info.chipset >= 0xc0)
 			asyw->image.block = fb->nvbo->tile_mode >> 4;
 		else
 			asyw->image.block = fb->nvbo->tile_mode;
@@ -1397,7 +1397,7 @@ nv50_base_ntfy_wait_begun(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
 	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
-	if (nvif_msec(&drm->device, 2000ULL,
+	if (nvif_msec(&drm->client.device, 2000ULL,
 		u32 data = nouveau_bo_rd32(disp->sync, asyw->ntfy.offset / 4);
 		if ((data & 0xc0000000) == 0x40000000)
 			break;
@@ -1522,7 +1522,7 @@ nv50_base_new(struct nouveau_drm *drm, struct nv50_head *head,
 		return ret;
 	}
 
-	ret = nv50_base_create(&drm->device, disp->disp, base->id,
+	ret = nv50_base_create(&drm->client.device, disp->disp, base->id,
 			       disp->sync->bo.offset, &base->chan);
 	if (ret)
 		return ret;
@@ -2394,7 +2394,7 @@ static int
 nv50_head_create(struct drm_device *dev, int index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nv50_disp *disp = nv50_disp(dev);
 	struct nv50_head *head;
 	struct nv50_base *base;
@@ -2428,7 +2428,7 @@ nv50_head_create(struct drm_device *dev, int index)
 	drm_crtc_helper_add(crtc, &nv50_head_help);
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
-	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 8192, 0x100, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &head->base.lut.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(head->base.lut.nvbo, TTM_PL_FLAG_VRAM, true);
@@ -2667,7 +2667,7 @@ static int
 nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus;
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
@@ -3623,7 +3623,7 @@ nv50_sor_enable(struct drm_encoder *encoder)
 		nv50_audio_enable(encoder, mode);
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -3657,7 +3657,7 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 	int type, ret;
@@ -3796,7 +3796,7 @@ nv50_pior_enable(struct drm_encoder *encoder)
 		proto = 0x0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -3842,7 +3842,7 @@ static int
 nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = NULL;
 	struct nvkm_i2c_aux *aux = NULL;
 	struct i2c_adapter *ddc;
@@ -3915,7 +3915,7 @@ nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 interlock)
 		evo_data(push, 0x00000000);
 		nouveau_bo_wr32(disp->sync, 0, 0x00000000);
 		evo_kick(push, core);
-		if (nvif_msec(&drm->device, 2000ULL,
+		if (nvif_msec(&drm->client.device, 2000ULL,
 			if (nouveau_bo_rd32(disp->sync, 0))
 				break;
 			usleep_range(1, 2);
@@ -4050,6 +4050,11 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 		}
 	}
 
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc->state->event)
+			drm_crtc_vblank_get(crtc);
+	}
+
 	/* Update plane(s). */
 	for_each_plane_in_state(state, plane, plane_state, i) {
 		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
@@ -4099,6 +4104,7 @@ nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 			drm_crtc_send_vblank_event(crtc, crtc->state->event);
 			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 			crtc->state->event = NULL;
+			drm_crtc_vblank_put(crtc);
 		}
 	}
 
@@ -4427,7 +4433,7 @@ module_param_named(atomic, nouveau_atomic, int, 0400);
 int
 nv50_display_create(struct drm_device *dev)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *tmp;
@@ -4451,7 +4457,7 @@ nv50_display_create(struct drm_device *dev)
 		dev->driver->driver_features |= DRIVER_ATOMIC;
 
 	/* small shared memory area we use for notifiers and semaphores */
-	ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &disp->sync);
 	if (!ret) {
 		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true);
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index f68c7054fd53..a369d978e267 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -37,9 +37,9 @@ nv50_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
 	int ret;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -82,7 +82,7 @@ nv50_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 52b87ae83e7b..bd7a8a1e4ad9 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -107,8 +107,10 @@ nv84_fence_context_del(struct nouveau_channel *chan)
 	struct nv84_fence_chan *fctx = chan->fence;
 
 	nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence);
+	mutex_lock(&priv->mutex);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma_gart);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	mutex_unlock(&priv->mutex);
 	nouveau_fence_context_del(&fctx->base);
 	chan->fence = NULL;
 	nouveau_fence_context_free(&fctx->base);
@@ -134,11 +136,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	fctx->base.sync32 = nv84_fence_sync32;
 	fctx->base.sequence = nv84_fence_read(chan);
 
+	mutex_lock(&priv->mutex);
 	ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
 	if (ret == 0) {
 		ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm,
 					&fctx->vma_gart);
 	}
+	mutex_unlock(&priv->mutex);
 
 	if (ret)
 		nv84_fence_context_del(chan);
@@ -193,7 +197,7 @@ nv84_fence_destroy(struct nouveau_drm *drm)
 int
 nv84_fence_create(struct nouveau_drm *drm)
 {
-	struct nvkm_fifo *fifo = nvxx_fifo(&drm->device);
+	struct nvkm_fifo *fifo = nvxx_fifo(&drm->client.device);
 	struct nv84_fence_priv *priv;
 	u32 domain;
 	int ret;
@@ -212,15 +216,17 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
+	mutex_init(&priv->mutex);
+
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	domain = drm->device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
+	domain = drm->client.device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
 			 /*
 			  * fences created in sysmem must be non-cached or we
 			  * will lose CPU/GPU coherency!
 			  */
 			 TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
-	ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0, domain, 0,
-			     0, NULL, NULL, &priv->bo);
+	ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
+			     domain, 0, 0, NULL, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, domain, false);
 		if (ret == 0) {
@@ -233,7 +239,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	}
 
 	if (ret == 0)
-		ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
+		ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
 				     TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED, 0,
 				     0, NULL, NULL, &priv->bo_gart);
 	if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild
index ff8ed3a04d06..067b5e9f5ec1 100644
--- a/drivers/gpu/drm/nouveau/nvif/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvif/Kbuild
@@ -1,4 +1,5 @@
 nvif-y := nvif/object.o
 nvif-y += nvif/client.o
 nvif-y += nvif/device.o
+nvif-y += nvif/driver.o
 nvif-y += nvif/notify.o
diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c
index 29c20dfd894d..12db54965c20 100644
--- a/drivers/gpu/drm/nouveau/nvif/client.c
+++ b/drivers/gpu/drm/nouveau/nvif/client.c
@@ -26,6 +26,9 @@
 #include <nvif/driver.h>
 #include <nvif/ioctl.h>
 
+#include <nvif/class.h>
+#include <nvif/if0000.h>
+
 int
 nvif_client_ioctl(struct nvif_client *client, void *data, u32 size)
 {
@@ -47,37 +50,29 @@ nvif_client_resume(struct nvif_client *client)
 void
 nvif_client_fini(struct nvif_client *client)
 {
+	nvif_object_fini(&client->object);
 	if (client->driver) {
-		client->driver->fini(client->object.priv);
+		if (client->driver->fini)
+			client->driver->fini(client->object.priv);
 		client->driver = NULL;
-		client->object.client = NULL;
-		nvif_object_fini(&client->object);
 	}
 }
 
-static const struct nvif_driver *
-nvif_drivers[] = {
-#ifdef __KERNEL__
-	&nvif_driver_nvkm,
-#else
-	&nvif_driver_drm,
-	&nvif_driver_lib,
-	&nvif_driver_null,
-#endif
-	NULL
-};
-
 int
-nvif_client_init(const char *driver, const char *name, u64 device,
-		 const char *cfg, const char *dbg, struct nvif_client *client)
+nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
+		 struct nvif_client *client)
 {
+	struct nvif_client_v0 args = { .device = device };
 	struct {
 		struct nvif_ioctl_v0 ioctl;
 		struct nvif_ioctl_nop_v0 nop;
-	} args = {};
-	int ret, i;
+	} nop = {};
+	int ret;
 
-	ret = nvif_object_init(NULL, 0, 0, NULL, 0, &client->object);
+	strncpy(args.name, name, sizeof(args.name));
+	ret = nvif_object_init(parent != client ? &parent->object : NULL,
+			       0, NVIF_CLASS_CLIENT, &args, sizeof(args),
+			       &client->object);
 	if (ret)
 		return ret;
 
@@ -85,19 +80,11 @@ nvif_client_init(const char *driver, const char *name, u64 device,
 	client->object.handle = ~0;
 	client->route = NVIF_IOCTL_V0_ROUTE_NVIF;
 	client->super = true;
-
-	for (i = 0, ret = -EINVAL; (client->driver = nvif_drivers[i]); i++) {
-		if (!driver || !strcmp(client->driver->name, driver)) {
-			ret = client->driver->init(name, device, cfg, dbg,
-						  &client->object.priv);
-			if (!ret || driver)
-				break;
-		}
-	}
+	client->driver = parent->driver;
 
 	if (ret == 0) {
-		ret = nvif_client_ioctl(client, &args, sizeof(args));
-		client->version = args.nop.version;
+		ret = nvif_client_ioctl(client, &nop, sizeof(nop));
+		client->version = nop.nop.version;
 	}
 
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvif/driver.c b/drivers/gpu/drm/nouveau/nvif/driver.c
new file mode 100644
index 000000000000..701330956e33
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/driver.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include <nvif/driver.h>
+#include <nvif/client.h>
+
+static const struct nvif_driver *
+nvif_driver[] = {
+#ifdef __KERNEL__
+	&nvif_driver_nvkm,
+#else
+	&nvif_driver_drm,
+	&nvif_driver_lib,
+	&nvif_driver_null,
+#endif
+	NULL
+};
+
+int
+nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		 const char *name, u64 device, struct nvif_client *client)
+{
+	int ret = -EINVAL, i;
+
+	for (i = 0; (client->driver = nvif_driver[i]); i++) {
+		if (!drv || !strcmp(client->driver->name, drv)) {
+			ret = client->driver->init(name, device, cfg, dbg,
+						   &client->object.priv);
+			if (ret == 0)
+				break;
+			client->driver->fini(client->object.priv);
+		}
+	}
+
+	if (ret == 0)
+		ret = nvif_client_init(client, name, device, client);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/Kbuild
index 2832147b676c..e664378f6eda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/Kbuild
@@ -1,3 +1,4 @@
 include $(src)/nvkm/core/Kbuild
+include $(src)/nvkm/falcon/Kbuild
 include $(src)/nvkm/subdev/Kbuild
 include $(src)/nvkm/engine/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index e1943910858e..0d3a896892b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -31,6 +31,43 @@
 #include <nvif/if0000.h>
 #include <nvif/unpack.h>
 
+static int
+nvkm_uclient_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		 struct nvkm_object **pobject)
+{
+	union {
+		struct nvif_client_v0 v0;
+	} *args = argv;
+	struct nvkm_client *client;
+	int ret = -ENOSYS;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))){
+		args->v0.name[sizeof(args->v0.name) - 1] = 0;
+		ret = nvkm_client_new(args->v0.name, args->v0.device, NULL,
+				      NULL, oclass->client->ntfy, &client);
+		if (ret)
+			return ret;
+	} else
+		return ret;
+
+	client->object.client = oclass->client;
+	client->object.handle = oclass->handle;
+	client->object.route  = oclass->route;
+	client->object.token  = oclass->token;
+	client->object.object = oclass->object;
+	client->debug = oclass->client->debug;
+	*pobject = &client->object;
+	return 0;
+}
+
+const struct nvkm_sclass
+nvkm_uclient_sclass = {
+	.oclass = NVIF_CLASS_CLIENT,
+	.minver = 0,
+	.maxver = 0,
+	.ctor = nvkm_uclient_new,
+};
+
 struct nvkm_client_notify {
 	struct nvkm_client *client;
 	struct nvkm_notify n;
@@ -138,17 +175,30 @@ nvkm_client_notify_new(struct nvkm_object *object,
 	return ret;
 }
 
+static const struct nvkm_object_func nvkm_client;
+struct nvkm_client *
+nvkm_client_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_client);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_client(object);
+}
+
 static int
-nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
+nvkm_client_mthd_devlist(struct nvkm_client *client, void *data, u32 size)
 {
 	union {
-		struct nv_client_devlist_v0 v0;
+		struct nvif_client_devlist_v0 v0;
 	} *args = data;
 	int ret = -ENOSYS;
 
-	nvif_ioctl(object, "client devlist size %d\n", size);
+	nvif_ioctl(&client->object, "client devlist size %d\n", size);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "client devlist vers %d count %d\n",
+		nvif_ioctl(&client->object, "client devlist vers %d count %d\n",
 			   args->v0.version, args->v0.count);
 		if (size == sizeof(args->v0.device[0]) * args->v0.count) {
 			ret = nvkm_device_list(args->v0.device, args->v0.count);
@@ -167,9 +217,10 @@ nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
 static int
 nvkm_client_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
+	struct nvkm_client *client = nvkm_client(object);
 	switch (mthd) {
-	case NV_CLIENT_DEVLIST:
-		return nvkm_client_mthd_devlist(object, data, size);
+	case NVIF_CLIENT_V0_DEVLIST:
+		return nvkm_client_mthd_devlist(client, data, size);
 	default:
 		break;
 	}
@@ -190,7 +241,8 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	const struct nvkm_sclass *sclass;
 
 	switch (index) {
-	case 0: sclass = &nvkm_udevice_sclass; break;
+	case 0: sclass = &nvkm_uclient_sclass; break;
+	case 1: sclass = &nvkm_udevice_sclass; break;
 	default:
 		return -EINVAL;
 	}
@@ -200,110 +252,54 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	return 0;
 }
 
-static const struct nvkm_object_func
-nvkm_client_object_func = {
-	.mthd = nvkm_client_mthd,
-	.sclass = nvkm_client_child_get,
-};
-
-void
-nvkm_client_remove(struct nvkm_client *client, struct nvkm_object *object)
-{
-	if (!RB_EMPTY_NODE(&object->node))
-		rb_erase(&object->node, &client->objroot);
-}
-
-bool
-nvkm_client_insert(struct nvkm_client *client, struct nvkm_object *object)
-{
-	struct rb_node **ptr = &client->objroot.rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*ptr) {
-		struct nvkm_object *this =
-			container_of(*ptr, typeof(*this), node);
-		parent = *ptr;
-		if (object->object < this->object)
-			ptr = &parent->rb_left;
-		else
-		if (object->object > this->object)
-			ptr = &parent->rb_right;
-		else
-			return false;
-	}
-
-	rb_link_node(&object->node, parent, ptr);
-	rb_insert_color(&object->node, &client->objroot);
-	return true;
-}
-
-struct nvkm_object *
-nvkm_client_search(struct nvkm_client *client, u64 handle)
-{
-	struct rb_node *node = client->objroot.rb_node;
-	while (node) {
-		struct nvkm_object *object =
-			container_of(node, typeof(*object), node);
-		if (handle < object->object)
-			node = node->rb_left;
-		else
-		if (handle > object->object)
-			node = node->rb_right;
-		else
-			return object;
-	}
-	return NULL;
-}
-
-int
-nvkm_client_fini(struct nvkm_client *client, bool suspend)
+static int
+nvkm_client_fini(struct nvkm_object *object, bool suspend)
 {
-	struct nvkm_object *object = &client->object;
+	struct nvkm_client *client = nvkm_client(object);
 	const char *name[2] = { "fini", "suspend" };
 	int i;
 	nvif_debug(object, "%s notify\n", name[suspend]);
 	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
 		nvkm_client_notify_put(client, i);
-	return nvkm_object_fini(&client->object, suspend);
-}
-
-int
-nvkm_client_init(struct nvkm_client *client)
-{
-	return nvkm_object_init(&client->object);
+	return 0;
 }
 
-void
-nvkm_client_del(struct nvkm_client **pclient)
+static void *
+nvkm_client_dtor(struct nvkm_object *object)
 {
-	struct nvkm_client *client = *pclient;
+	struct nvkm_client *client = nvkm_client(object);
 	int i;
-	if (client) {
-		nvkm_client_fini(client, false);
-		for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-			nvkm_client_notify_del(client, i);
-		nvkm_object_dtor(&client->object);
-		kfree(*pclient);
-		*pclient = NULL;
-	}
+	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
+		nvkm_client_notify_del(client, i);
+	return client;
 }
 
+static const struct nvkm_object_func
+nvkm_client = {
+	.dtor = nvkm_client_dtor,
+	.fini = nvkm_client_fini,
+	.mthd = nvkm_client_mthd,
+	.sclass = nvkm_client_child_get,
+};
+
 int
 nvkm_client_new(const char *name, u64 device, const char *cfg,
-		const char *dbg, struct nvkm_client **pclient)
+		const char *dbg,
+		int (*ntfy)(const void *, u32, const void *, u32),
+		struct nvkm_client **pclient)
 {
-	struct nvkm_oclass oclass = {};
+	struct nvkm_oclass oclass = { .base = nvkm_uclient_sclass };
 	struct nvkm_client *client;
 
 	if (!(client = *pclient = kzalloc(sizeof(*client), GFP_KERNEL)))
 		return -ENOMEM;
 	oclass.client = client;
 
-	nvkm_object_ctor(&nvkm_client_object_func, &oclass, &client->object);
+	nvkm_object_ctor(&nvkm_client, &oclass, &client->object);
 	snprintf(client->name, sizeof(client->name), "%s", name);
 	client->device = device;
 	client->debug = nvkm_dbgopt(dbg, "CLIENT");
 	client->objroot = RB_ROOT;
-	client->dmaroot = RB_ROOT;
+	client->ntfy = ntfy;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index ee8e5831fe37..b6c916954a10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -27,6 +27,14 @@
 
 #include <subdev/fb.h>
 
+bool
+nvkm_engine_chsw_load(struct nvkm_engine *engine)
+{
+	if (engine->func->chsw_load)
+		return engine->func->chsw_load(engine);
+	return false;
+}
+
 void
 nvkm_engine_unref(struct nvkm_engine **pengine)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index b0db51847c36..be19bbe56bba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -29,7 +29,8 @@
 #include <nvif/ioctl.h>
 
 static int
-nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_nop(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_nop_v0 v0;
@@ -46,7 +47,8 @@ nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_sclass(struct nvkm_client *client,
+		  struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_sclass_v0 v0;
@@ -78,12 +80,12 @@ nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
+nvkm_ioctl_new(struct nvkm_client *client,
+	       struct nvkm_object *parent, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_new_v0 v0;
 	} *args = data;
-	struct nvkm_client *client = parent->client;
 	struct nvkm_object *object = NULL;
 	struct nvkm_oclass oclass;
 	int ret = -ENOSYS, i = 0;
@@ -104,9 +106,11 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 
 	do {
 		memset(&oclass, 0x00, sizeof(oclass));
-		oclass.client = client;
 		oclass.handle = args->v0.handle;
+		oclass.route  = args->v0.route;
+		oclass.token  = args->v0.token;
 		oclass.object = args->v0.object;
+		oclass.client = client;
 		oclass.parent = parent;
 		ret = parent->func->sclass(parent, i++, &oclass);
 		if (ret)
@@ -125,10 +129,7 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 		ret = nvkm_object_init(object);
 		if (ret == 0) {
 			list_add(&object->head, &parent->tree);
-			object->route = args->v0.route;
-			object->token = args->v0.token;
-			object->object = args->v0.object;
-			if (nvkm_client_insert(client, object)) {
+			if (nvkm_object_insert(object)) {
 				client->data = object;
 				return 0;
 			}
@@ -142,7 +143,8 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_del(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_del none;
@@ -156,11 +158,12 @@ nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
 		nvkm_object_del(&object);
 	}
 
-	return ret;
+	return ret ? ret : 1;
 }
 
 static int
-nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_mthd(struct nvkm_client *client,
+		struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_mthd_v0 v0;
@@ -179,7 +182,8 @@ nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
 
 
 static int
-nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_rd(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_rd_v0 v0;
@@ -218,7 +222,8 @@ nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_wr(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_wr_v0 v0;
@@ -246,7 +251,8 @@ nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_map(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_map_v0 v0;
@@ -264,7 +270,8 @@ nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_unmap(struct nvkm_client *client,
+		 struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_unmap none;
@@ -280,7 +287,8 @@ nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_new(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_ntfy_new_v0 v0;
@@ -306,9 +314,9 @@ nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_del(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_del_v0 v0;
 	} *args = data;
@@ -325,9 +333,9 @@ nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_get(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_get_v0 v0;
 	} *args = data;
@@ -344,9 +352,9 @@ nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_put(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_put_v0 v0;
 	} *args = data;
@@ -364,7 +372,7 @@ nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
 
 static struct {
 	int version;
-	int (*func)(struct nvkm_object *, void *, u32);
+	int (*func)(struct nvkm_client *, struct nvkm_object *, void *, u32);
 }
 nvkm_ioctl_v0[] = {
 	{ 0x00, nvkm_ioctl_nop },
@@ -389,13 +397,10 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 	struct nvkm_object *object;
 	int ret;
 
-	if (handle)
-		object = nvkm_client_search(client, handle);
-	else
-		object = &client->object;
-	if (unlikely(!object)) {
+	object = nvkm_object_search(client, handle, NULL);
+	if (IS_ERR(object)) {
 		nvif_ioctl(&client->object, "object not found\n");
-		return -ENOENT;
+		return PTR_ERR(object);
 	}
 
 	if (owner != NVIF_IOCTL_V0_OWNER_ANY && owner != object->route) {
@@ -407,7 +412,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 
 	if (ret = -EINVAL, type < ARRAY_SIZE(nvkm_ioctl_v0)) {
 		if (nvkm_ioctl_v0[type].version == 0)
-			ret = nvkm_ioctl_v0[type].func(object, data, size);
+			ret = nvkm_ioctl_v0[type].func(client, object, data, size);
 	}
 
 	return ret;
@@ -436,12 +441,13 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor,
 				      &args->v0.route, &args->v0.token);
 	}
 
-	nvif_ioctl(object, "return %d\n", ret);
-	if (hack) {
-		*hack = client->data;
-		client->data = NULL;
+	if (ret != 1) {
+		nvif_ioctl(object, "return %d\n", ret);
+		if (hack) {
+			*hack = client->data;
+			client->data = NULL;
+		}
 	}
 
-	client->super = false;
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
index 09a1eee8fd33..fd19d652a7ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
@@ -147,6 +147,7 @@ nvkm_mm_head(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
@@ -225,6 +226,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 67aa7223dcd7..89d2e9da11c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -25,6 +25,65 @@
 #include <core/client.h>
 #include <core/engine.h>
 
+struct nvkm_object *
+nvkm_object_search(struct nvkm_client *client, u64 handle,
+		   const struct nvkm_object_func *func)
+{
+	struct nvkm_object *object;
+
+	if (handle) {
+		struct rb_node *node = client->objroot.rb_node;
+		while (node) {
+			object = rb_entry(node, typeof(*object), node);
+			if (handle < object->object)
+				node = node->rb_left;
+			else
+			if (handle > object->object)
+				node = node->rb_right;
+			else
+				goto done;
+		}
+		return ERR_PTR(-ENOENT);
+	} else {
+		object = &client->object;
+	}
+
+done:
+	if (unlikely(func && object->func != func))
+		return ERR_PTR(-EINVAL);
+	return object;
+}
+
+void
+nvkm_object_remove(struct nvkm_object *object)
+{
+	if (!RB_EMPTY_NODE(&object->node))
+		rb_erase(&object->node, &object->client->objroot);
+}
+
+bool
+nvkm_object_insert(struct nvkm_object *object)
+{
+	struct rb_node **ptr = &object->client->objroot.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*ptr) {
+		struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node);
+		parent = *ptr;
+		if (object->object < this->object)
+			ptr = &parent->rb_left;
+		else
+		if (object->object > this->object)
+			ptr = &parent->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&object->node, parent, ptr);
+	rb_insert_color(&object->node, &object->client->objroot);
+	return true;
+}
+
 int
 nvkm_object_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
@@ -214,7 +273,7 @@ nvkm_object_del(struct nvkm_object **pobject)
 	struct nvkm_object *object = *pobject;
 	if (object && !WARN_ON(!object->func)) {
 		*pobject = nvkm_object_dtor(object);
-		nvkm_client_remove(object->client, object);
+		nvkm_object_remove(object);
 		list_del(&object->head);
 		kfree(*pobject);
 		*pobject = NULL;
@@ -230,6 +289,9 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	object->engine = nvkm_engine_ref(oclass->engine);
 	object->oclass = oclass->base.oclass;
 	object->handle = oclass->handle;
+	object->route  = oclass->route;
+	object->token  = oclass->token;
+	object->object = oclass->object;
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index cceda959b47c..273562dd6bbd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -993,7 +993,7 @@ nv92_chipset = {
 	.mc = g84_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = g84_pci_new,
+	.pci = g92_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -2138,6 +2138,7 @@ nv12b_chipset = {
 	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
+	.pmu = gm20b_pmu_new,
 	.secboot = gm20b_secboot_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
index 0a1381a84552..070ec5e18fdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
@@ -137,7 +137,6 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
 	struct nvkm_client *client = oclass->client;
 	struct nvkm_dmaobj *dmaobj;
 	struct nv50_disp_dmac *chan;
@@ -153,9 +152,9 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 	if (ret)
 		return ret;
 
-	dmaobj = nvkm_dma_search(device->dma, client, push);
-	if (!dmaobj)
-		return -ENOENT;
+	dmaobj = nvkm_dmaobj_search(client, push);
+	if (IS_ERR(dmaobj))
+		return PTR_ERR(dmaobj);
 
 	if (dmaobj->limit - dmaobj->start != 0xfff)
 		return -EINVAL;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 6f0436df0219..f8f2f16c22a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1)
 			);
 		}
 		for (i = 0; i < size; i++)
-			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
+			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]);
 		for (; i < 0x60; i++)
 			nvkm_wr32(device, 0x61c440 + soff, (i << 8));
 		nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index 567466f93cd5..0db8efbf1c2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device)
 	case 0x94:
 	case 0x96:
 	case 0x98:
-	case 0xaa:
-	case 0xac:
 		return true;
 	default:
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
index 4510cb6e10a8..627b9ee1ddd2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
@@ -39,13 +39,6 @@ g94_sor_loff(struct nvkm_output_dp *outp)
 }
 
 /*******************************************************************************
- * TMDS/LVDS
- ******************************************************************************/
-static const struct nvkm_output_func
-g94_sor_output_func = {
-};
-
-/*******************************************************************************
  * DisplayPort
  ******************************************************************************/
 u32
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
index f11ebdd16c77..11b7b8fd5dda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
@@ -28,24 +28,6 @@
 
 #include <nvif/class.h>
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *dma, struct nvkm_client *client, u64 object)
-{
-	struct rb_node *node = client->dmaroot.rb_node;
-	while (node) {
-		struct nvkm_dmaobj *dmaobj =
-			container_of(node, typeof(*dmaobj), rb);
-		if (object < dmaobj->handle)
-			node = node->rb_left;
-		else
-		if (object > dmaobj->handle)
-			node = node->rb_right;
-		else
-			return dmaobj;
-	}
-	return NULL;
-}
-
 static int
 nvkm_dma_oclass_new(struct nvkm_device *device,
 		    const struct nvkm_oclass *oclass, void *data, u32 size,
@@ -53,34 +35,12 @@ nvkm_dma_oclass_new(struct nvkm_device *device,
 {
 	struct nvkm_dma *dma = nvkm_dma(oclass->engine);
 	struct nvkm_dmaobj *dmaobj = NULL;
-	struct nvkm_client *client = oclass->client;
-	struct rb_node **ptr = &client->dmaroot.rb_node;
-	struct rb_node *parent = NULL;
 	int ret;
 
 	ret = dma->func->class_new(dma, oclass, data, size, &dmaobj);
 	if (dmaobj)
 		*pobject = &dmaobj->object;
-	if (ret)
-		return ret;
-
-	dmaobj->handle = oclass->object;
-
-	while (*ptr) {
-		struct nvkm_dmaobj *obj = container_of(*ptr, typeof(*obj), rb);
-		parent = *ptr;
-		if (dmaobj->handle < obj->handle)
-			ptr = &parent->rb_left;
-		else
-		if (dmaobj->handle > obj->handle)
-			ptr = &parent->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&dmaobj->rb, parent, ptr);
-	rb_insert_color(&dmaobj->rb, &client->dmaroot);
-	return 0;
+	return ret;
 }
 
 static const struct nvkm_device_oclass
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
index 13c661b1ef14..d20cc0681a88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
@@ -31,6 +31,19 @@
 #include <nvif/cl0002.h>
 #include <nvif/unpack.h>
 
+static const struct nvkm_object_func nvkm_dmaobj_func;
+struct nvkm_dmaobj *
+nvkm_dmaobj_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_dmaobj_func);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_dmaobj(object);
+}
+
 static int
 nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 		 int align, struct nvkm_gpuobj **pgpuobj)
@@ -42,10 +55,7 @@ nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 static void *
 nvkm_dmaobj_dtor(struct nvkm_object *base)
 {
-	struct nvkm_dmaobj *dmaobj = nvkm_dmaobj(base);
-	if (!RB_EMPTY_NODE(&dmaobj->rb))
-		rb_erase(&dmaobj->rb, &dmaobj->object.client->dmaroot);
-	return dmaobj;
+	return nvkm_dmaobj(base);
 }
 
 static const struct nvkm_object_func
@@ -74,7 +84,6 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
 	nvkm_object_ctor(&nvkm_dmaobj_func, oclass, &dmaobj->object);
 	dmaobj->func = func;
 	dmaobj->dma = dma;
-	RB_CLEAR_NODE(&dmaobj->rb);
 
 	nvif_ioctl(parent, "create dma size %d\n", *psize);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 1c9682ae3a6b..660ca7aa95ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -32,6 +32,17 @@
 #include <nvif/unpack.h>
 
 void
+nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
+{
+	unsigned long flags;
+	if (WARN_ON(!fifo->func->recover_chan))
+		return;
+	spin_lock_irqsave(&fifo->lock, flags);
+	fifo->func->recover_chan(fifo, chid);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
 nvkm_fifo_pause(struct nvkm_fifo *fifo, unsigned long *flags)
 {
 	return fifo->func->pause(fifo, flags);
@@ -55,19 +66,29 @@ nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
 }
 
 struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst)
 {
 	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
 	list_for_each_entry(chan, &fifo->chan, head) {
 		if (chan->inst->addr == inst) {
 			list_del(&chan->head);
 			list_add(&chan->head, &fifo->chan);
-			*rflags = flags;
 			return chan;
 		}
 	}
+	return NULL;
+}
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+{
+	struct nvkm_fifo_chan *chan;
+	unsigned long flags;
+	spin_lock_irqsave(&fifo->lock, flags);
+	if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) {
+		*rflags = flags;
+		return chan;
+	}
 	spin_unlock_irqrestore(&fifo->lock, flags);
 	return NULL;
 }
@@ -90,9 +111,34 @@ nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags)
 	return NULL;
 }
 
+void
+nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid)
+{
+	nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0);
+}
+
 static int
-nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
-		     struct nvkm_notify *notify)
+nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
+{
+	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
+	if (size == 0) {
+		notify->size  = 0;
+		notify->types = 1;
+		notify->index = chan->chid;
+		return 0;
+	}
+	return -ENOSYS;
+}
+
+static const struct nvkm_event_func
+nvkm_fifo_kevent_func = {
+	.ctor = nvkm_fifo_kevent_ctor,
+};
+
+static int
+nvkm_fifo_cevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	if (size == 0) {
 		notify->size  = 0;
@@ -104,10 +150,16 @@ nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
 }
 
 static const struct nvkm_event_func
-nvkm_fifo_event_func = {
-	.ctor = nvkm_fifo_event_ctor,
+nvkm_fifo_cevent_func = {
+	.ctor = nvkm_fifo_cevent_ctor,
 };
 
+void
+nvkm_fifo_cevent(struct nvkm_fifo *fifo)
+{
+	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+}
+
 static void
 nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index)
 {
@@ -241,6 +293,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
 	void *data = fifo;
 	if (fifo->func->dtor)
 		data = fifo->func->dtor(fifo);
+	nvkm_event_fini(&fifo->kevent);
 	nvkm_event_fini(&fifo->cevent);
 	nvkm_event_fini(&fifo->uevent);
 	return data;
@@ -283,5 +336,9 @@ nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 			return ret;
 	}
 
-	return nvkm_event_init(&nvkm_fifo_event_func, 1, 1, &fifo->cevent);
+	ret = nvkm_event_init(&nvkm_fifo_cevent_func, 1, 1, &fifo->cevent);
+	if (ret)
+		return ret;
+
+	return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index dc6d4678f228..fab760ae922f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -371,9 +371,9 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 
 	/* allocate push buffer ctxdma instance */
 	if (push) {
-		dmaobj = nvkm_dma_search(device->dma, oclass->client, push);
-		if (!dmaobj)
-			return -ENOENT;
+		dmaobj = nvkm_dmaobj_search(client, push);
+		if (IS_ERR(dmaobj))
+			return PTR_ERR(dmaobj);
 
 		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16,
 				       &chan->push);
@@ -410,6 +410,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 		     base + user * chan->chid;
 	chan->size = user;
 
-	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+	nvkm_fifo_cevent(fifo);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index 55dc415c5c08..d8019bdacd61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -29,5 +29,5 @@ struct nvkm_fifo_chan_oclass {
 	struct nvkm_sclass base;
 };
 
-int g84_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
+int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
index 15a992b3580a..61797c4dd07a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
@@ -30,12 +30,12 @@
 
 #include <nvif/cl826e.h>
 
-int
+static int
 g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
 		   struct nvkm_event **pevent)
 {
 	switch (type) {
-	case G82_CHANNEL_DMA_V0_NTFY_UEVENT:
+	case NV826E_V0_NTFY_NON_STALL_INTERRUPT:
 		*pevent = &chan->fifo->uevent;
 		return 0;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index ec68ea9747d5..cd468ab1db12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -68,7 +68,14 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	}
 	nvkm_done(cur);
 
-	target = (nvkm_memory_target(cur) == NVKM_MEM_TARGET_HOST) ? 0x3 : 0x0;
+	switch (nvkm_memory_target(cur)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		mutex_unlock(&subdev->mutex);
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
 				    (target << 28));
@@ -183,6 +190,7 @@ gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
 	if (engine != &fifo->base.engine)
 		fifo->recover.mask |= 1ULL << engine->subdev.index;
 	schedule_work(&fifo->recover.work);
+	nvkm_fifo_kevent(&fifo->base, chid);
 }
 
 static const struct nvkm_enum
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 38c0910722c0..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy     = !!(stat & 0x80000000);
+	status->faulted  = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id  =   (stat & 0x0fff0000) >> 16;
+	status->chsw     = !!(stat & 0x00008000);
+	status->save     = !!(stat & 0x00004000);
+	status->load     = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id  =   (stat & 0x00000fff);
+	status->chan     = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
+}
+
 static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
 		     const struct nvkm_fifo_chan_oclass **psclass)
@@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
-		target = 0;
-	else
-		target = 3;
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
 				    (target << 28));
@@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w)
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		   struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engn;
+	const u32 runm = BIT(runl);
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   nvkm_subdev_name[engine->subdev.index], chid);
 	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.runm & runm)
+		return;
+	fifo->recover.runm |= runm;
 
-	nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-	list_del_init(&chan->head);
-	chan->killed = true;
+	/* Block runlist to prevent channel assignment(s) from changing. */
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			fifo->recover.engm |= BIT(engn);
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+	schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+	const u32  runl = (stat & 0x000f0000) >> 16;
+	const bool used = (stat & 0x00000001);
+	unsigned long engn, engm = fifo->runlist[runl].engm;
+	struct gk104_fifo_chan *chan;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (!used)
+		return;
+
+	/* Lookup SW state for channel, and mark it as dead. */
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			chan->killed = true;
+			nvkm_fifo_kevent(&fifo->base, chid);
 			break;
 		}
 	}
 
-	fifo->recover.runm |= BIT(chan->runl);
+	/* Disable channel. */
+	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+	nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Schedule recovery for any engines the channel is on. */
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is not longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger a MMU fault for the engine.
+	 *
+	 * No good idea why this is needed, but nvgpu does something similar,
+	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
 
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
+
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
index 12d964260a29..f9e0377d3d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
@@ -32,6 +32,23 @@
 #include <nvif/cl906f.h>
 #include <nvif/unpack.h>
 
+int
+gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
+		     struct nvkm_event **pevent)
+{
+	switch (type) {
+	case NV906F_V0_NTFY_NON_STALL_INTERRUPT:
+		*pevent = &chan->fifo->uevent;
+		return 0;
+	case NV906F_V0_NTFY_KILLED:
+		*pevent = &chan->fifo->kevent;
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 static u32
 gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 {
@@ -184,7 +201,7 @@ gf100_fifo_gpfifo_func = {
 	.dtor = gf100_fifo_gpfifo_dtor,
 	.init = gf100_fifo_gpfifo_init,
 	.fini = gf100_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gf100_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gf100_fifo_gpfifo_engine_dtor,
 	.engine_init = gf100_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index a2df4f3e7763..8abf6f8ef445 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -50,6 +50,7 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 	) < 0) {
 		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
 			   chan->base.chid, client->name);
+		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
 		ret = -ETIMEDOUT;
 	}
 	mutex_unlock(&subdev->mutex);
@@ -213,7 +214,7 @@ gk104_fifo_gpfifo_func = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
 	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
 	.engine_init = gk104_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index f6dfb37d9429..f889b13b5e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -6,6 +6,12 @@
 int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *,
 		   int index, int nr, struct nvkm_fifo *);
 void nvkm_fifo_uevent(struct nvkm_fifo *);
+void nvkm_fifo_cevent(struct nvkm_fifo *);
+void nvkm_fifo_kevent(struct nvkm_fifo *, int chid);
+void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid);
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst);
 
 struct nvkm_fifo_chan_oclass;
 struct nvkm_fifo_func {
@@ -18,6 +24,7 @@ struct nvkm_fifo_func {
 	void (*start)(struct nvkm_fifo *, unsigned long *);
 	void (*uevent_init)(struct nvkm_fifo *);
 	void (*uevent_fini)(struct nvkm_fifo *);
+	void (*recover_chan)(struct nvkm_fifo *, int chid);
 	int (*class_get)(struct nvkm_fifo *, int index,
 			 const struct nvkm_fifo_chan_oclass **);
 	const struct nvkm_fifo_chan_oclass *chan[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 467065d1b4e6..cd8cf6f7024c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -25,6 +25,15 @@
 
 #include <engine/fifo.h>
 
+static bool
+nvkm_gr_chsw_load(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->chsw_load)
+		return gr->func->chsw_load(gr);
+	return false;
+}
+
 static void
 nvkm_gr_tile(struct nvkm_engine *engine, int region, struct nvkm_fb_tile *tile)
 {
@@ -106,6 +115,15 @@ nvkm_gr_init(struct nvkm_engine *engine)
 	return gr->func->init(gr);
 }
 
+static int
+nvkm_gr_fini(struct nvkm_engine *engine, bool suspend)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->fini)
+		return gr->func->fini(gr, suspend);
+	return 0;
+}
+
 static void *
 nvkm_gr_dtor(struct nvkm_engine *engine)
 {
@@ -120,8 +138,10 @@ nvkm_gr = {
 	.dtor = nvkm_gr_dtor,
 	.oneinit = nvkm_gr_oneinit,
 	.init = nvkm_gr_init,
+	.fini = nvkm_gr_fini,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
+	.chsw_load = nvkm_gr_chsw_load,
 	.fifo.cclass = nvkm_gr_cclass_new,
 	.fifo.sclass = nvkm_gr_oclass_get,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
index ce913300539f..da1ba74682b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
@@ -25,6 +25,8 @@
 
 #include <subdev/timer.h>
 
+#include <nvif/class.h>
+
 static const struct nvkm_bitfield nv50_gr_status[] = {
 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
 	{ 0x00000002, "DISPATCH" },
@@ -180,11 +182,11 @@ g84_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8297, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, G82_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index f65a5b0a1a4d..f9acb8a944d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -702,6 +702,22 @@ gf100_gr_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+static bool
+gf100_gr_chsw_load(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	if (!gr->firmware) {
+		u32 trace = nvkm_rd32(gr->base.engine.subdev.device, 0x40981c);
+		if (trace & 0x00000040)
+			return true;
+	} else {
+		u32 mthd = nvkm_rd32(gr->base.engine.subdev.device, 0x409808);
+		if (mthd & 0x00080000)
+			return true;
+	}
+	return false;
+}
+
 int
 gf100_gr_rops(struct gf100_gr *gr)
 {
@@ -1136,7 +1152,7 @@ gf100_gr_trap_intr(struct gf100_gr *gr)
 	if (trap & 0x00000008) {
 		u32 stat = nvkm_rd32(device, 0x408030);
 
-		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
+		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
 			       stat & 0x3fffffff);
 		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
 		nvkm_wr32(device, 0x408030, 0xc0000000);
@@ -1391,26 +1407,11 @@ gf100_gr_intr(struct nvkm_gr *base)
 }
 
 static void
-gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
+gf100_gr_init_fw(struct nvkm_falcon *falcon,
 		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int i;
-
-	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
-	for (i = 0; i < data->size / 4; i++)
-		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
-
-	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
-	for (i = 0; i < code->size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
-		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
-	}
-
-	/* code must be padded to 0x40 words */
-	for (; i & 0x3f; i++)
-		nvkm_wr32(device, fuc_base + 0x0184, 0);
+	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
+	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
 }
 
 static void
@@ -1455,162 +1456,149 @@ gf100_gr_init_csdata(struct gf100_gr *gr,
 	nvkm_wr32(device, falcon + 0x01c4, star + 4);
 }
 
-int
-gf100_gr_init_ctxctl(struct gf100_gr *gr)
+/* Initialize context from an external (secure or not) firmware */
+static int
+gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 {
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
-	int i;
 	int ret = 0;
 
-	if (gr->firmware) {
-		/* load fuc microcode */
-		nvkm_mc_unk260(device, 0);
-
-		/* securely-managed falcons must be reset using secure boot */
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
-					 &gr->fuc409d);
-		if (ret)
-			return ret;
+	/* load fuc microcode */
+	nvkm_mc_unk260(device, 0);
 
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
-					 &gr->fuc41ad);
-		if (ret)
-			return ret;
+	/* securely-managed falcons must be reset using secure boot */
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+	else
+		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
+	if (ret)
+		return ret;
 
-		nvkm_mc_unk260(device, 1);
-
-		/* start both of them running */
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x41a10c, 0x00000000);
-		nvkm_wr32(device, 0x40910c, 0x00000000);
-
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			nvkm_wr32(device, 0x41a100, 0x00000002);
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			nvkm_wr32(device, 0x409100, 0x00000002);
-		if (nvkm_msec(device, 2000,
-			if (nvkm_rd32(device, 0x409800) & 0x00000001)
-				break;
-		) < 0)
-			return -EBUSY;
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+	else
+		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
+	if (ret)
+		return ret;
+
+	nvkm_mc_unk260(device, 1);
+
+	/* start both of them running */
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x41a10c, 0x00000000);
+	nvkm_wr32(device, 0x40910c, 0x00000000);
+
+	nvkm_falcon_start(gr->gpccs);
+	nvkm_falcon_start(gr->fecs);
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x7fffffff);
-		nvkm_wr32(device, 0x409504, 0x00000021);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800) & 0x00000001)
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x7fffffff);
+	nvkm_wr32(device, 0x409504, 0x00000021);
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000010);
+	if (nvkm_msec(device, 2000,
+		if ((gr->size = nvkm_rd32(device, 0x409800)))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000016);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000025);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000010);
+	if (device->chipset >= 0xe0) {
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000030);
 		if (nvkm_msec(device, 2000,
-			if ((gr->size = nvkm_rd32(device, 0x409800)))
+			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000016);
+		nvkm_wr32(device, 0x409810, 0xb00095c8);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000031);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000025);
+		nvkm_wr32(device, 0x409810, 0x00080420);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000032);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (device->chipset >= 0xe0) {
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000030);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0xb00095c8);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000031);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0x00080420);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000032);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
+		nvkm_wr32(device, 0x409614, 0x00000070);
+		nvkm_wr32(device, 0x409614, 0x00000770);
+		nvkm_wr32(device, 0x40802c, 0x00000001);
+	}
 
-			nvkm_wr32(device, 0x409614, 0x00000070);
-			nvkm_wr32(device, 0x409614, 0x00000770);
-			nvkm_wr32(device, 0x40802c, 0x00000001);
+	if (gr->data == NULL) {
+		int ret = gf100_grctx_generate(gr);
+		if (ret) {
+			nvkm_error(subdev, "failed to construct context\n");
+			return ret;
 		}
+	}
 
-		if (gr->data == NULL) {
-			int ret = gf100_grctx_generate(gr);
-			if (ret) {
-				nvkm_error(subdev, "failed to construct context\n");
-				return ret;
-			}
-		}
+	return 0;
+}
+
+static int
+gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 
-		return 0;
-	} else
 	if (!gr->func->fecs.ucode) {
 		return -ENOSYS;
 	}
 
 	/* load HUB microcode */
 	nvkm_mc_unk260(device, 0);
-	nvkm_wr32(device, 0x4091c0, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x409180, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x409188, i >> 6);
-		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0,
+			      gr->func->fecs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0,
+			      gr->func->fecs.ucode->code.size, 0, 0, false);
 
 	/* load GPC microcode */
-	nvkm_wr32(device, 0x41a1c0, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x41a180, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x41a188, i >> 6);
-		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0,
+			      gr->func->gpccs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0,
+			      gr->func->gpccs.ucode->code.size, 0, 0, false);
 	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
@@ -1642,6 +1630,19 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return 0;
 }
 
+int
+gf100_gr_init_ctxctl(struct gf100_gr *gr)
+{
+	int ret;
+
+	if (gr->firmware)
+		ret = gf100_gr_init_ctxctl_ext(gr);
+	else
+		ret = gf100_gr_init_ctxctl_int(gr);
+
+	return ret;
+}
+
 static int
 gf100_gr_oneinit(struct nvkm_gr *base)
 {
@@ -1711,10 +1712,32 @@ static int
 gf100_gr_init_(struct nvkm_gr *base)
 {
 	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &base->engine.subdev;
+	u32 ret;
+
 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
+
+	ret = nvkm_falcon_get(gr->fecs, subdev);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_get(gr->gpccs, subdev);
+	if (ret)
+		return ret;
+
 	return gr->func->init(gr);
 }
 
+static int
+gf100_gr_fini_(struct nvkm_gr *base, bool suspend)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	nvkm_falcon_put(gr->gpccs, subdev);
+	nvkm_falcon_put(gr->fecs, subdev);
+	return 0;
+}
+
 void
 gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
 {
@@ -1737,6 +1760,9 @@ gf100_gr_dtor(struct nvkm_gr *base)
 		gr->func->dtor(gr);
 	kfree(gr->data);
 
+	nvkm_falcon_del(&gr->gpccs);
+	nvkm_falcon_del(&gr->fecs);
+
 	gf100_gr_dtor_fw(&gr->fuc409c);
 	gf100_gr_dtor_fw(&gr->fuc409d);
 	gf100_gr_dtor_fw(&gr->fuc41ac);
@@ -1755,10 +1781,12 @@ gf100_gr_ = {
 	.dtor = gf100_gr_dtor,
 	.oneinit = gf100_gr_oneinit,
 	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini_,
 	.intr = gf100_gr_intr,
 	.units = gf100_gr_units,
 	.chan_new = gf100_gr_chan_new,
 	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
 };
 
 int
@@ -1828,6 +1856,7 @@ int
 gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	      int index, struct gf100_gr *gr)
 {
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	int ret;
 
 	gr->func = func;
@@ -1840,7 +1869,11 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	if (ret)
 		return ret;
 
-	return 0;
+	ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs);
+	if (ret)
+		return ret;
+
+	return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 268b8d60ff73..db6ee3b06841 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -29,6 +29,7 @@
 #include <core/gpuobj.h>
 #include <subdev/ltc.h>
 #include <subdev/mmu.h>
+#include <engine/falcon.h>
 
 #define GPC_MAX 32
 #define TPC_MAX_PER_GPC 8
@@ -75,6 +76,8 @@ struct gf100_gr {
 	const struct gf100_gr_func *func;
 	struct nvkm_gr base;
 
+	struct nvkm_falcon *fecs;
+	struct nvkm_falcon *gpccs;
 	struct gf100_gr_fuc fuc409c;
 	struct gf100_gr_fuc fuc409d;
 	struct gf100_gr_fuc fuc41ac;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
index 2e68919f00b2..c711a55ce392 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt200_gr = {
 	.init = nv50_gr_init,
@@ -31,11 +33,11 @@ gt200_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
index 2bf7aac360cc..fa103df32ec7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt215_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ gt215_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8597, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_TESLA, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
index 95d5219faf93..eb1a90644752 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp79_gr = {
 	.init = nv50_gr_init,
@@ -30,11 +32,11 @@ mcp79_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
index 027b58e5976b..c91eb56e9327 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp89_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ mcp89_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
-		{ -1, -1, 0x8697, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT21A_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index fca67de43f2b..df16ffda1749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -27,6 +27,8 @@
 #include <core/gpuobj.h>
 #include <engine/fifo.h>
 
+#include <nvif/class.h>
+
 u64
 nv50_gr_units(struct nvkm_gr *gr)
 {
@@ -778,11 +780,11 @@ nv50_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x5097, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_TESLA, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
index d8adcdf6985a..2a52d9f026ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
@@ -15,6 +15,7 @@ struct nvkm_gr_func {
 	void *(*dtor)(struct nvkm_gr *);
 	int (*oneinit)(struct nvkm_gr *);
 	int (*init)(struct nvkm_gr *);
+	int (*fini)(struct nvkm_gr *, bool);
 	void (*intr)(struct nvkm_gr *);
 	void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *);
 	int (*tlb_flush)(struct nvkm_gr *);
@@ -24,6 +25,7 @@ struct nvkm_gr_func {
 	/* Returns chipset-specific counts of units packed into an u64.
 	 */
 	u64 (*units)(struct nvkm_gr *);
+	bool (*chsw_load)(struct nvkm_gr *);
 	struct nvkm_sclass sclass[];
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
new file mode 100644
index 000000000000..584863db9bfc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
@@ -0,0 +1,2 @@
+nvkm-y += nvkm/falcon/base.o
+nvkm-y += nvkm/falcon/v1.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
new file mode 100644
index 000000000000..4852f313762f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mc.h>
+
+void
+nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u16 tag, u8 port, bool secure)
+{
+	if (secure && !falcon->secret) {
+		nvkm_warn(falcon->user,
+			  "writing with secure tag on a non-secure falcon!\n");
+		return;
+	}
+
+	falcon->func->load_imem(falcon, data, start, size, tag, port,
+				secure);
+}
+
+void
+nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	falcon->func->load_dmem(falcon, data, start, size, port);
+}
+
+void
+nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port,
+		      void *data)
+{
+	falcon->func->read_dmem(falcon, start, size, port, data);
+}
+
+void
+nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst)
+{
+	if (!falcon->func->bind_context) {
+		nvkm_error(falcon->user,
+			   "Context binding not supported on this falcon!\n");
+		return;
+	}
+
+	falcon->func->bind_context(falcon, inst);
+}
+
+void
+nvkm_falcon_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	falcon->func->set_start_addr(falcon, start_addr);
+}
+
+void
+nvkm_falcon_start(struct nvkm_falcon *falcon)
+{
+	falcon->func->start(falcon);
+}
+
+int
+nvkm_falcon_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	enum nvkm_devidx id = falcon->owner->index;
+	int ret;
+
+	nvkm_mc_enable(device, id);
+	ret = falcon->func->enable(falcon);
+	if (ret) {
+		nvkm_mc_disable(device, id);
+		return ret;
+	}
+
+	return 0;
+}
+
+void
+nvkm_falcon_disable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	enum nvkm_devidx id = falcon->owner->index;
+
+	/* already disabled, return or wait_idle will timeout */
+	if (!nvkm_mc_enabled(device, id))
+		return;
+
+	falcon->func->disable(falcon);
+
+	nvkm_mc_disable(device, id);
+}
+
+int
+nvkm_falcon_reset(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_disable(falcon);
+	return nvkm_falcon_enable(falcon);
+}
+
+int
+nvkm_falcon_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	return falcon->func->wait_for_halt(falcon, ms);
+}
+
+int
+nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	return falcon->func->clear_interrupt(falcon, mask);
+}
+
+void
+nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user == user) {
+		nvkm_debug(falcon->user, "released %s falcon\n", falcon->name);
+		falcon->user = NULL;
+	}
+	mutex_unlock(&falcon->mutex);
+}
+
+int
+nvkm_falcon_get(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user) {
+		nvkm_error(user, "%s falcon already acquired by %s!\n",
+			   falcon->name, nvkm_subdev_name[falcon->user->index]);
+		mutex_unlock(&falcon->mutex);
+		return -EBUSY;
+	}
+
+	nvkm_debug(user, "acquired %s falcon\n", falcon->name);
+	falcon->user = user;
+	mutex_unlock(&falcon->mutex);
+	return 0;
+}
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *func,
+		 struct nvkm_subdev *subdev, const char *name, u32 addr,
+		 struct nvkm_falcon *falcon)
+{
+	u32 reg;
+
+	falcon->func = func;
+	falcon->owner = subdev;
+	falcon->name = name;
+	falcon->addr = addr;
+	mutex_init(&falcon->mutex);
+
+	reg = nvkm_falcon_rd32(falcon, 0x12c);
+	falcon->version = reg & 0xf;
+	falcon->secret = (reg >> 4) & 0x3;
+	falcon->code.ports = (reg >> 8) & 0xf;
+	falcon->data.ports = (reg >> 12) & 0xf;
+
+	reg = nvkm_falcon_rd32(falcon, 0x108);
+	falcon->code.limit = (reg & 0x1ff) << 8;
+	falcon->data.limit = (reg & 0x3fe00) >> 1;
+
+	reg = nvkm_falcon_rd32(falcon, 0xc08);
+	falcon->debug = (reg >> 20) & 0x1;
+}
+
+void
+nvkm_falcon_del(struct nvkm_falcon **pfalcon)
+{
+	if (*pfalcon) {
+		kfree(*pfalcon);
+		*pfalcon = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
new file mode 100644
index 000000000000..97b56f759d0b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
@@ -0,0 +1,8 @@
+#ifndef __NVKM_FALCON_PRIV_H__
+#define __NVKM_FALCON_PRIV_H__
+#include <engine/falcon.h>
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *, struct nvkm_subdev *,
+		 const char *, u32, struct nvkm_falcon *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
new file mode 100644
index 000000000000..b537f111f39c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <core/memory.h>
+#include <subdev/timer.h>
+
+static void
+nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+			 u32 size, u16 tag, u8 port, bool secure)
+{
+	u8 rem = size % 4;
+	u32 reg;
+	int i;
+
+	size -= rem;
+
+	reg = start | BIT(24) | (secure ? BIT(28) : 0);
+	nvkm_falcon_wr32(falcon, 0x180 + (port * 16), reg);
+	for (i = 0; i < size / 4; i++) {
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188, tag++);
+		nvkm_falcon_wr32(falcon, 0x184, ((u32 *)data)[i]);
+	}
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get written
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i];
+
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188, tag++);
+		nvkm_falcon_wr32(falcon, 0x184, extra & (BIT(rem * 8) - 1));
+		++i;
+	}
+
+	/* code must be padded to 0x40 words */
+	for (; i & 0x3f; i++)
+		nvkm_falcon_wr32(falcon, 0x184, 0);
+}
+
+static void
+nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 24));
+	for (i = 0; i < size / 4; i++)
+		nvkm_falcon_wr32(falcon, 0x1c4, ((u32 *)data)[i]);
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get read
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i];
+
+		nvkm_falcon_wr32(falcon, 0x1c4, extra & (BIT(rem * 8) - 1));
+	}
+}
+
+static void
+nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size,
+			 u8 port, void *data)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 25));
+	for (i = 0; i < size / 4; i++)
+		((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4);
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get read
+	 */
+	if (rem) {
+		u32 extra = nvkm_falcon_rd32(falcon, 0x1c4);
+
+		for (i = size; i < size + rem; i++) {
+			((u8 *)data)[i] = (u8)(extra & 0xff);
+			extra >>= 8;
+		}
+	}
+}
+
+static void
+nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx)
+{
+	u32 inst_loc;
+
+	/* disable instance block binding */
+	if (ctx == NULL) {
+		nvkm_falcon_wr32(falcon, 0x10c, 0x0);
+		return;
+	}
+
+	nvkm_falcon_wr32(falcon, 0x10c, 0x1);
+
+	/* setup apertures - virtual */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_UCODE, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_VIRT, 0x0);
+	/* setup apertures - physical */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_VID, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6);
+
+	/* Set context */
+	switch (nvkm_memory_target(ctx->memory)) {
+	case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break;
+	case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	/* Enable context */
+	nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1);
+	nvkm_falcon_wr32(falcon, 0x480,
+			 ((ctx->addr >> 12) & 0xfffffff) |
+			 (inst_loc << 28) | (1 << 30));
+}
+
+static void
+nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	nvkm_falcon_wr32(falcon, 0x104, start_addr);
+}
+
+static void
+nvkm_falcon_v1_start(struct nvkm_falcon *falcon)
+{
+	u32 reg = nvkm_falcon_rd32(falcon, 0x100);
+
+	if (reg & BIT(6))
+		nvkm_falcon_wr32(falcon, 0x130, 0x2);
+	else
+		nvkm_falcon_wr32(falcon, 0x100, 0x2);
+}
+
+static int
+nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, ms, falcon->addr + 0x100, 0x10, 0x10);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	/* clear interrupt(s) */
+	nvkm_falcon_mask(falcon, 0x004, mask, mask);
+	/* wait until interrupts are cleared */
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x008, mask, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+falcon_v1_wait_idle(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x04c, 0xffff, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_falcon_v1_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x10c, 0x6, 0x0);
+	if (ret < 0) {
+		nvkm_error(falcon->user, "Falcon mem scrubbing timeout\n");
+		return ret;
+	}
+
+	ret = falcon_v1_wait_idle(falcon);
+	if (ret)
+		return ret;
+
+	/* enable IRQs */
+	nvkm_falcon_wr32(falcon, 0x010, 0xff);
+
+	return 0;
+}
+
+static void
+nvkm_falcon_v1_disable(struct nvkm_falcon *falcon)
+{
+	/* disable IRQs and wait for any previous code to complete */
+	nvkm_falcon_wr32(falcon, 0x014, 0xff);
+	falcon_v1_wait_idle(falcon);
+}
+
+static const struct nvkm_falcon_func
+nvkm_falcon_v1 = {
+	.load_imem = nvkm_falcon_v1_load_imem,
+	.load_dmem = nvkm_falcon_v1_load_dmem,
+	.read_dmem = nvkm_falcon_v1_read_dmem,
+	.bind_context = nvkm_falcon_v1_bind_context,
+	.start = nvkm_falcon_v1_start,
+	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
+	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
+	.enable = nvkm_falcon_v1_enable,
+	.disable = nvkm_falcon_v1_disable,
+	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+};
+
+int
+nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		   struct nvkm_falcon **pfalcon)
+{
+	struct nvkm_falcon *falcon;
+	if (!(falcon = *pfalcon = kzalloc(sizeof(*falcon), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_falcon_ctor(&nvkm_falcon_v1, owner, name, addr, falcon);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
index be57220a2e01..6b4f1e06a38f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
@@ -19,6 +19,7 @@ nvkm-y += nvkm/subdev/bios/pcir.o
 nvkm-y += nvkm/subdev/bios/perf.o
 nvkm-y += nvkm/subdev/bios/pll.o
 nvkm-y += nvkm/subdev/bios/pmu.o
+nvkm-y += nvkm/subdev/bios/power_budget.o
 nvkm-y += nvkm/subdev/bios/ramcfg.o
 nvkm-y += nvkm/subdev/bios/rammap.o
 nvkm-y += nvkm/subdev/bios/shadow.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
new file mode 100644
index 000000000000..617bfffce4ad
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2016 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/power_budget.h>
+
+static u32
+nvbios_power_budget_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt,
+			  u8 *len)
+{
+	struct bit_entry bit_P;
+	u32 power_budget;
+
+	if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 ||
+	    bit_P.length < 0x2c)
+		return 0;
+
+	power_budget = nvbios_rd32(bios, bit_P.offset + 0x2c);
+	if (!power_budget)
+		return 0;
+
+	*ver = nvbios_rd08(bios, power_budget);
+	switch (*ver) {
+	case 0x20:
+	case 0x30:
+		*hdr = nvbios_rd08(bios, power_budget + 0x1);
+		*len = nvbios_rd08(bios, power_budget + 0x2);
+		*cnt = nvbios_rd08(bios, power_budget + 0x3);
+		return power_budget;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int
+nvbios_power_budget_header(struct nvkm_bios *bios,
+                           struct nvbios_power_budget *budget)
+{
+	struct nvkm_subdev *subdev = &bios->subdev;
+	u8 ver, hdr, cnt, len, cap_entry;
+	u32 header;
+
+	if (!bios || !budget)
+		return -EINVAL;
+
+	header = nvbios_power_budget_table(bios, &ver, &hdr, &cnt, &len);
+	if (!header || !cnt)
+		return -ENODEV;
+
+	switch (ver) {
+	case 0x20:
+		cap_entry = nvbios_rd08(bios, header + 0x9);
+		break;
+	case 0x30:
+		cap_entry = nvbios_rd08(bios, header + 0xa);
+		break;
+	default:
+		cap_entry = 0xff;
+	}
+
+	if (cap_entry >= cnt && cap_entry != 0xff) {
+		nvkm_warn(subdev,
+		          "invalid cap_entry in power budget table found\n");
+		budget->cap_entry = 0xff;
+		return -EINVAL;
+	}
+
+	budget->offset = header;
+	budget->ver = ver;
+	budget->hlen = hdr;
+	budget->elen = len;
+	budget->ecount = cnt;
+
+	budget->cap_entry = cap_entry;
+
+	return 0;
+}
+
+int
+nvbios_power_budget_entry(struct nvkm_bios *bios,
+                          struct nvbios_power_budget *budget,
+                          u8 idx, struct nvbios_power_budget_entry *entry)
+{
+	u32 entry_offset;
+
+	if (!bios || !budget || !budget->offset || idx >= budget->ecount
+		|| !entry)
+		return -EINVAL;
+
+	entry_offset = budget->offset + budget->hlen + idx * budget->elen;
+
+	if (budget->ver >= 0x20) {
+		entry->min_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = nvbios_rd32(bios, entry_offset + 0x6);
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0xa);
+	} else {
+		entry->min_w = 0;
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = entry->max_w;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
index 5841f297973c..da1770e47490 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
@@ -112,7 +112,7 @@ read_pll_src(struct nv50_clk *clk, u32 base)
 		M    = (coef & 0x000000ff) >> 0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 	}
 
 	if (M)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
index c714b097719c..59362f8dee22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
@@ -50,7 +50,7 @@ nv50_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
 	ret = nv04_pll_calc(subdev, &info, freq, &N1, &M1, &N2, &M2, &P);
 	if (!ret) {
 		nvkm_error(subdev, "failed pll calculation\n");
-		return ret;
+		return -EINVAL;
 	}
 
 	switch (info.type) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
index 093223d1df4f..6758da93a3a1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
@@ -445,7 +445,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc;
 	struct nvkm_mm *mm = &ram->vram;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int type = (memtype & 0x0ff);
 	int back = (memtype & 0x800);
@@ -462,7 +462,6 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	if (!mem)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->size = size;
 
 	mutex_lock(&ram->fb->subdev.mutex);
@@ -478,6 +477,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	}
 	mem->memtype = type;
 
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r);
@@ -489,13 +489,13 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		size -= r->length;
 	} while (size);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 7904fa41acef..fb8a1239743d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -989,7 +989,7 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 		int *N1, int *fN1, int *M1, int *P1,
 		int *N2, int *M2, int *P2)
 {
-	int best_clk = 0, best_err = target_khz, p_ref, n_ref;
+	int best_err = target_khz, p_ref, n_ref;
 	bool upper = false;
 
 	*M1 = 1;
@@ -1010,7 +1010,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 			/* we found a better combination */
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
@@ -1022,7 +1021,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 				- target_khz;
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
index 0a0e44b75577..017a91de74a0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
@@ -39,7 +39,7 @@ mcp77_ram_init(struct nvkm_ram *base)
 	u32 flush  = ((ram->base.size - (ram->poller_base + 0x40)) >> 5) - 1;
 
 	/* Enable NISO poller for various clients and set their associated
-	 * read address, only for MCP77/78 and MCP79/7A. (fd#25701)
+	 * read address, only for MCP77/78 and MCP79/7A. (fd#27501)
 	 */
 	nvkm_wr32(device, 0x100c18, dniso);
 	nvkm_mask(device, 0x100c14, 0x00000000, 0x00000001);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index 87bde8ff2d6b..6549b0588309 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -496,15 +496,12 @@ nv50_ram_tidy(struct nvkm_ram *base)
 void
 __nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem)
 {
-	struct nvkm_mm_node *this;
-
-	while (!list_empty(&mem->regions)) {
-		this = list_first_entry(&mem->regions, typeof(*this), rl_entry);
-
-		list_del(&this->rl_entry);
-		nvkm_mm_free(&ram->vram, &this);
+	struct nvkm_mm_node *next = mem->mem;
+	struct nvkm_mm_node *node;
+	while ((node = next)) {
+		next = node->next;
+		nvkm_mm_free(&ram->vram, &node);
 	}
-
 	nvkm_mm_free(&ram->tags, &mem->tag);
 }
 
@@ -530,7 +527,7 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_mm *heap = &ram->vram;
 	struct nvkm_mm *tags = &ram->tags;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
 	int type = (memtype & 0x07f);
@@ -559,11 +556,11 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			comp = 0;
 	}
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->memtype = (comp << 7) | type;
 	mem->size = max;
 
 	type = nv50_fb_memtype[type];
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r);
@@ -575,13 +572,13 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		max -= r->length;
 	} while (max);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index f0af2a381eea..fecfa6afcf54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/extdev.h>
 #include <subdev/bios/iccsense.h>
+#include <subdev/bios/power_budget.h>
 #include <subdev/i2c.h>
 
 static bool
@@ -216,10 +217,25 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 {
 	struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev);
 	struct nvkm_bios *bios = subdev->device->bios;
+	struct nvbios_power_budget budget;
 	struct nvbios_iccsense stbl;
-	int i;
+	int i, ret;
 
-	if (!bios || nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
+	if (!bios)
+		return 0;
+
+	ret = nvbios_power_budget_header(bios, &budget);
+	if (!ret && budget.cap_entry != 0xff) {
+		struct nvbios_power_budget_entry entry;
+		ret = nvbios_power_budget_entry(bios, &budget,
+		                                budget.cap_entry, &entry);
+		if (!ret) {
+			iccsense->power_w_max  = entry.avg_w;
+			iccsense->power_w_crit = entry.max_w;
+		}
+	}
+
+	if (nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
 		return 0;
 
 	iccsense->data_valid = true;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index a6a7fa0d7679..9dec58ec3d9f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -116,7 +116,7 @@ struct gk20a_instmem {
 static enum nvkm_memory_target
 gk20a_instobj_target(struct nvkm_memory *memory)
 {
-	return NVKM_MEM_TARGET_HOST;
+	return NVKM_MEM_TARGET_NCOH;
 }
 
 static u64
@@ -305,11 +305,11 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
 	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->base.mem.mem;
 	unsigned long flags;
 	int i;
 
-	if (unlikely(list_empty(&node->base.mem.regions)))
+	if (unlikely(!r))
 		goto out;
 
 	spin_lock_irqsave(&imem->lock, flags);
@@ -320,9 +320,6 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 
 	spin_unlock_irqrestore(&imem->lock, flags);
 
-	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
-			     rl_entry);
-
 	/* clear IOMMU bit to unmap pages */
 	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
 
@@ -404,10 +401,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 	node->r.length = (npages << PAGE_SHIFT) >> 12;
 
 	node->base.mem.offset = node->handle;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&node->r.rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = &node->r;
 	return 0;
 }
 
@@ -484,10 +478,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&r->rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = r;
 	return 0;
 
 release_area:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 6b25e25f9eba..09f669ac6630 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -161,6 +161,16 @@ nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx)
 	}
 }
 
+bool
+nvkm_mc_enabled(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+
+	return (pmc_enable != 0) &&
+	       ((nvkm_rd32(device, 0x000200) & pmc_enable) == pmc_enable);
+}
+
+
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index 5df9669ea39c..d06ad2c372bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -31,7 +31,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 {
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->mem;
 	int big = vma->node->type != mmu->func->spg_shift;
 	u32 offset = vma->node->offset + (delta >> 12);
 	u32 bits = vma->node->type - 12;
@@ -41,7 +41,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 	u32 end, len;
 
 	delta = 0;
-	list_for_each_entry(r, &node->regions, rl_entry) {
+	while (r) {
 		u64 phys = (u64)r->offset << 12;
 		u32 num  = r->length >> bits;
 
@@ -65,7 +65,8 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 
 			delta += (u64)len << vma->node->type;
 		}
-	}
+		r = r->next;
+	};
 
 	mmu->func->flush(vm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 2a31b7d66a6d..87bf41cef0c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -6,6 +6,7 @@ nvkm-y += nvkm/subdev/pci/nv40.o
 nvkm-y += nvkm/subdev/pci/nv46.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
 nvkm-y += nvkm/subdev/pci/g84.o
+nvkm-y += nvkm/subdev/pci/g92.o
 nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
new file mode 100644
index 000000000000..48874359d5f6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+int
+g92_pcie_version_supported(struct nvkm_pci *pci)
+{
+	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
+		return 2;
+	return 1;
+}
+
+static const struct nvkm_pci_func
+g92_pci_func = {
+	.init = g84_pci_init,
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = nv46_pci_msi_rearm,
+
+	.pcie.init = g84_pcie_init,
+	.pcie.set_link = g84_pcie_set_link,
+
+	.pcie.max_speed = g84_pcie_max_speed,
+	.pcie.cur_speed = g84_pcie_cur_speed,
+
+	.pcie.set_version = g84_pcie_set_version,
+	.pcie.version = g84_pcie_version,
+	.pcie.version_supported = g92_pcie_version_supported,
+};
+
+int
+g92_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&g92_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
index 43444123bc04..09adb37a5664 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
@@ -23,14 +23,6 @@
  */
 #include "priv.h"
 
-int
-g94_pcie_version_supported(struct nvkm_pci *pci)
-{
-	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
-		return 2;
-	return 1;
-}
-
 static const struct nvkm_pci_func
 g94_pci_func = {
 	.init = g84_pci_init,
@@ -47,7 +39,7 @@ g94_pci_func = {
 
 	.pcie.set_version = g84_pcie_set_version,
 	.pcie.version = g84_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
index e30ea676baf6..00a5e7d3ee9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
@@ -92,7 +92,7 @@ gf100_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
index c3b798c5c6dd..11bf419afe3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
@@ -39,7 +39,7 @@ gf106_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 23de3180aae5..86921ec962d6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -44,7 +44,7 @@ enum nvkm_pcie_speed g84_pcie_max_speed(struct nvkm_pci *);
 int g84_pcie_init(struct nvkm_pci *);
 int g84_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8);
 
-int g94_pcie_version_supported(struct nvkm_pci *);
+int g92_pcie_version_supported(struct nvkm_pci *);
 
 void gf100_pcie_set_version(struct nvkm_pci *, u8);
 int gf100_pcie_version(struct nvkm_pci *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
index 51fb4bf94a44..ca57c1e491b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
@@ -8,5 +8,6 @@ nvkm-y += nvkm/subdev/pmu/gk110.o
 nvkm-y += nvkm/subdev/pmu/gk208.o
 nvkm-y += nvkm/subdev/pmu/gk20a.o
 nvkm-y += nvkm/subdev/pmu/gm107.o
+nvkm-y += nvkm/subdev/pmu/gm20b.o
 nvkm-y += nvkm/subdev/pmu/gp100.o
 nvkm-y += nvkm/subdev/pmu/gp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
index e611ce80f8ef..a73f690eb4b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -116,6 +116,8 @@ nvkm_pmu_init(struct nvkm_subdev *subdev)
 static void *
 nvkm_pmu_dtor(struct nvkm_subdev *subdev)
 {
+	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+	nvkm_falcon_del(&pmu->falcon);
 	return nvkm_pmu(subdev);
 }
 
@@ -129,15 +131,22 @@ nvkm_pmu = {
 };
 
 int
+nvkm_pmu_ctor(const struct nvkm_pmu_func *func, struct nvkm_device *device,
+	      int index, struct nvkm_pmu *pmu)
+{
+	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
+	pmu->func = func;
+	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
+	init_waitqueue_head(&pmu->recv.wait);
+	return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon);
+}
+
+int
 nvkm_pmu_new_(const struct nvkm_pmu_func *func, struct nvkm_device *device,
 	      int index, struct nvkm_pmu **ppmu)
 {
 	struct nvkm_pmu *pmu;
 	if (!(pmu = *ppmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
-	pmu->func = func;
-	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
-	init_waitqueue_head(&pmu->recv.wait);
-	return 0;
+	return nvkm_pmu_ctor(func, device, index, *ppmu);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index f996d90c9f0d..9ca0db796cbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -19,7 +19,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base.subdev)
+#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base)
 #include "priv.h"
 
 #include <subdev/clk.h>
@@ -43,9 +43,8 @@ struct gk20a_pmu {
 };
 
 struct gk20a_pmu_dvfs_dev_status {
-	unsigned long total;
-	unsigned long busy;
-	int cur_state;
+	u32 total;
+	u32 busy;
 };
 
 static int
@@ -56,13 +55,12 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state)
 	return nvkm_clk_astate(clk, *state, 0, false);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state)
 {
 	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
 
 	*state = clk->pstate;
-	return 0;
 }
 
 static int
@@ -90,28 +88,26 @@ gk20a_pmu_dvfs_get_target_state(struct gk20a_pmu *pmu,
 
 	*state = level;
 
-	if (level == cur_level)
-		return 0;
-	else
-		return 1;
+	return (level != cur_level);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_dev_status(struct gk20a_pmu *pmu,
 			      struct gk20a_pmu_dvfs_dev_status *status)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	status->busy = nvkm_rd32(device, 0x10a508 + (BUSY_SLOT * 0x10));
-	status->total= nvkm_rd32(device, 0x10a508 + (CLK_SLOT * 0x10));
-	return 0;
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	status->busy = nvkm_falcon_rd32(falcon, 0x508 + (BUSY_SLOT * 0x10));
+	status->total= nvkm_falcon_rd32(falcon, 0x508 + (CLK_SLOT * 0x10));
 }
 
 static void
 gk20a_pmu_dvfs_reset_dev_status(struct gk20a_pmu *pmu)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	nvkm_wr32(device, 0x10a508 + (BUSY_SLOT * 0x10), 0x80000000);
-	nvkm_wr32(device, 0x10a508 + (CLK_SLOT * 0x10), 0x80000000);
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	nvkm_falcon_wr32(falcon, 0x508 + (BUSY_SLOT * 0x10), 0x80000000);
+	nvkm_falcon_wr32(falcon, 0x508 + (CLK_SLOT * 0x10), 0x80000000);
 }
 
 static void
@@ -127,7 +123,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	struct nvkm_timer *tmr = device->timer;
 	struct nvkm_volt *volt = device->volt;
 	u32 utilization = 0;
-	int state, ret;
+	int state;
 
 	/*
 	 * The PMU is initialized before CLK and VOLT, so we have to make sure the
@@ -136,11 +132,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	if (!clk || !volt)
 		goto resched;
 
-	ret = gk20a_pmu_dvfs_get_dev_status(pmu, &status);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get device status\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_dev_status(pmu, &status);
 
 	if (status.total)
 		utilization = div_u64((u64)status.busy * 100, status.total);
@@ -150,11 +142,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	nvkm_trace(subdev, "utilization = %d %%, avg_load = %d %%\n",
 		   utilization, data->avg_load);
 
-	ret = gk20a_pmu_dvfs_get_cur_state(pmu, &state);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get current state\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_cur_state(pmu, &state);
 
 	if (gk20a_pmu_dvfs_get_target_state(pmu, &state, data->avg_load)) {
 		nvkm_trace(subdev, "set new state to %d\n", state);
@@ -166,32 +154,36 @@ resched:
 	nvkm_timer_alarm(tmr, 100000000, alarm);
 }
 
-static int
-gk20a_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
+static void
+gk20a_pmu_fini(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	nvkm_timer_alarm_cancel(subdev->device->timer, &pmu->alarm);
-	return 0;
-}
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	nvkm_timer_alarm_cancel(pmu->subdev.device->timer, &gpmu->alarm);
 
-static void *
-gk20a_pmu_dtor(struct nvkm_subdev *subdev)
-{
-	return gk20a_pmu(subdev);
+	nvkm_falcon_put(pmu->falcon, &pmu->subdev);
 }
 
 static int
-gk20a_pmu_init(struct nvkm_subdev *subdev)
+gk20a_pmu_init(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	struct nvkm_device *device = pmu->base.subdev.device;
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = pmu->subdev.device;
+	struct nvkm_falcon *falcon = pmu->falcon;
+	int ret;
+
+	ret = nvkm_falcon_get(falcon, subdev);
+	if (ret) {
+		nvkm_error(subdev, "cannot acquire %s falcon!\n", falcon->name);
+		return ret;
+	}
 
 	/* init pwr perf counter */
-	nvkm_wr32(device, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
-	nvkm_wr32(device, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
-	nvkm_wr32(device, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
+	nvkm_falcon_wr32(falcon, 0x504 + (BUSY_SLOT * 0x10), 0x00200001);
+	nvkm_falcon_wr32(falcon, 0x50c + (BUSY_SLOT * 0x10), 0x00000002);
+	nvkm_falcon_wr32(falcon, 0x50c + (CLK_SLOT * 0x10), 0x00000003);
 
-	nvkm_timer_alarm(device->timer, 2000000000, &pmu->alarm);
+	nvkm_timer_alarm(device->timer, 2000000000, &gpmu->alarm);
 	return 0;
 }
 
@@ -202,26 +194,26 @@ gk20a_dvfs_data= {
 	.p_smooth = 1,
 };
 
-static const struct nvkm_subdev_func
+static const struct nvkm_pmu_func
 gk20a_pmu = {
 	.init = gk20a_pmu_init,
 	.fini = gk20a_pmu_fini,
-	.dtor = gk20a_pmu_dtor,
+	.reset = gt215_pmu_reset,
 };
 
 int
 gk20a_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
 {
-	static const struct nvkm_pmu_func func = {};
 	struct gk20a_pmu *pmu;
 
 	if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	pmu->base.func = &func;
 	*ppmu = &pmu->base;
 
-	nvkm_subdev_ctor(&gk20a_pmu, device, index, &pmu->base.subdev);
+	nvkm_pmu_ctor(&gk20a_pmu, device, index, &pmu->base);
+
 	pmu->data = &gk20a_dvfs_data;
 	nvkm_alarm_init(&pmu->alarm, gk20a_pmu_dvfs_work);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
new file mode 100644
index 000000000000..0b8a1cc4a0ee
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+static const struct nvkm_pmu_func
+gm20b_pmu = {
+	.reset = gt215_pmu_reset,
+};
+
+int
+gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
+{
+	return nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
index 2e2179a4ad17..096cba069f72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -4,6 +4,8 @@
 #include <subdev/pmu.h>
 #include <subdev/pmu/fuc/os.h>
 
+int nvkm_pmu_ctor(const struct nvkm_pmu_func *, struct nvkm_device *,
+		  int index, struct nvkm_pmu *);
 int nvkm_pmu_new_(const struct nvkm_pmu_func *, struct nvkm_device *,
 		  int index, struct nvkm_pmu **);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
index b02b868a6589..5076d1500f47 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
@@ -1,3 +1,7 @@
 nvkm-y += nvkm/subdev/secboot/base.o
+nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o
+nvkm-y += nvkm/subdev/secboot/acr.o
+nvkm-y += nvkm/subdev/secboot/acr_r352.o
+nvkm-y += nvkm/subdev/secboot/acr_r361.o
 nvkm-y += nvkm/subdev/secboot/gm200.o
 nvkm-y += nvkm/subdev/secboot/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
new file mode 100644
index 000000000000..75dc06557877
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr.h"
+
+#include <core/firmware.h>
+
+/**
+ * Convenience function to duplicate a firmware file in memory and check that
+ * it has the required minimum size.
+ */
+void *
+nvkm_acr_load_firmware(const struct nvkm_subdev *subdev, const char *name,
+		       size_t min_size)
+{
+	const struct firmware *fw;
+	void *blob;
+	int ret;
+
+	ret = nvkm_firmware_get(subdev->device, name, &fw);
+	if (ret)
+		return ERR_PTR(ret);
+	if (fw->size < min_size) {
+		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
+			   name, min_size);
+		nvkm_firmware_put(fw);
+		return ERR_PTR(-EINVAL);
+	}
+	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	nvkm_firmware_put(fw);
+	if (!blob)
+		return ERR_PTR(-ENOMEM);
+
+	return blob;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
new file mode 100644
index 000000000000..97795b342b6f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_H__
+#define __NVKM_SECBOOT_ACR_H__
+
+#include "priv.h"
+
+struct nvkm_acr;
+
+/**
+ * struct nvkm_acr_func - properties and functions specific to an ACR
+ *
+ * @load: make the ACR ready to run on the given secboot device
+ * @reset: reset the specified falcon
+ * @start: start the specified falcon (assumed to have been reset)
+ */
+struct nvkm_acr_func {
+	void (*dtor)(struct nvkm_acr *);
+	int (*oneinit)(struct nvkm_acr *, struct nvkm_secboot *);
+	int (*fini)(struct nvkm_acr *, struct nvkm_secboot *, bool);
+	int (*load)(struct nvkm_acr *, struct nvkm_secboot *,
+		    struct nvkm_gpuobj *, u64);
+	int (*reset)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+	int (*start)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+};
+
+/**
+ * struct nvkm_acr - instance of an ACR
+ *
+ * @boot_falcon: ID of the falcon that will perform secure boot
+ * @managed_falcons: bitfield of falcons managed by this ACR
+ * @start_address: virtual start address of the HS bootloader
+ */
+struct nvkm_acr {
+	const struct nvkm_acr_func *func;
+	const struct nvkm_subdev *subdev;
+
+	enum nvkm_secboot_falcon boot_falcon;
+	unsigned long managed_falcons;
+	u32 start_address;
+};
+
+void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t);
+
+struct nvkm_acr *acr_r352_new(unsigned long);
+struct nvkm_acr *acr_r361_new(unsigned long);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
new file mode 100644
index 000000000000..1aa37ea18580
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <core/gpuobj.h>
+#include <core/firmware.h>
+#include <engine/falcon.h>
+
+/**
+ * struct hsf_fw_header - HS firmware descriptor
+ * @sig_dbg_offset:	offset of the debug signature
+ * @sig_dbg_size:	size of the debug signature
+ * @sig_prod_offset:	offset of the production signature
+ * @sig_prod_size:	size of the production signature
+ * @patch_loc:		offset of the offset (sic) of where the signature is
+ * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
+ * @hdr_offset:		offset of the load header (see struct hs_load_header)
+ * @hdr_size:		size of above header
+ *
+ * This structure is embedded in the HS firmware image at
+ * hs_bin_hdr.header_offset.
+ */
+struct hsf_fw_header {
+	u32 sig_dbg_offset;
+	u32 sig_dbg_size;
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 hdr_offset;
+	u32 hdr_size;
+};
+
+/**
+ * struct acr_r352_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct acr_r352_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 code_dma_base1;
+	u32 data_dma_base1;
+};
+
+/**
+ * acr_r352_generate_flcn_bl_desc - generate generic BL descriptor for LS image
+ */
+static void
+acr_r352_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r352_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &_img->ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = (base + pdesc->app_resident_code_offset) >> 8;
+	addr_data = (base + pdesc->app_resident_data_offset) >> 8;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = lower_32_bits(addr_code);
+	desc->code_dma_base1 = upper_32_bits(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = lower_32_bits(addr_data);
+	desc->data_dma_base1 = upper_32_bits(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @signature:		signature of ACR ucode
+ * @wpr_region_id:	region ID holding the WPR header and its details
+ * @wpr_offset:		offset from the WPR region holding the wpr header
+ * @regions:		region descriptors
+ * @nonwpr_ucode_blob_size:	size of LS blob
+ * @nonwpr_ucode_blob_start:	FB location of LS blob is
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+
+/*
+ * Low-secure blob creation
+ */
+
+/**
+ * ls_ucode_img_load() - create a lsf_ucode_img and load it
+ */
+struct ls_ucode_img *
+acr_r352_ls_ucode_img_load(const struct acr_r352 *acr,
+			   enum nvkm_secboot_falcon falcon_id)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct ls_ucode_img_r352 *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	img->base.falcon_id = falcon_id;
+
+	ret = acr->func->ls_func[falcon_id]->load(subdev, &img->base);
+
+	if (ret) {
+		kfree(img->base.ucode_data);
+		kfree(img->base.sig);
+		kfree(img);
+		return ERR_PTR(ret);
+	}
+
+	/* Check that the signature size matches our expectations... */
+	if (img->base.sig_size != sizeof(img->lsb_header.signature)) {
+		nvkm_error(subdev, "invalid signature size for %s falcon!\n",
+			   nvkm_secboot_falcon_name[falcon_id]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Copy signature to the right place */
+	memcpy(&img->lsb_header.signature, img->base.sig, img->base.sig_size);
+
+	/* not needed? the signature should already have the right value */
+	img->lsb_header.signature.falcon_id = falcon_id;
+
+	return &img->base;
+}
+
+#define LSF_LSB_HEADER_ALIGN 256
+#define LSF_BL_DATA_ALIGN 256
+#define LSF_BL_DATA_SIZE_ALIGN 256
+#define LSF_BL_CODE_SIZE_ALIGN 256
+#define LSF_UCODE_DATA_ALIGN 4096
+
+/**
+ * acr_r352_ls_img_fill_headers - fill the WPR and LSB headers of an image
+ * @acr:	ACR to use
+ * @img:	image to generate for
+ * @offset:	offset in the WPR region where this image starts
+ *
+ * Allocate space in the WPR area from offset and write the WPR and LSB headers
+ * accordingly.
+ *
+ * Return: offset at the end of this image.
+ */
+static u32
+acr_r352_ls_img_fill_headers(struct acr_r352 *acr,
+			     struct ls_ucode_img_r352 *img, u32 offset)
+{
+	struct ls_ucode_img *_img = &img->base;
+	struct acr_r352_lsf_wpr_header *whdr = &img->wpr_header;
+	struct acr_r352_lsf_lsb_header *lhdr = &img->lsb_header;
+	struct ls_ucode_img_desc *desc = &_img->ucode_desc;
+	const struct acr_r352_ls_func *func =
+					    acr->func->ls_func[_img->falcon_id];
+
+	/* Fill WPR header */
+	whdr->falcon_id = _img->falcon_id;
+	whdr->bootstrap_owner = acr->base.boot_falcon;
+	whdr->status = LSF_IMAGE_STATUS_COPY;
+
+	/* Skip bootstrapping falcons started by someone else than ACR */
+	if (acr->lazy_bootstrap & BIT(_img->falcon_id))
+		whdr->lazy_bootstrap = 1;
+
+	/* Align, save off, and include an LSB header size */
+	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
+	whdr->lsb_offset = offset;
+	offset += sizeof(*lhdr);
+
+	/*
+	 * Align, save off, and include the original (static) ucode
+	 * image size
+	 */
+	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
+	lhdr->ucode_off = offset;
+	offset += _img->ucode_size;
+
+	/*
+	 * For falcons that use a boot loader (BL), we append a loader
+	 * desc structure on the end of the ucode image and consider
+	 * this the boot loader data. The host will then copy the loader
+	 * desc args to this space within the WPR region (before locking
+	 * down) and the HS bin will then copy them to DMEM 0 for the
+	 * loader.
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+				lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at 0th offset of the image, the VA
+	 * is different to make sure that it doesn't collide the actual
+	 * OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	lhdr->flags = func->lhdr_flags;
+	if (_img->falcon_id == acr->base.boot_falcon)
+		lhdr->flags |= LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* Align and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(func->bl_desc_size, LSF_BL_DATA_SIZE_ALIGN);
+
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_fill_headers - fill WPR and LSB headers of all managed images
+ */
+int
+acr_r352_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs)
+{
+	struct ls_ucode_img_r352 *img;
+	struct list_head *l;
+	u32 count = 0;
+	u32 offset;
+
+	/* Count the number of images to manage */
+	list_for_each(l, imgs)
+		count++;
+
+	/*
+	 * Start with an array of WPR headers at the base of the WPR.
+	 * The expectation here is that the secure falcon will do a single DMA
+	 * read of this array and cache it internally so it's ok to pack these.
+	 * Also, we add 1 to the falcon count to indicate the end of the array.
+	 */
+	offset = sizeof(img->wpr_header) * (count + 1);
+
+	/*
+	 * Walk the managed falcons, accounting for the LSB structs
+	 * as well as the ucode images.
+	 */
+	list_for_each_entry(img, imgs, base.node) {
+		offset = acr_r352_ls_img_fill_headers(acr, img, offset);
+	}
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_write_wpr - write the WPR blob contents
+ */
+int
+acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs,
+		      struct nvkm_gpuobj *wpr_blob, u32 wpr_addr)
+{
+	struct ls_ucode_img *_img;
+	u32 pos = 0;
+
+	nvkm_kmap(wpr_blob);
+
+	list_for_each_entry(_img, imgs, node) {
+		struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+		const struct acr_r352_ls_func *ls_func =
+					    acr->func->ls_func[_img->falcon_id];
+		u8 gdesc[ls_func->bl_desc_size];
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
+				      sizeof(img->wpr_header));
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
+				     &img->lsb_header, sizeof(img->lsb_header));
+
+		/* Generate and write BL descriptor */
+		memset(gdesc, 0, ls_func->bl_desc_size);
+		ls_func->generate_bl_desc(&acr->base, _img, wpr_addr, gdesc);
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.bl_data_off,
+				      gdesc, ls_func->bl_desc_size);
+
+		/* Copy ucode */
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
+				      _img->ucode_data, _img->ucode_size);
+
+		pos += sizeof(img->wpr_header);
+	}
+
+	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
+
+	nvkm_done(wpr_blob);
+
+	return 0;
+}
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+/**
+ * acr_r352_prepare_ls_blob() - prepare the LS blob
+ *
+ * For each securely managed falcon, load the FW, signatures and bootloaders and
+ * prepare a ucode blob. Then, compute the offsets in the WPR region for each
+ * blob, and finally write the headers and ucode blobs into a GPU object that
+ * will be copied into the WPR region by the HS firmware.
+ */
+static int
+acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct list_head imgs;
+	struct ls_ucode_img *img, *t;
+	unsigned long managed_falcons = acr->base.managed_falcons;
+	int managed_count = 0;
+	u32 image_wpr_size;
+	int falcon_id;
+	int ret;
+
+	INIT_LIST_HEAD(&imgs);
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) {
+		struct ls_ucode_img *img;
+
+		img = acr->func->ls_ucode_img_load(acr, falcon_id);
+		if (IS_ERR(img)) {
+			ret = PTR_ERR(img);
+			goto cleanup;
+		}
+
+		list_add_tail(&img->node, &imgs);
+		managed_count++;
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	image_wpr_size = acr->func->ls_fill_headers(acr, &imgs);
+	image_wpr_size = ALIGN(image_wpr_size, WPR_ALIGNMENT);
+
+	/* Allocate GPU object that will contain the WPR region */
+	ret = nvkm_gpuobj_new(subdev->device, image_wpr_size, WPR_ALIGNMENT,
+			      false, NULL, &acr->ls_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_debug(subdev, "%d managed LS falcons, WPR size is %d bytes\n",
+		    managed_count, image_wpr_size);
+
+	/* If WPR address and size are not fixed, set them to fit the LS blob */
+	if (wpr_size == 0) {
+		wpr_addr = acr->ls_blob->addr;
+		wpr_size = image_wpr_size;
+	/*
+	 * But if the WPR region is set by the bootloader, it is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	} else if (image_wpr_size > wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", image_wpr_size);
+		nvkm_error(subdev, "available: %dB\n", wpr_size);
+		ret = -ENOSPC;
+		goto cleanup;
+	}
+
+	/* Write LS blob */
+	ret = acr->func->ls_write_wpr(acr, &imgs, acr->ls_blob, wpr_addr);
+	if (ret)
+		nvkm_gpuobj_del(&acr->ls_blob);
+
+cleanup:
+	list_for_each_entry_safe(img, t, &imgs, node) {
+		kfree(img->ucode_data);
+		kfree(img->sig);
+		kfree(img);
+	}
+
+	return ret;
+}
+
+
+
+
+/**
+ * acr_r352_hsf_patch_signature() - patch HS blob with correct signature
+ */
+static void
+acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image)
+{
+	struct fw_bin_header *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	void *sig;
+	u32 sig_size;
+
+	/* Falcon in debug or production mode? */
+	if (sb->boot_falcon->debug) {
+		sig = acr_image + fw_hdr->sig_dbg_offset;
+		sig_size = fw_hdr->sig_dbg_size;
+	} else {
+		sig = acr_image + fw_hdr->sig_prod_offset;
+		sig_size = fw_hdr->sig_prod_size;
+	}
+
+	/* Patch signature */
+	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
+}
+
+static void
+acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb,
+		       struct hsflcn_acr_desc *desc)
+{
+	struct nvkm_gpuobj *ls_blob = acr->ls_blob;
+
+	/* WPR region information if WPR is not fixed */
+	if (sb->wpr_size == 0) {
+		u32 wpr_start = ls_blob->addr;
+		u32 wpr_end = wpr_start + ls_blob->size;
+
+		desc->wpr_region_id = 1;
+		desc->regions.no_regions = 2;
+		desc->regions.region_props[0].start_addr = wpr_start >> 8;
+		desc->regions.region_props[0].end_addr = wpr_end >> 8;
+		desc->regions.region_props[0].region_id = 1;
+		desc->regions.region_props[0].read_mask = 0xf;
+		desc->regions.region_props[0].write_mask = 0xc;
+		desc->regions.region_props[0].client_mask = 0x2;
+	} else {
+		desc->ucode_blob_base = ls_blob->addr;
+		desc->ucode_blob_size = ls_blob->size;
+	}
+}
+
+static void
+acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			     u64 offset)
+{
+	struct acr_r352_flcn_bl_desc *bl_desc = _bl_desc;
+	u64 addr_code, addr_data;
+
+	addr_code = offset >> 8;
+	addr_data = (offset + hdr->data_dma_base) >> 8;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = lower_32_bits(addr_code);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = lower_32_bits(addr_data);
+	bl_desc->data_size = hdr->data_size;
+}
+
+/**
+ * acr_r352_prepare_hs_blob - load and prepare a HS blob and BL descriptor
+ *
+ * @sb secure boot instance to prepare for
+ * @fw name of the HS firmware to load
+ * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
+ * @bl_desc pointer to the BL descriptor to write for this firmware
+ * @patch whether we should patch the HS descriptor (only for HS loaders)
+ */
+static int
+acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb,
+			 const char *fw, struct nvkm_gpuobj **blob,
+			 struct hsf_load_header *load_header, bool patch)
+{
+	struct nvkm_subdev *subdev = &sb->subdev;
+	void *acr_image;
+	struct fw_bin_header *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	struct hsf_load_header *load_hdr;
+	void *acr_data;
+	int ret;
+
+	acr_image = nvkm_acr_load_firmware(subdev, fw, 0);
+	if (IS_ERR(acr_image))
+		return PTR_ERR(acr_image);
+
+	hsbin_hdr = acr_image;
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	acr_data = acr_image + hsbin_hdr->data_offset;
+
+	/* Patch signature */
+	acr_r352_hsf_patch_signature(sb, acr_image);
+
+	/* Patch descriptor with WPR information? */
+	if (patch) {
+		struct hsflcn_acr_desc *desc;
+
+		desc = acr_data + load_hdr->data_dma_base;
+		acr_r352_fixup_hs_desc(acr, sb, desc);
+	}
+
+	if (load_hdr->num_apps > ACR_R352_MAX_APPS) {
+		nvkm_error(subdev, "more apps (%d) than supported (%d)!",
+			   load_hdr->num_apps, ACR_R352_MAX_APPS);
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	memcpy(load_header, load_hdr, sizeof(*load_header) +
+			       (sizeof(load_hdr->app[0]) * load_hdr->num_apps));
+
+	/* Create ACR blob and copy HS data to it */
+	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
+			      0x1000, false, NULL, blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_kmap(*blob);
+	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
+	nvkm_done(*blob);
+
+cleanup:
+	kfree(acr_image);
+
+	return ret;
+}
+
+static int
+acr_r352_prepare_hsbl_blob(struct acr_r352 *acr)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct fw_bin_header *hdr;
+	struct fw_bl_desc *hsbl_desc;
+
+	acr->hsbl_blob = nvkm_acr_load_firmware(subdev, "acr/bl", 0);
+	if (IS_ERR(acr->hsbl_blob)) {
+		int ret = PTR_ERR(acr->hsbl_blob);
+
+		acr->hsbl_blob = NULL;
+		return ret;
+	}
+
+	hdr = acr->hsbl_blob;
+	hsbl_desc = acr->hsbl_blob + hdr->header_offset;
+
+	/* virtual start address for boot vector */
+	acr->base.start_address = hsbl_desc->start_tag << 8;
+
+	return 0;
+}
+
+/**
+ * acr_r352_load_blobs - load blobs common to all ACR V1 versions.
+ *
+ * This includes the LS blob, HS ucode loading blob, and HS bootloader.
+ *
+ * The HS ucode unload blob is only used on dGPU if the WPR region is variable.
+ */
+int
+acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int ret;
+
+	/* Firmware already loaded? */
+	if (acr->firmware_ok)
+		return 0;
+
+	/* Load and prepare the managed falcon's firmwares */
+	ret = acr_r352_prepare_ls_blob(acr, sb->wpr_addr, sb->wpr_size);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware that will load the LS firmwares */
+	if (!acr->load_blob) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_load",
+					       &acr->load_blob,
+					       &acr->load_bl_header, true);
+		if (ret)
+			return ret;
+	}
+
+	/* If the ACR region is dynamically programmed, we need an unload FW */
+	if (sb->wpr_size == 0) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_unload",
+					       &acr->unload_blob,
+					       &acr->unload_bl_header, false);
+		if (ret)
+			return ret;
+	}
+
+	/* Load the HS firmware bootloader */
+	if (!acr->hsbl_blob) {
+		ret = acr_r352_prepare_hsbl_blob(acr);
+		if (ret)
+			return ret;
+	}
+
+	acr->firmware_ok = true;
+	nvkm_debug(&sb->subdev, "LS blob successfully created\n");
+
+	return 0;
+}
+
+/**
+ * acr_r352_load() - prepare HS falcon to run the specified blob, mapped
+ * at GPU address offset.
+ */
+static int
+acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	      struct nvkm_gpuobj *blob, u64 offset)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	struct nvkm_falcon *falcon = sb->boot_falcon;
+	struct fw_bin_header *hdr = acr->hsbl_blob;
+	struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset;
+	void *blob_data = acr->hsbl_blob + hdr->data_offset;
+	void *hsbl_code = blob_data + hsbl_desc->code_off;
+	void *hsbl_data = blob_data + hsbl_desc->data_off;
+	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
+	const struct hsf_load_header *load_hdr;
+	const u32 bl_desc_size = acr->func->hs_bl_desc_size;
+	u8 bl_desc[bl_desc_size];
+
+	/* Find the bootloader descriptor for our blob and copy it */
+	if (blob == acr->load_blob) {
+		load_hdr = &acr->load_bl_header;
+	} else if (blob == acr->unload_blob) {
+		load_hdr = &acr->unload_bl_header;
+	} else {
+		nvkm_error(_acr->subdev, "invalid secure boot blob!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Copy HS bootloader data
+	 */
+	nvkm_falcon_load_dmem(falcon, hsbl_data, 0x0, hsbl_desc->data_size, 0);
+
+	/* Copy HS bootloader code to end of IMEM */
+	nvkm_falcon_load_imem(falcon, hsbl_code, falcon->code.limit - code_size,
+			      code_size, hsbl_desc->start_tag, 0, false);
+
+	/* Generate the BL header */
+	memset(bl_desc, 0, bl_desc_size);
+	acr->func->generate_hs_bl_desc(load_hdr, bl_desc, offset);
+
+	/*
+	 * Copy HS BL header where the HS descriptor expects it to be
+	 */
+	nvkm_falcon_load_dmem(falcon, bl_desc, hsbl_desc->dmem_load_off,
+			      bl_desc_size, 0);
+
+	return 0;
+}
+
+static int
+acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int i;
+
+	/* Run the unload blob to unprotect the WPR region */
+	if (acr->unload_blob && sb->wpr_set) {
+		int ret;
+
+		nvkm_debug(&sb->subdev, "running HS unload blob\n");
+		ret = sb->func->run_blob(sb, acr->unload_blob);
+		if (ret)
+			return ret;
+		nvkm_debug(&sb->subdev, "HS unload blob completed\n");
+	}
+
+	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
+		acr->falcon_state[i] = NON_SECURE;
+
+	sb->wpr_set = false;
+
+	return 0;
+}
+
+static int
+acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int ret;
+
+	if (sb->wpr_set)
+		return 0;
+
+	/* Make sure all blobs are ready */
+	ret = acr_r352_load_blobs(acr, sb);
+	if (ret)
+		return ret;
+
+	nvkm_debug(&sb->subdev, "running HS load blob\n");
+	ret = sb->func->run_blob(sb, acr->load_blob);
+	/* clear halt interrupt */
+	nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10);
+	if (ret)
+		return ret;
+	nvkm_debug(&sb->subdev, "HS load blob completed\n");
+
+	sb->wpr_set = true;
+
+	return 0;
+}
+
+/*
+ * acr_r352_reset() - execute secure boot from the prepared state
+ *
+ * Load the HS bootloader and ask the falcon to run it. This will in turn
+ * load the HS firmware and run it, so once the falcon stops all the managed
+ * falcons should have their LS firmware loaded and be ready to run.
+ */
+static int
+acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	       enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	int ret;
+
+	/*
+	 * Dummy GM200 implementation: perform secure boot each time we are
+	 * called on FECS. Since only FECS and GPCCS are managed and started
+	 * together, this ought to be safe.
+	 *
+	 * Once we have proper PMU firmware and support, this will be changed
+	 * to a proper call to the PMU method.
+	 */
+	if (falcon != NVKM_SECBOOT_FALCON_FECS)
+		goto end;
+
+	ret = acr_r352_shutdown(acr, sb);
+	if (ret)
+		return ret;
+
+	acr_r352_bootstrap(acr, sb);
+	if (ret)
+		return ret;
+
+end:
+	acr->falcon_state[falcon] = RESET;
+	return 0;
+}
+
+static int
+acr_r352_start(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+		    enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	const struct nvkm_subdev *subdev = &sb->subdev;
+	int base;
+
+	switch (falcon) {
+	case NVKM_SECBOOT_FALCON_FECS:
+		base = 0x409000;
+		break;
+	case NVKM_SECBOOT_FALCON_GPCCS:
+		base = 0x41a000;
+		break;
+	default:
+		nvkm_error(subdev, "cannot start unhandled falcon!\n");
+		return -EINVAL;
+	}
+
+	nvkm_wr32(subdev->device, base + 0x130, 0x00000002);
+	acr->falcon_state[falcon] = RUNNING;
+
+	return 0;
+}
+
+static int
+acr_r352_fini(struct nvkm_acr *_acr, struct nvkm_secboot *sb, bool suspend)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	return acr_r352_shutdown(acr, sb);
+}
+
+static void
+acr_r352_dtor(struct nvkm_acr *_acr)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	nvkm_gpuobj_del(&acr->unload_blob);
+
+	kfree(acr->hsbl_blob);
+	nvkm_gpuobj_del(&acr->load_blob);
+	nvkm_gpuobj_del(&acr->ls_blob);
+
+	kfree(acr);
+}
+
+const struct acr_r352_ls_func
+acr_r352_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r352_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r352_func = {
+	.generate_hs_bl_desc = acr_r352_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r352_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r352_ls_gpccs_func,
+	},
+};
+
+static const struct nvkm_acr_func
+acr_r352_base_func = {
+	.dtor = acr_r352_dtor,
+	.fini = acr_r352_fini,
+	.load = acr_r352_load,
+	.reset = acr_r352_reset,
+	.start = acr_r352_start,
+};
+
+struct nvkm_acr *
+acr_r352_new_(const struct acr_r352_func *func,
+	      enum nvkm_secboot_falcon boot_falcon,
+	      unsigned long managed_falcons)
+{
+	struct acr_r352 *acr;
+
+	acr = kzalloc(sizeof(*acr), GFP_KERNEL);
+	if (!acr)
+		return ERR_PTR(-ENOMEM);
+
+	acr->base.boot_falcon = boot_falcon;
+	acr->base.managed_falcons = managed_falcons;
+	acr->base.func = &acr_r352_base_func;
+	acr->func = func;
+
+	return &acr->base;
+}
+
+struct nvkm_acr *
+acr_r352_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r352_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
new file mode 100644
index 000000000000..ad5923b0fd3c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_R352_H__
+#define __NVKM_SECBOOT_ACR_R352_H__
+
+#include "acr.h"
+#include "ls_ucode.h"
+
+struct ls_ucode_img;
+
+#define ACR_R352_MAX_APPS 8
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct acr_r352_lsf_lsb_header - LS firmware header
+ * @signature:		signature to verify the firmware against
+ * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
+ *                      blob contains the bootloader, code and data of the
+ *                      LS falcon
+ * @ucode_size:		size of the ucode blob, including bootloader
+ * @data_size:		size of the ucode blob data
+ * @bl_code_size:	size of the bootloader code
+ * @bl_imem_off:	offset in imem of the bootloader
+ * @bl_data_off:	offset of the bootloader data in WPR region
+ * @bl_data_size:	size of the bootloader data
+ * @app_code_off:	offset of the app code relative to ucode_off
+ * @app_code_size:	size of the app code
+ * @app_data_off:	offset of the app data relative to ucode_off
+ * @app_data_size:	size of the app data
+ * @flags:		flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct acr_r352_lsf_lsb_header {
+	/**
+	 * LS falcon signatures
+	 * @prd_keys:		signature to use in production mode
+	 * @dgb_keys:		signature to use in debug mode
+	 * @b_prd_present:	whether the production key is present
+	 * @b_dgb_present:	whether the debug key is present
+	 * @falcon_id:		ID of the falcon the ucode applies to
+	 */
+	struct {
+		u8 prd_keys[2][16];
+		u8 dbg_keys[2][16];
+		u32 b_prd_present;
+		u32 b_dbg_present;
+		u32 falcon_id;
+	} signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct acr_r352_lsf_wpr_header - LS blob WPR Header
+ * @falcon_id:		LS falcon ID
+ * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
+ * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
+ * @lazy_bootstrap:	skip bootstrapping by ACR
+ * @status:		bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance which falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct acr_r352_lsf_wpr_header {
+	u32 falcon_id;
+	u32 lsb_offset;
+	u32 bootstrap_owner;
+	u32 lazy_bootstrap;
+	u32 status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+/**
+ * struct ls_ucode_img_r352 - ucode image augmented with r352 headers
+ */
+struct ls_ucode_img_r352 {
+	struct ls_ucode_img base;
+
+	struct acr_r352_lsf_wpr_header wpr_header;
+	struct acr_r352_lsf_lsb_header lsb_header;
+};
+#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base)
+
+
+/*
+ * HS blob structures
+ */
+
+struct hsf_load_header_app {
+	u32 sec_code_off;
+	u32 sec_code_size;
+};
+
+/**
+ * struct hsf_load_header - HS firmware load header
+ */
+struct hsf_load_header {
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 num_apps;
+	struct hsf_load_header_app app[0];
+};
+
+/**
+ * struct acr_r352_ls_func - manages a single LS firmware
+ *
+ * @load: load the external firmware into a ls_ucode_img
+ * @generate_bl_desc: function called on a block of bl_desc_size to generate the
+ *		      proper bootloader descriptor for this LS firmware
+ * @bl_desc_size: size of the bootloader descriptor
+ * @lhdr_flags: LS flags
+ */
+struct acr_r352_ls_func {
+	int (*load)(const struct nvkm_subdev *, struct ls_ucode_img *);
+	void (*generate_bl_desc)(const struct nvkm_acr *,
+				 const struct ls_ucode_img *, u64, void *);
+	u32 bl_desc_size;
+	u32 lhdr_flags;
+};
+
+struct acr_r352;
+
+/**
+ * struct acr_r352_func - manages nuances between ACR versions
+ *
+ * @generate_hs_bl_desc: function called on a block of bl_desc_size to generate
+ *			 the proper HS bootloader descriptor
+ * @hs_bl_desc_size: size of the HS bootloader descriptor
+ */
+struct acr_r352_func {
+	void (*generate_hs_bl_desc)(const struct hsf_load_header *, void *,
+				    u64);
+	u32 hs_bl_desc_size;
+
+	struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *,
+						  enum nvkm_secboot_falcon);
+	int (*ls_fill_headers)(struct acr_r352 *, struct list_head *);
+	int (*ls_write_wpr)(struct acr_r352 *, struct list_head *,
+			    struct nvkm_gpuobj *, u32);
+
+	const struct acr_r352_ls_func *ls_func[NVKM_SECBOOT_FALCON_END];
+};
+
+/**
+ * struct acr_r352 - ACR data for driver release 352 (and beyond)
+ */
+struct acr_r352 {
+	struct nvkm_acr base;
+	const struct acr_r352_func *func;
+
+	/*
+	 * HS FW - lock WPR region (dGPU only) and load LS FWs
+	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
+	 */
+	struct nvkm_gpuobj *load_blob;
+	struct {
+		struct hsf_load_header load_bl_header;
+		struct hsf_load_header_app __load_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS FW - unlock WPR region (dGPU only) */
+	struct nvkm_gpuobj *unload_blob;
+	struct {
+		struct hsf_load_header unload_bl_header;
+		struct hsf_load_header_app __unload_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS bootloader */
+	void *hsbl_blob;
+
+	/* LS FWs, to be loaded by the HS ACR */
+	struct nvkm_gpuobj *ls_blob;
+
+	/* Firmware already loaded? */
+	bool firmware_ok;
+
+	/* Falcons to lazy-bootstrap */
+	u32 lazy_bootstrap;
+
+	/* To keep track of the state of all managed falcons */
+	enum {
+		/* In non-secure state, no firmware loaded, no privileges*/
+		NON_SECURE = 0,
+		/* In low-secure mode and ready to be started */
+		RESET,
+		/* In low-secure mode and running */
+		RUNNING,
+	} falcon_state[NVKM_SECBOOT_FALCON_END];
+};
+#define acr_r352(acr) container_of(acr, struct acr_r352, base)
+
+struct nvkm_acr *acr_r352_new_(const struct acr_r352_func *,
+			       enum nvkm_secboot_falcon, unsigned long);
+
+struct ls_ucode_img *acr_r352_ls_ucode_img_load(const struct acr_r352 *,
+						enum nvkm_secboot_falcon);
+int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *);
+int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *,
+			  struct nvkm_gpuobj *, u32);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
new file mode 100644
index 000000000000..f0aff1d98474
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <engine/falcon.h>
+
+/**
+ * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct acr_r361_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	struct flcn_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct flcn_u64 data_dma_base;
+	u32 data_size;
+};
+
+static void
+acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r361_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &img->base.ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = base + pdesc->app_resident_code_offset;
+	addr_data = base + pdesc->app_resident_data_offset;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = u64_to_flcn64(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = u64_to_flcn64(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+static void
+acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			    u64 offset)
+{
+	struct acr_r361_flcn_bl_desc *bl_desc = _bl_desc;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = u64_to_flcn64(offset);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base);
+	bl_desc->data_size = hdr->data_size;
+}
+
+const struct acr_r352_ls_func
+acr_r361_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r361_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r361_func = {
+	.generate_hs_bl_desc = acr_r361_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func,
+	},
+};
+
+struct nvkm_acr *
+acr_r361_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r361_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 314be2192b7d..27c9dfffb9a6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,184 +19,108 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ *   architectures before security modes were introduced (pre-Maxwell), but
+ *   capability is restricted. In particular, certain registers may be
+ *   inaccessible for reads and/or writes, and physical memory access may be
+ *   disabled (on certain Falcon instances). This is the only possible mode that
+ *   can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ *   not possible to read or write any Falcon internal state or Falcon registers
+ *   from outside the Falcon (for example, from the host system). The only way
+ *   to enable this mode is by loading microcode that has been signed by NVIDIA.
+ *   (The loading process involves tagging the IMEM block as secure, writing the
+ *   signature into a Falcon register, and starting execution. The hardware will
+ *   validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ *   than NS but fewer than HS. Some of the microprocessor state is visible to
+ *   host software to ease debugging. The only way to enable this mode is by HS
+ *   microcode enabling LS mode. Some privileges available to HS mode are not
+ *   available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switching a HS-capable falcon (typically
+ * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
+ * load them, and switch managed falcons into LS mode. Once secure boot
+ * completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ *    load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ *    firmware in charge of loading the LS firmwares into their respective
+ *    falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ *    HS-capable falcon. It authenticates itself, switches the secure falcon to
+ *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
+ *    LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ *    checks the signatures of the LS firmwares and, if valid, switches the
+ *    managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
+ *
+ */
+
 #include "priv.h"
+#include "acr.h"
 
 #include <subdev/mc.h>
 #include <subdev/timer.h>
+#include <subdev/pmu.h>
 
-static const char *
-managed_falcons_names[] = {
+const char *
+nvkm_secboot_falcon_name[] = {
 	[NVKM_SECBOOT_FALCON_PMU] = "PMU",
 	[NVKM_SECBOOT_FALCON_RESERVED] = "<reserved>",
 	[NVKM_SECBOOT_FALCON_FECS] = "FECS",
 	[NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS",
 	[NVKM_SECBOOT_FALCON_END] = "<invalid>",
 };
-
-/*
- * Helper falcon functions
- */
-
-static int
-falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	/* clear halt interrupt */
-	nvkm_mask(device, base + 0x004, 0x10, 0x10);
-	/* wait until halt interrupt is cleared */
-	ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-falcon_wait_idle(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* enable engine */
-	nvkm_mc_enable(device, sb->devidx);
-	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
-	if (ret < 0) {
-		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
-		nvkm_mc_disable(device, sb->devidx);
-		return ret;
-	}
-
-	ret = falcon_wait_idle(device, sb->base);
-	if (ret)
-		return ret;
-
-	/* enable IRQs */
-	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mc_intr_mask(device, sb->devidx, true);
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-
-	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mc_intr_mask(device, sb->devidx, false);
-	nvkm_wr32(device, sb->base + 0x014, 0xff);
-
-	falcon_wait_idle(device, sb->base);
-
-	/* disable engine */
-	nvkm_mc_disable(device, sb->devidx);
-
-	return 0;
-}
-
-int
-nvkm_secboot_falcon_reset(struct nvkm_secboot *sb)
-{
-	int ret;
-
-	ret = nvkm_secboot_falcon_disable(sb);
-	if (ret)
-		return ret;
-
-	ret = nvkm_secboot_falcon_enable(sb);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-/**
- * nvkm_secboot_falcon_run - run the falcon that will perform secure boot
- *
- * This function is to be called after all chip-specific preparations have
- * been completed. It will start the falcon to perform secure boot, wait for
- * it to halt, and report if an error occurred.
- */
-int
-nvkm_secboot_falcon_run(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* Start falcon */
-	nvkm_wr32(device, sb->base + 0x100, 0x2);
-
-	/* Wait for falcon halt */
-	ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10);
-	if (ret < 0)
-		return ret;
-
-	/* If mailbox register contains an error code, then ACR has failed */
-	ret = nvkm_rd32(device, sb->base + 0x040);
-	if (ret) {
-		nvkm_error(&sb->subdev, "ACR boot failed, ret 0x%08x", ret);
-		falcon_clear_halt_interrupt(device, sb->base);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
 /**
  * nvkm_secboot_reset() - reset specified falcon
  */
 int
-nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcon)
+nvkm_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 {
 	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
+	if (!(BIT(falcon) & sb->acr->managed_falcons)) {
 		nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n");
 		return -EINVAL;
 	}
 
-	return sb->func->reset(sb, falcon);
-}
-
-/**
- * nvkm_secboot_start() - start specified falcon
- */
-int
-nvkm_secboot_start(struct nvkm_secboot *sb, u32 falcon)
-{
-	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
-		nvkm_error(&sb->subdev, "cannot start unmanaged falcon!\n");
-		return -EINVAL;
-	}
-
-	return sb->func->start(sb, falcon);
+	return sb->acr->func->reset(sb->acr, sb, falcon);
 }
 
 /**
  * nvkm_secboot_is_managed() - check whether a given falcon is securely-managed
  */
 bool
-nvkm_secboot_is_managed(struct nvkm_secboot *secboot,
-			enum nvkm_secboot_falcon fid)
+nvkm_secboot_is_managed(struct nvkm_secboot *sb, enum nvkm_secboot_falcon fid)
 {
-	if (!secboot)
+	if (!sb)
 		return false;
 
-	return secboot->func->managed_falcons & BIT(fid);
+	return sb->acr->managed_falcons & BIT(fid);
 }
 
 static int
@@ -205,9 +129,19 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 	struct nvkm_secboot *sb = nvkm_secboot(subdev);
 	int ret = 0;
 
+	switch (sb->acr->boot_falcon) {
+	case NVKM_SECBOOT_FALCON_PMU:
+		sb->boot_falcon = subdev->device->pmu->falcon;
+		break;
+	default:
+		nvkm_error(subdev, "Unmanaged boot falcon %s!\n",
+			                nvkm_secboot_falcon_name[sb->acr->boot_falcon]);
+		return -EINVAL;
+	}
+
 	/* Call chip-specific init function */
-	if (sb->func->init)
-		ret = sb->func->init(sb);
+	if (sb->func->oneinit)
+		ret = sb->func->oneinit(sb);
 	if (ret) {
 		nvkm_error(subdev, "Secure Boot initialization failed: %d\n",
 			   ret);
@@ -249,7 +183,7 @@ nvkm_secboot = {
 };
 
 int
-nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
+nvkm_secboot_ctor(const struct nvkm_secboot_func *func, struct nvkm_acr *acr,
 		  struct nvkm_device *device, int index,
 		  struct nvkm_secboot *sb)
 {
@@ -257,22 +191,14 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 
 	nvkm_subdev_ctor(&nvkm_secboot, device, index, &sb->subdev);
 	sb->func = func;
-
-	/* setup the performing falcon's base address and masks */
-	switch (func->boot_falcon) {
-	case NVKM_SECBOOT_FALCON_PMU:
-		sb->devidx = NVKM_SUBDEV_PMU;
-		sb->base = 0x10a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
-		return -EINVAL;
-	};
+	sb->acr = acr;
+	acr->subdev = &sb->subdev;
 
 	nvkm_debug(&sb->subdev, "securely managed falcons:\n");
-	for_each_set_bit(fid, &sb->func->managed_falcons,
+	for_each_set_bit(fid, &sb->acr->managed_falcons,
 			 NVKM_SECBOOT_FALCON_END)
-		nvkm_debug(&sb->subdev, "- %s\n", managed_falcons_names[fid]);
+		nvkm_debug(&sb->subdev, "- %s\n",
+			   nvkm_secboot_falcon_name[fid]);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index ec48e4ace37a..813c4eb0b25f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -20,1313 +20,84 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-/*
- * Secure boot is the process by which NVIDIA-signed firmware is loaded into
- * some of the falcons of a GPU. For production devices this is the only way
- * for the firmware to access useful (but sensitive) registers.
- *
- * A Falcon microprocessor supporting advanced security modes can run in one of
- * three modes:
- *
- * - Non-secure (NS). In this mode, functionality is similar to Falcon
- *   architectures before security modes were introduced (pre-Maxwell), but
- *   capability is restricted. In particular, certain registers may be
- *   inaccessible for reads and/or writes, and physical memory access may be
- *   disabled (on certain Falcon instances). This is the only possible mode that
- *   can be used if you don't have microcode cryptographically signed by NVIDIA.
- *
- * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
- *   not possible to read or write any Falcon internal state or Falcon registers
- *   from outside the Falcon (for example, from the host system). The only way
- *   to enable this mode is by loading microcode that has been signed by NVIDIA.
- *   (The loading process involves tagging the IMEM block as secure, writing the
- *   signature into a Falcon register, and starting execution. The hardware will
- *   validate the signature, and if valid, grant HS privileges.)
- *
- * - Light Secure (LS). In this mode, the microprocessor has more privileges
- *   than NS but fewer than HS. Some of the microprocessor state is visible to
- *   host software to ease debugging. The only way to enable this mode is by HS
- *   microcode enabling LS mode. Some privileges available to HS mode are not
- *   available here. LS mode is introduced in GM20x.
- *
- * Secure boot consists in temporarily switching a HS-capable falcon (typically
- * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
- * load them, and switch managed falcons into LS mode. Once secure boot
- * completes, no falcon remains in HS mode.
- *
- * Secure boot requires a write-protected memory region (WPR) which can only be
- * written by the secure falcon. On dGPU, the driver sets up the WPR region in
- * video memory. On Tegra, it is set up by the bootloader and its location and
- * size written into memory controller registers.
- *
- * The secure boot process takes place as follows:
- *
- * 1) A LS blob is constructed that contains all the LS firmwares we want to
- *    load, along with their signatures and bootloaders.
- *
- * 2) A HS blob (also called ACR) is created that contains the signed HS
- *    firmware in charge of loading the LS firmwares into their respective
- *    falcons.
- *
- * 3) The HS blob is loaded (via its own bootloader) and executed on the
- *    HS-capable falcon. It authenticates itself, switches the secure falcon to
- *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
- *    LS blob into the WPR region (Tegra).
- *
- * 4) The LS blob is now secure from all external tampering. The HS falcon
- *    checks the signatures of the LS firmwares and, if valid, switches the
- *    managed falcons to LS mode and makes them ready to run the LS firmware.
- *
- * 5) The managed falcons remain in LS mode and can be started.
- *
- */
 
-#include "priv.h"
+#include "acr.h"
+#include "gm200.h"
 
 #include <core/gpuobj.h>
-#include <core/firmware.h>
 #include <subdev/fb.h>
-
-enum {
-	FALCON_DMAIDX_UCODE		= 0,
-	FALCON_DMAIDX_VIRT		= 1,
-	FALCON_DMAIDX_PHYS_VID		= 2,
-	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
-	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
-};
-
-/**
- * struct fw_bin_header - header of firmware files
- * @bin_magic:		always 0x3b1d14f0
- * @bin_ver:		version of the bin format
- * @bin_size:		entire image size including this header
- * @header_offset:	offset of the firmware/bootloader header in the file
- * @data_offset:	offset of the firmware/bootloader payload in the file
- * @data_size:		size of the payload
- *
- * This header is located at the beginning of the HS firmware and HS bootloader
- * files, to describe where the headers and data can be found.
- */
-struct fw_bin_header {
-	u32 bin_magic;
-	u32 bin_ver;
-	u32 bin_size;
-	u32 header_offset;
-	u32 data_offset;
-	u32 data_size;
-};
-
-/**
- * struct fw_bl_desc - firmware bootloader descriptor
- * @start_tag:		starting tag of bootloader
- * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
- * @code_off:		offset of code section
- * @code_size:		size of code section
- * @data_off:		offset of data section
- * @data_size:		size of data section
- *
- * This structure is embedded in bootloader firmware files at to describe the
- * IMEM and DMEM layout expected by the bootloader.
- */
-struct fw_bl_desc {
-	u32 start_tag;
-	u32 dmem_load_off;
-	u32 code_off;
-	u32 code_size;
-	u32 data_off;
-	u32 data_size;
-};
-
-
-/*
- *
- * LS blob structures
- *
- */
-
-/**
- * struct lsf_ucode_desc - LS falcon signatures
- * @prd_keys:		signature to use when the GPU is in production mode
- * @dgb_keys:		signature to use when the GPU is in debug mode
- * @b_prd_present:	whether the production key is present
- * @b_dgb_present:	whether the debug key is present
- * @falcon_id:		ID of the falcon the ucode applies to
- *
- * Directly loaded from a signature file.
- */
-struct lsf_ucode_desc {
-	u8  prd_keys[2][16];
-	u8  dbg_keys[2][16];
-	u32 b_prd_present;
-	u32 b_dbg_present;
-	u32 falcon_id;
-};
-
-/**
- * struct lsf_lsb_header - LS firmware header
- * @signature:		signature to verify the firmware against
- * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
- *                      blob contains the bootloader, code and data of the
- *                      LS falcon
- * @ucode_size:		size of the ucode blob, including bootloader
- * @data_size:		size of the ucode blob data
- * @bl_code_size:	size of the bootloader code
- * @bl_imem_off:	offset in imem of the bootloader
- * @bl_data_off:	offset of the bootloader data in WPR region
- * @bl_data_size:	size of the bootloader data
- * @app_code_off:	offset of the app code relative to ucode_off
- * @app_code_size:	size of the app code
- * @app_data_off:	offset of the app data relative to ucode_off
- * @app_data_size:	size of the app data
- * @flags:		flags for the secure bootloader
- *
- * This structure is written into the WPR region for each managed falcon. Each
- * instance is referenced by the lsb_offset member of the corresponding
- * lsf_wpr_header.
- */
-struct lsf_lsb_header {
-	struct lsf_ucode_desc signature;
-	u32 ucode_off;
-	u32 ucode_size;
-	u32 data_size;
-	u32 bl_code_size;
-	u32 bl_imem_off;
-	u32 bl_data_off;
-	u32 bl_data_size;
-	u32 app_code_off;
-	u32 app_code_size;
-	u32 app_data_off;
-	u32 app_data_size;
-	u32 flags;
-#define LSF_FLAG_LOAD_CODE_AT_0		1
-#define LSF_FLAG_DMACTL_REQ_CTX		4
-#define LSF_FLAG_FORCE_PRIV_LOAD	8
-};
-
-/**
- * struct lsf_wpr_header - LS blob WPR Header
- * @falcon_id:		LS falcon ID
- * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
- * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
- * @lazy_bootstrap:	skip bootstrapping by ACR
- * @status:		bootstrapping status
- *
- * An array of these is written at the beginning of the WPR region, one for
- * each managed falcon. The array is terminated by an instance which falcon_id
- * is LSF_FALCON_ID_INVALID.
- */
-struct lsf_wpr_header {
-	u32  falcon_id;
-	u32  lsb_offset;
-	u32  bootstrap_owner;
-	u32  lazy_bootstrap;
-	u32  status;
-#define LSF_IMAGE_STATUS_NONE				0
-#define LSF_IMAGE_STATUS_COPY				1
-#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
-#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
-#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
-#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
-#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
-};
-
-
-/**
- * struct ls_ucode_img_desc - descriptor of firmware image
- * @descriptor_size:		size of this descriptor
- * @image_size:			size of the whole image
- * @bootloader_start_offset:	start offset of the bootloader in ucode image
- * @bootloader_size:		size of the bootloader
- * @bootloader_imem_offset:	start off set of the bootloader in IMEM
- * @bootloader_entry_point:	entry point of the bootloader in IMEM
- * @app_start_offset:		start offset of the LS firmware
- * @app_size:			size of the LS firmware's code and data
- * @app_imem_offset:		offset of the app in IMEM
- * @app_imem_entry:		entry point of the app in IMEM
- * @app_dmem_offset:		offset of the data in DMEM
- * @app_resident_code_offset:	offset of app code from app_start_offset
- * @app_resident_code_size:	size of the code
- * @app_resident_data_offset:	offset of data from app_start_offset
- * @app_resident_data_size:	size of data
- *
- * A firmware image contains the code, data, and bootloader of a given LS
- * falcon in a single blob. This structure describes where everything is.
- *
- * This can be generated from a (bootloader, code, data) set if they have
- * been loaded separately, or come directly from a file.
- */
-struct ls_ucode_img_desc {
-	u32 descriptor_size;
-	u32 image_size;
-	u32 tools_version;
-	u32 app_version;
-	char date[64];
-	u32 bootloader_start_offset;
-	u32 bootloader_size;
-	u32 bootloader_imem_offset;
-	u32 bootloader_entry_point;
-	u32 app_start_offset;
-	u32 app_size;
-	u32 app_imem_offset;
-	u32 app_imem_entry;
-	u32 app_dmem_offset;
-	u32 app_resident_code_offset;
-	u32 app_resident_code_size;
-	u32 app_resident_data_offset;
-	u32 app_resident_data_size;
-	u32 nb_overlays;
-	struct {u32 start; u32 size; } load_ovl[64];
-	u32 compressed;
-};
-
-/**
- * struct ls_ucode_img - temporary storage for loaded LS firmwares
- * @node:		to link within lsf_ucode_mgr
- * @falcon_id:		ID of the falcon this LS firmware is for
- * @ucode_desc:		loaded or generated map of ucode_data
- * @ucode_header:	header of the firmware
- * @ucode_data:		firmware payload (code and data)
- * @ucode_size:		size in bytes of data in ucode_data
- * @wpr_header:		WPR header to be written to the LS blob
- * @lsb_header:		LSB header to be written to the LS blob
- *
- * Preparing the WPR LS blob requires information about all the LS firmwares
- * (size, etc) to be known. This structure contains all the data of one LS
- * firmware.
- */
-struct ls_ucode_img {
-	struct list_head node;
-	enum nvkm_secboot_falcon falcon_id;
-
-	struct ls_ucode_img_desc ucode_desc;
-	u32 *ucode_header;
-	u8 *ucode_data;
-	u32 ucode_size;
-
-	struct lsf_wpr_header wpr_header;
-	struct lsf_lsb_header lsb_header;
-};
-
-/**
- * struct ls_ucode_mgr - manager for all LS falcon firmwares
- * @count:	number of managed LS falcons
- * @wpr_size:	size of the required WPR region in bytes
- * @img_list:	linked list of lsf_ucode_img
- */
-struct ls_ucode_mgr {
-	u16 count;
-	u32 wpr_size;
-	struct list_head img_list;
-};
-
-
-/*
- *
- * HS blob structures
- *
- */
-
-/**
- * struct hsf_fw_header - HS firmware descriptor
- * @sig_dbg_offset:	offset of the debug signature
- * @sig_dbg_size:	size of the debug signature
- * @sig_prod_offset:	offset of the production signature
- * @sig_prod_size:	size of the production signature
- * @patch_loc:		offset of the offset (sic) of where the signature is
- * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
- * @hdr_offset:		offset of the load header (see struct hs_load_header)
- * @hdr_size:		size of above header
- *
- * This structure is embedded in the HS firmware image at
- * hs_bin_hdr.header_offset.
- */
-struct hsf_fw_header {
-	u32 sig_dbg_offset;
-	u32 sig_dbg_size;
-	u32 sig_prod_offset;
-	u32 sig_prod_size;
-	u32 patch_loc;
-	u32 patch_sig;
-	u32 hdr_offset;
-	u32 hdr_size;
-};
-
-/**
- * struct hsf_load_header - HS firmware load header
- */
-struct hsf_load_header {
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 data_dma_base;
-	u32 data_size;
-	u32 num_apps;
-	struct {
-		u32 sec_code_off;
-		u32 sec_code_size;
-	} app[0];
-};
-
-/**
- * Convenience function to duplicate a firmware file in memory and check that
- * it has the required minimum size.
- */
-static void *
-gm200_secboot_load_firmware(struct nvkm_subdev *subdev, const char *name,
-		    size_t min_size)
-{
-	const struct firmware *fw;
-	void *blob;
-	int ret;
-
-	ret = nvkm_firmware_get(subdev->device, name, &fw);
-	if (ret)
-		return ERR_PTR(ret);
-	if (fw->size < min_size) {
-		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
-			   name, min_size);
-		nvkm_firmware_put(fw);
-		return ERR_PTR(-EINVAL);
-	}
-	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
-	nvkm_firmware_put(fw);
-	if (!blob)
-		return ERR_PTR(-ENOMEM);
-
-	return blob;
-}
-
-
-/*
- * Low-secure blob creation
- */
-
-#define BL_DESC_BLK_SIZE 256
-/**
- * Build a ucode image and descriptor from provided bootloader, code and data.
- *
- * @bl:		bootloader image, including 16-bytes descriptor
- * @code:	LS firmware code segment
- * @data:	LS firmware data segment
- * @desc:	ucode descriptor to be written
- *
- * Return: allocated ucode image with corresponding descriptor information. desc
- *         is also updated to contain the right offsets within returned image.
- */
-static void *
-ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
-		   const struct firmware *data, struct ls_ucode_img_desc *desc)
-{
-	struct fw_bin_header *bin_hdr = (void *)bl->data;
-	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
-	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
-	u32 pos = 0;
-	void *image;
-
-	desc->bootloader_start_offset = pos;
-	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
-	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
-	desc->bootloader_entry_point = bl_desc->start_tag * 256;
-
-	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
-	desc->app_start_offset = pos;
-	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
-			 ALIGN(data->size, BL_DESC_BLK_SIZE);
-	desc->app_imem_offset = 0;
-	desc->app_imem_entry = 0;
-	desc->app_dmem_offset = 0;
-	desc->app_resident_code_offset = 0;
-	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
-
-	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
-	desc->app_resident_data_offset = pos - desc->app_start_offset;
-	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
-
-	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
-			   desc->app_size;
-
-	image = kzalloc(desc->image_size, GFP_KERNEL);
-	if (!image)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(image + desc->bootloader_start_offset, bl_data,
-	       bl_desc->code_size);
-	memcpy(image + desc->app_start_offset, code->data, code->size);
-	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
-	       data->data, data->size);
-
-	return image;
-}
-
-/**
- * ls_ucode_img_load_generic() - load and prepare a LS ucode image
- *
- * Load the LS microcode, bootloader and signature and pack them into a single
- * blob. Also generate the corresponding ucode descriptor.
- */
-static int
-ls_ucode_img_load_generic(struct nvkm_subdev *subdev,
-			  struct ls_ucode_img *img, const char *falcon_name,
-			  const u32 falcon_id)
-{
-	const struct firmware *bl, *code, *data;
-	struct lsf_ucode_desc *lsf_desc;
-	char f[64];
-	int ret;
-
-	img->ucode_header = NULL;
-
-	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &bl);
-	if (ret)
-		goto error;
-
-	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &code);
-	if (ret)
-		goto free_bl;
-
-	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &data);
-	if (ret)
-		goto free_inst;
-
-	img->ucode_data = ls_ucode_img_build(bl, code, data,
-					     &img->ucode_desc);
-	if (IS_ERR(img->ucode_data)) {
-		ret = PTR_ERR(img->ucode_data);
-		goto free_data;
-	}
-	img->ucode_size = img->ucode_desc.image_size;
-
-	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
-	lsf_desc = gm200_secboot_load_firmware(subdev, f, sizeof(*lsf_desc));
-	if (IS_ERR(lsf_desc)) {
-		ret = PTR_ERR(lsf_desc);
-		goto free_image;
-	}
-	/* not needed? the signature should already have the right value */
-	lsf_desc->falcon_id = falcon_id;
-	memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc));
-	img->falcon_id = lsf_desc->falcon_id;
-	kfree(lsf_desc);
-
-	/* success path - only free requested firmware files */
-	goto free_data;
-
-free_image:
-	kfree(img->ucode_data);
-free_data:
-	nvkm_firmware_put(data);
-free_inst:
-	nvkm_firmware_put(code);
-free_bl:
-	nvkm_firmware_put(bl);
-error:
-	return ret;
-}
-
-typedef int (*lsf_load_func)(struct nvkm_subdev *, struct ls_ucode_img *);
-
-static int
-ls_ucode_img_load_fecs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "fecs",
-					 NVKM_SECBOOT_FALCON_FECS);
-}
-
-static int
-ls_ucode_img_load_gpccs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "gpccs",
-					 NVKM_SECBOOT_FALCON_GPCCS);
-}
-
-/**
- * ls_ucode_img_load() - create a lsf_ucode_img and load it
- */
-static struct ls_ucode_img *
-ls_ucode_img_load(struct nvkm_subdev *subdev, lsf_load_func load_func)
-{
-	struct ls_ucode_img *img;
-	int ret;
-
-	img = kzalloc(sizeof(*img), GFP_KERNEL);
-	if (!img)
-		return ERR_PTR(-ENOMEM);
-
-	ret = load_func(subdev, img);
-	if (ret) {
-		kfree(img);
-		return ERR_PTR(ret);
-	}
-
-	return img;
-}
-
-static const lsf_load_func lsf_load_funcs[] = {
-	[NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */
-	[NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs,
-	[NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs,
-};
-
-/**
- * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image
- * @img:	ucode image to generate against
- * @desc:	descriptor to populate
- * @sb:		secure boot state to use for base addresses
- *
- * Populate the DMEM BL descriptor with the information contained in a
- * ls_ucode_desc.
- *
- */
-static void
-ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr,
-			      struct gm200_flcn_bl_desc *desc)
-{
-	struct ls_ucode_img_desc *pdesc = &img->ucode_desc;
-	u64 addr_base;
-
-	addr_base = wpr_addr + img->lsb_header.ucode_off +
-		    pdesc->app_start_offset;
-
-	memset(desc, 0, sizeof(*desc));
-	desc->ctx_dma = FALCON_DMAIDX_UCODE;
-	desc->code_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->code_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->non_sec_code_size = pdesc->app_resident_code_size;
-	desc->data_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_size = pdesc->app_resident_data_size;
-	desc->code_entry_point = pdesc->app_imem_entry;
-}
-
-#define LSF_LSB_HEADER_ALIGN 256
-#define LSF_BL_DATA_ALIGN 256
-#define LSF_BL_DATA_SIZE_ALIGN 256
-#define LSF_BL_CODE_SIZE_ALIGN 256
-#define LSF_UCODE_DATA_ALIGN 4096
-
-/**
- * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image
- * @gsb:	secure boot device used
- * @img:	image to generate for
- * @offset:	offset in the WPR region where this image starts
- *
- * Allocate space in the WPR area from offset and write the WPR and LSB headers
- * accordingly.
- *
- * Return: offset at the end of this image.
- */
-static u32
-ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img,
-			  u32 offset)
-{
-	struct lsf_wpr_header *whdr = &img->wpr_header;
-	struct lsf_lsb_header *lhdr = &img->lsb_header;
-	struct ls_ucode_img_desc *desc = &img->ucode_desc;
-
-	if (img->ucode_header) {
-		nvkm_fatal(&gsb->base.subdev,
-			    "images withough loader are not supported yet!\n");
-		return offset;
-	}
-
-	/* Fill WPR header */
-	whdr->falcon_id = img->falcon_id;
-	whdr->bootstrap_owner = gsb->base.func->boot_falcon;
-	whdr->status = LSF_IMAGE_STATUS_COPY;
-
-	/* Align, save off, and include an LSB header size */
-	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
-	whdr->lsb_offset = offset;
-	offset += sizeof(struct lsf_lsb_header);
-
-	/*
-	 * Align, save off, and include the original (static) ucode
-	 * image size
-	 */
-	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
-	lhdr->ucode_off = offset;
-	offset += img->ucode_size;
-
-	/*
-	 * For falcons that use a boot loader (BL), we append a loader
-	 * desc structure on the end of the ucode image and consider
-	 * this the boot loader data. The host will then copy the loader
-	 * desc args to this space within the WPR region (before locking
-	 * down) and the HS bin will then copy them to DMEM 0 for the
-	 * loader.
-	 */
-	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
-				   LSF_BL_CODE_SIZE_ALIGN);
-	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
-				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
-	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
-				lhdr->bl_code_size - lhdr->ucode_size;
-	/*
-	 * Though the BL is located at 0th offset of the image, the VA
-	 * is different to make sure that it doesn't collide the actual
-	 * OS VA range
-	 */
-	lhdr->bl_imem_off = desc->bootloader_imem_offset;
-	lhdr->app_code_off = desc->app_start_offset +
-			     desc->app_resident_code_offset;
-	lhdr->app_code_size = desc->app_resident_code_size;
-	lhdr->app_data_off = desc->app_start_offset +
-			     desc->app_resident_data_offset;
-	lhdr->app_data_size = desc->app_resident_data_size;
-
-	lhdr->flags = 0;
-	if (img->falcon_id == gsb->base.func->boot_falcon)
-		lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX;
-
-	/* GPCCS will be loaded using PRI */
-	if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS)
-		lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD;
-
-	/* Align (size bloat) and save off BL descriptor size */
-	lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc),
-				   LSF_BL_DATA_SIZE_ALIGN);
-	/*
-	 * Align, save off, and include the additional BL data
-	 */
-	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
-	lhdr->bl_data_off = offset;
-	offset += lhdr->bl_data_size;
-
-	return offset;
-}
-
-static void
-ls_ucode_mgr_init(struct ls_ucode_mgr *mgr)
-{
-	memset(mgr, 0, sizeof(*mgr));
-	INIT_LIST_HEAD(&mgr->img_list);
-}
-
-static void
-ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img, *t;
-
-	list_for_each_entry_safe(img, t, &mgr->img_list, node) {
-		kfree(img->ucode_data);
-		kfree(img->ucode_header);
-		kfree(img);
-	}
-}
-
-static void
-ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img)
-{
-	mgr->count++;
-	list_add_tail(&img->node, &mgr->img_list);
-}
-
-/**
- * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images
- */
-static void
-ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img;
-	u32 offset;
-
-	/*
-	 * Start with an array of WPR headers at the base of the WPR.
-	 * The expectation here is that the secure falcon will do a single DMA
-	 * read of this array and cache it internally so it's ok to pack these.
-	 * Also, we add 1 to the falcon count to indicate the end of the array.
-	 */
-	offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1);
-
-	/*
-	 * Walk the managed falcons, accounting for the LSB structs
-	 * as well as the ucode images.
-	 */
-	list_for_each_entry(img, &mgr->img_list, node) {
-		offset = ls_ucode_img_fill_headers(gsb, img, offset);
-	}
-
-	mgr->wpr_size = offset;
-}
-
-/**
- * ls_ucode_mgr_write_wpr - write the WPR blob contents
- */
-static int
-ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr,
-		       struct nvkm_gpuobj *wpr_blob)
-{
-	struct ls_ucode_img *img;
-	u32 pos = 0;
-
-	nvkm_kmap(wpr_blob);
-
-	list_for_each_entry(img, &mgr->img_list, node) {
-		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
-				      sizeof(img->wpr_header));
-
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
-				     &img->lsb_header, sizeof(img->lsb_header));
-
-		/* Generate and write BL descriptor */
-		if (!img->ucode_header) {
-			u8 desc[gsb->func->bl_desc_size];
-			struct gm200_flcn_bl_desc gdesc;
-
-			ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr,
-						      &gdesc);
-			gsb->func->fixup_bl_desc(&gdesc, &desc);
-			nvkm_gpuobj_memcpy_to(wpr_blob,
-					      img->lsb_header.bl_data_off,
-					      &desc, gsb->func->bl_desc_size);
-		}
-
-		/* Copy ucode */
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
-				      img->ucode_data, img->ucode_size);
-
-		pos += sizeof(img->wpr_header);
-	}
-
-	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
-
-	nvkm_done(wpr_blob);
-
-	return 0;
-}
-
-/* Both size and address of WPR need to be 128K-aligned */
-#define WPR_ALIGNMENT	0x20000
-/**
- * gm200_secboot_prepare_ls_blob() - prepare the LS blob
- *
- * For each securely managed falcon, load the FW, signatures and bootloaders and
- * prepare a ucode blob. Then, compute the offsets in the WPR region for each
- * blob, and finally write the headers and ucode blobs into a GPU object that
- * will be copied into the WPR region by the HS firmware.
- */
-static int
-gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct nvkm_device *device = sb->subdev.device;
-	struct ls_ucode_mgr mgr;
-	int falcon_id;
-	int ret;
-
-	ls_ucode_mgr_init(&mgr);
-
-	/* Load all LS blobs */
-	for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons,
-			 NVKM_SECBOOT_FALCON_END) {
-		struct ls_ucode_img *img;
-
-		img = ls_ucode_img_load(&sb->subdev, lsf_load_funcs[falcon_id]);
-
-		if (IS_ERR(img)) {
-			ret = PTR_ERR(img);
-			goto cleanup;
-		}
-		ls_ucode_mgr_add_img(&mgr, img);
-	}
-
-	/*
-	 * Fill the WPR and LSF headers with the right offsets and compute
-	 * required WPR size
-	 */
-	ls_ucode_mgr_fill_headers(gsb, &mgr);
-	mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT);
-
-	/* Allocate GPU object that will contain the WPR region */
-	ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL,
-			      &gsb->ls_blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n",
-		    mgr.count, mgr.wpr_size);
-
-	/* If WPR address and size are not fixed, set them to fit the LS blob */
-	if (!gsb->wpr_size) {
-		gsb->wpr_addr = gsb->ls_blob->addr;
-		gsb->wpr_size = gsb->ls_blob->size;
-	}
-
-	/* Write LS blob */
-	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
-	if (ret)
-		nvkm_gpuobj_del(&gsb->ls_blob);
-
-cleanup:
-	ls_ucode_mgr_cleanup(&mgr);
-
-	return ret;
-}
-
-/*
- * High-secure blob creation
- */
-
-/**
- * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature
- */
-static void
-gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	void *hs_data = acr_image + hsbin_hdr->data_offset;
-	void *sig;
-	u32 sig_size;
-
-	/* Falcon in debug or production mode? */
-	if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) {
-		sig = acr_image + fw_hdr->sig_dbg_offset;
-		sig_size = fw_hdr->sig_dbg_size;
-	} else {
-		sig = acr_image + fw_hdr->sig_prod_offset;
-		sig_size = fw_hdr->sig_prod_size;
-	}
-
-	/* Patch signature */
-	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
-}
-
-/**
- * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image
- */
-static void
-gm200_secboot_populate_hsf_bl_desc(void *acr_image,
-				   struct gm200_flcn_bl_desc *bl_desc)
-{
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset;
-
-	/*
-	 * Descriptor for the bootloader that will load the ACR image into
-	 * IMEM/DMEM memory.
-	 */
-	fw_hdr = acr_image + hsbin_hdr->header_offset;
-	load_hdr = acr_image + fw_hdr->hdr_offset;
-	memset(bl_desc, 0, sizeof(*bl_desc));
-	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
-	bl_desc->non_sec_code_off = load_hdr->non_sec_code_off;
-	bl_desc->non_sec_code_size = load_hdr->non_sec_code_size;
-	bl_desc->sec_code_off = load_hdr->app[0].sec_code_off;
-	bl_desc->sec_code_size = load_hdr->app[0].sec_code_size;
-	bl_desc->code_entry_point = 0;
-	/*
-	 * We need to set code_dma_base to the virtual address of the acr_blob,
-	 * and add this address to data_dma_base before writing it into DMEM
-	 */
-	bl_desc->code_dma_base.lo = 0;
-	bl_desc->data_dma_base.lo = load_hdr->data_dma_base;
-	bl_desc->data_size = load_hdr->data_size;
-}
-
-/**
- * gm200_secboot_prepare_hs_blob - load and prepare a HS blob and BL descriptor
- *
- * @gsb secure boot instance to prepare for
- * @fw name of the HS firmware to load
- * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
- * @bl_desc pointer to the BL descriptor to write for this firmware
- * @patch whether we should patch the HS descriptor (only for HS loaders)
- */
-static int
-gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb, const char *fw,
-			      struct nvkm_gpuobj **blob,
-			      struct gm200_flcn_bl_desc *bl_desc, bool patch)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	void *acr_image;
-	struct fw_bin_header *hsbin_hdr;
-	struct hsf_fw_header *fw_hdr;
-	void *acr_data;
-	struct hsf_load_header *load_hdr;
-	struct hsflcn_acr_desc *desc;
-	int ret;
-
-	acr_image = gm200_secboot_load_firmware(subdev, fw, 0);
-	if (IS_ERR(acr_image))
-		return PTR_ERR(acr_image);
-	hsbin_hdr = acr_image;
-
-	/* Patch signature */
-	gm200_secboot_hsf_patch_signature(gsb, acr_image);
-
-	acr_data = acr_image + hsbin_hdr->data_offset;
-
-	/* Patch descriptor? */
-	if (patch) {
-		fw_hdr = acr_image + hsbin_hdr->header_offset;
-		load_hdr = acr_image + fw_hdr->hdr_offset;
-		desc = acr_data + load_hdr->data_dma_base;
-		gsb->func->fixup_hs_desc(gsb, desc);
-	}
-
-	/* Generate HS BL descriptor */
-	gm200_secboot_populate_hsf_bl_desc(acr_image, bl_desc);
-
-	/* Create ACR blob and copy HS data to it */
-	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
-			      0x1000, false, NULL, blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_kmap(*blob);
-	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
-	nvkm_done(*blob);
-
-cleanup:
-	kfree(acr_image);
-
-	return ret;
-}
-
-/*
- * High-secure bootloader blob creation
- */
-
-static int
-gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-
-	gsb->hsbl_blob = gm200_secboot_load_firmware(subdev, "acr/bl", 0);
-	if (IS_ERR(gsb->hsbl_blob)) {
-		int ret = PTR_ERR(gsb->hsbl_blob);
-
-		gsb->hsbl_blob = NULL;
-		return ret;
-	}
-
-	return 0;
-}
+#include <engine/falcon.h>
+#include <subdev/mc.h>
 
 /**
- * gm20x_secboot_prepare_blobs - load blobs common to all GM20X GPUs.
+ * gm200_secboot_run_blob() - run the given high-secure blob
  *
- * This includes the LS blob, HS ucode loading blob, and HS bootloader.
- *
- * The HS ucode unload blob is only used on dGPU.
  */
 int
-gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	/* Load and prepare the managed falcon's firmwares */
-	if (!gsb->ls_blob) {
-		ret = gm200_secboot_prepare_ls_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware that will load the LS firmwares */
-	if (!gsb->acr_load_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-						&gsb->acr_load_blob,
-						&gsb->acr_load_bl_desc, true);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware bootloader */
-	if (!gsb->hsbl_blob) {
-		ret = gm200_secboot_prepare_hsbl_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	if (!gsb->acr_unload_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					       &gsb->acr_unload_blob,
-					       &gsb->acr_unload_bl_desc, false);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_blobs_ready(struct gm200_secboot *gsb)
+gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob)
 {
+	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	struct nvkm_falcon *falcon = gsb->base.boot_falcon;
+	struct nvkm_vma vma;
 	int ret;
 
-	/* firmware already loaded, nothing to do... */
-	if (gsb->firmware_ok)
-		return 0;
-
-	ret = gsb->func->prepare_blobs(gsb);
-	if (ret) {
-		nvkm_error(subdev, "failed to load secure firmware\n");
-		return ret;
-	}
-
-	gsb->firmware_ok = true;
-
-	return 0;
-}
-
-
-/*
- * Secure Boot Execution
- */
-
-/**
- * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM
- */
-static void
-gm200_secboot_load_hs_bl(struct gm200_secboot *gsb, void *data, u32 data_size)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	void *blob_data = gsb->hsbl_blob + hdr->data_offset;
-	void *hsbl_code = blob_data + hsbl_desc->code_off;
-	void *hsbl_data = blob_data + hsbl_desc->data_off;
-	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
-	const u32 base = gsb->base.base;
-	u32 blk;
-	u32 tag;
-	int i;
-
-	/*
-	 * Copy HS bootloader data
-	 */
-	nvkm_wr32(device, base + 0x1c0, (0x00000000 | (0x1 << 24)));
-	for (i = 0; i < hsbl_desc->data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)hsbl_data)[i]);
-
-	/*
-	 * Copy HS bootloader interface structure where the HS descriptor
-	 * expects it to be
-	 */
-	nvkm_wr32(device, base + 0x1c0,
-		  (hsbl_desc->dmem_load_off | (0x1 << 24)));
-	for (i = 0; i < data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)data)[i]);
-
-	/* Copy HS bootloader code to end of IMEM */
-	blk = (nvkm_rd32(device, base + 0x108) & 0x1ff) - (code_size >> 8);
-	tag = hsbl_desc->start_tag;
-	nvkm_wr32(device, base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24));
-	for (i = 0; i < code_size / 4; i++) {
-		/* write new tag every 256B */
-		if ((i & 0x3f) == 0) {
-			nvkm_wr32(device, base + 0x188, tag & 0xffff);
-			tag++;
-		}
-		nvkm_wr32(device, base + 0x184, ((u32 *)hsbl_code)[i]);
-	}
-	nvkm_wr32(device, base + 0x188, 0);
-}
-
-/**
- * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot
- */
-static int
-gm200_secboot_setup_falcon(struct gm200_secboot *gsb)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	/* virtual start address for boot vector */
-	u32 virt_addr = hsbl_desc->start_tag << 8;
-	const u32 base = gsb->base.base;
-	const u32 reg_base = base + 0xe00;
-	u32 inst_loc;
-	int ret;
-
-	ret = nvkm_secboot_falcon_reset(&gsb->base);
+	ret = nvkm_falcon_get(falcon, subdev);
 	if (ret)
 		return ret;
 
-	/* setup apertures - virtual */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0);
-	/* setup apertures - physical */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH),
-		  0x4 | 0x1);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH),
-		  0x4 | 0x2);
-
-	/* Set context */
-	if (nvkm_memory_target(gsb->inst->memory) == NVKM_MEM_TARGET_VRAM)
-		inst_loc = 0x0; /* FB */
-	else
-		inst_loc = 0x3; /* Non-coherent sysmem */
-
-	nvkm_mask(device, base + 0x048, 0x1, 0x1);
-	nvkm_wr32(device, base + 0x480,
-		  ((gsb->inst->addr >> 12) & 0xfffffff) |
-		  (inst_loc << 28) | (1 << 30));
-
-	/* Set boot vector to code's starting virtual address */
-	nvkm_wr32(device, base + 0x104, virt_addr);
-
-	return 0;
-}
-
-/**
- * gm200_secboot_run_hs_blob() - run the given high-secure blob
- */
-static int
-gm200_secboot_run_hs_blob(struct gm200_secboot *gsb, struct nvkm_gpuobj *blob,
-			  struct gm200_flcn_bl_desc *desc)
-{
-	struct nvkm_vma vma;
-	u64 vma_addr;
-	const u32 bl_desc_size = gsb->func->bl_desc_size;
-	u8 bl_desc[bl_desc_size];
-	int ret;
-
 	/* Map the HS firmware so the HS bootloader can see it */
 	ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma);
-	if (ret)
+	if (ret) {
+		nvkm_falcon_put(falcon, subdev);
 		return ret;
+	}
 
-	/* Add the mapping address to the DMA bases */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) + vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) + vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
-
-	/* Fixup the BL header */
-	gsb->func->fixup_bl_desc(desc, &bl_desc);
-
-	/* Reset the falcon and make it ready to run the HS bootloader */
-	ret = gm200_secboot_setup_falcon(gsb);
+	/* Reset and set the falcon up */
+	ret = nvkm_falcon_reset(falcon);
 	if (ret)
-		goto done;
+		goto end;
+	nvkm_falcon_bind_context(falcon, gsb->inst);
 
 	/* Load the HS bootloader into the falcon's IMEM/DMEM */
-	gm200_secboot_load_hs_bl(gsb, &bl_desc, bl_desc_size);
-
-	/* Start the HS bootloader */
-	ret = nvkm_secboot_falcon_run(&gsb->base);
+	ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset);
 	if (ret)
-		goto done;
-
-done:
-	/* Restore the original DMA addresses */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) - vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) - vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
-
-	/* We don't need the ACR firmware anymore */
-	nvkm_gpuobj_unmap(&vma);
+		goto end;
 
-	return ret;
-}
+	/* Disable interrupts as we will poll for the HALT bit */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, false);
 
-/*
- * gm200_secboot_reset() - execute secure boot from the prepared state
- *
- * Load the HS bootloader and ask the falcon to run it. This will in turn
- * load the HS firmware and run it, so once the falcon stops all the managed
- * falcons should have their LS firmware loaded and be ready to run.
- */
-int
-gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int ret;
+	/* Set default error value in mailbox register */
+	nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5);
 
-	/* Make sure all blobs are ready */
-	ret = gm200_secboot_blobs_ready(gsb);
+	/* Start the HS bootloader */
+	nvkm_falcon_set_start_addr(falcon, sb->acr->start_address);
+	nvkm_falcon_start(falcon);
+	ret = nvkm_falcon_wait_for_halt(falcon, 100);
 	if (ret)
-		return ret;
-
-	/*
-	 * Dummy GM200 implementation: perform secure boot each time we are
-	 * called on FECS. Since only FECS and GPCCS are managed and started
-	 * together, this ought to be safe.
-	 *
-	 * Once we have proper PMU firmware and support, this will be changed
-	 * to a proper call to the PMU method.
-	 */
-	if (falcon != NVKM_SECBOOT_FALCON_FECS)
 		goto end;
 
-	/* If WPR is set and we have an unload blob, run it to unlock WPR */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) {
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
-		if (ret)
-			return ret;
+	/* If mailbox register contains an error code, then ACR has failed */
+	ret = nvkm_falcon_rd32(falcon, 0x040);
+	if (ret) {
+		nvkm_error(subdev, "ACR boot failed, ret 0x%08x", ret);
+		ret = -EINVAL;
+		goto end;
 	}
 
-	/* Reload all managed falcons */
-	ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_load_blob,
-					&gsb->acr_load_bl_desc);
-	if (ret)
-		return ret;
-
 end:
-	gsb->falcon_state[falcon] = RESET;
-	return 0;
-}
+	/* Reenable interrupts */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true);
 
-int
-gm200_secboot_start(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int base;
-
-	switch (falcon) {
-	case NVKM_SECBOOT_FALCON_FECS:
-		base = 0x409000;
-		break;
-	case NVKM_SECBOOT_FALCON_GPCCS:
-		base = 0x41a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "cannot start unhandled falcon!\n");
-		return -EINVAL;
-	}
-
-	nvkm_wr32(sb->subdev.device, base + 0x130, 0x00000002);
-	gsb->falcon_state[falcon] = RUNNING;
+	/* We don't need the ACR firmware anymore */
+	nvkm_gpuobj_unmap(&vma);
+	nvkm_falcon_put(falcon, subdev);
 
-	return 0;
+	return ret;
 }
 
-
-
 int
-gm200_secboot_init(struct nvkm_secboot *sb)
+gm200_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_device *device = sb->subdev.device;
@@ -1361,24 +132,22 @@ gm200_secboot_init(struct nvkm_secboot *sb)
 	nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1));
 	nvkm_done(gsb->inst);
 
+	if (sb->acr->func->oneinit) {
+		ret = sb->acr->func->oneinit(sb->acr, sb);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
-static int
+int
 gm200_secboot_fini(struct nvkm_secboot *sb, bool suspend)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret = 0;
-	int i;
 
-	/* Run the unload blob to unprotect the WPR region */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE)
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
-
-	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
-		gsb->falcon_state[i] = NON_SECURE;
+	if (sb->acr->func->fini)
+		ret = sb->acr->func->fini(sb->acr, sb, suspend);
 
 	return ret;
 }
@@ -1388,11 +157,7 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 
-	nvkm_gpuobj_del(&gsb->acr_unload_blob);
-
-	kfree(gsb->hsbl_blob);
-	nvkm_gpuobj_del(&gsb->acr_load_blob);
-	nvkm_gpuobj_del(&gsb->ls_blob);
+	sb->acr->func->dtor(sb->acr);
 
 	nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd);
 	nvkm_gpuobj_del(&gsb->pgd);
@@ -1405,50 +170,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 static const struct nvkm_secboot_func
 gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm200_secboot_init,
+	.oneinit = gm200_secboot_oneinit,
 	.fini = gm200_secboot_fini,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
-			   BIT(NVKM_SECBOOT_FALCON_GPCCS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
-};
-
-/**
- * gm200_fixup_bl_desc - just copy the BL descriptor
- *
- * Use the GM200 descriptor format by default.
- */
-static void
-gm200_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	memcpy(ret, desc, sizeof(*desc));
-}
-
-static void
-gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-
-	/* WPR region information for the HS binary to set up */
-	desc->wpr_region_id = 1;
-	desc->regions.no_regions = 1;
-	desc->regions.region_props[0].region_id = 1;
-	desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8;
-	desc->regions.region_props[0].end_addr =
-		(gsb->wpr_addr + gsb->wpr_size) >> 8;
-}
-
-static const struct gm200_secboot_func
-gm200_secboot_func = {
-	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
-	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -1457,6 +181,12 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r361_new(BIT(NVKM_SECBOOT_FALCON_FECS) |
+			   BIT(NVKM_SECBOOT_FALCON_GPCCS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -1465,15 +195,14 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm200_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm200_secboot_func;
-
 	return 0;
 }
 
+
 MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_load.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
new file mode 100644
index 000000000000..45adf1a3bc20
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_GM200_H__
+#define __NVKM_SECBOOT_GM200_H__
+
+#include "priv.h"
+
+struct gm200_secboot {
+	struct nvkm_secboot base;
+
+	/* Instance block & address space used for HS FW execution */
+	struct nvkm_gpuobj *inst;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+};
+#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
+
+int gm200_secboot_oneinit(struct nvkm_secboot *);
+int gm200_secboot_fini(struct nvkm_secboot *, bool);
+void *gm200_secboot_dtor(struct nvkm_secboot *);
+int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index d5395ebfe8d3..6707b8edc086 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -20,103 +20,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include "priv.h"
-
-#include <core/gpuobj.h>
-
-/*
- * The BL header format used by GM20B's firmware is slightly different
- * from the one of GM200. Fix the differences here.
- */
-struct gm20b_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	u32 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	u32 data_dma_base;
-	u32 data_size;
-};
-
-static int
-gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(subdev, "required: %dB\n", acr_size);
-		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-/**
- * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
- *
- * There is only a slight format difference (DMA addresses being 32-bits and
- * 256B-aligned) to address.
- */
-static void
-gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	struct gm20b_flcn_bl_desc *gdesc = ret;
-	u64 addr;
-
-	memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved));
-	memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature));
-	gdesc->ctx_dma = desc->ctx_dma;
-	addr = desc->code_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->code_dma_base.lo;
-	gdesc->code_dma_base = lower_32_bits(addr >> 8);
-	gdesc->non_sec_code_off = desc->non_sec_code_off;
-	gdesc->non_sec_code_size = desc->non_sec_code_size;
-	gdesc->sec_code_off = desc->sec_code_off;
-	gdesc->sec_code_size = desc->sec_code_size;
-	gdesc->code_entry_point = desc->code_entry_point;
-	addr = desc->data_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->data_dma_base.lo;
-	gdesc->data_dma_base = lower_32_bits(addr >> 8);
-	gdesc->data_size = desc->data_size;
-}
-
-static void
-gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-}
-
-static const struct gm200_secboot_func
-gm20b_secboot_func = {
-	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
-	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
-};
-
+#include "acr.h"
+#include "gm200.h"
 
 #ifdef CONFIG_ARCH_TEGRA
 #define TEGRA_MC_BASE				0x70019000
@@ -144,15 +49,15 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 		nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
 		return PTR_ERR(mc);
 	}
-	gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
 	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
-	gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+	sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
 		<< 17;
 	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
 	iounmap(mc);
 
 	/* Check that WPR settings are valid */
-	if (gsb->wpr_size == 0) {
+	if (sb->wpr_size == 0) {
 		nvkm_error(&sb->subdev, "WPR region is empty\n");
 		return -EINVAL;
 	}
@@ -174,7 +79,7 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_init(struct nvkm_secboot *sb)
+gm20b_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
@@ -183,17 +88,15 @@ gm20b_secboot_init(struct nvkm_secboot *sb)
 	if (ret)
 		return ret;
 
-	return gm200_secboot_init(sb);
+	return gm200_secboot_oneinit(sb);
 }
 
 static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm20b_secboot_init,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+	.oneinit = gm20b_secboot_oneinit,
+	.fini = gm200_secboot_fini,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -202,6 +105,11 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -210,12 +118,10 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm20b_secboot_func;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
new file mode 100644
index 000000000000..00886cee57eb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_LS_UCODE_H__
+#define __NVKM_SECBOOT_LS_UCODE_H__
+
+#include <core/os.h>
+#include <core/subdev.h>
+#include <subdev/secboot.h>
+
+
+/**
+ * struct ls_ucode_img_desc - descriptor of firmware image
+ * @descriptor_size:		size of this descriptor
+ * @image_size:			size of the whole image
+ * @bootloader_start_offset:	start offset of the bootloader in ucode image
+ * @bootloader_size:		size of the bootloader
+ * @bootloader_imem_offset:	start off set of the bootloader in IMEM
+ * @bootloader_entry_point:	entry point of the bootloader in IMEM
+ * @app_start_offset:		start offset of the LS firmware
+ * @app_size:			size of the LS firmware's code and data
+ * @app_imem_offset:		offset of the app in IMEM
+ * @app_imem_entry:		entry point of the app in IMEM
+ * @app_dmem_offset:		offset of the data in DMEM
+ * @app_resident_code_offset:	offset of app code from app_start_offset
+ * @app_resident_code_size:	size of the code
+ * @app_resident_data_offset:	offset of data from app_start_offset
+ * @app_resident_data_size:	size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file.
+ */
+struct ls_ucode_img_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[64];
+	u32 compressed;
+};
+
+/**
+ * struct ls_ucode_img - temporary storage for loaded LS firmwares
+ * @node:		to link within lsf_ucode_mgr
+ * @falcon_id:		ID of the falcon this LS firmware is for
+ * @ucode_desc:		loaded or generated map of ucode_data
+ * @ucode_data:		firmware payload (code and data)
+ * @ucode_size:		size in bytes of data in ucode_data
+ * @sig:		signature for this firmware
+ * @sig:size:		size of the signature in bytes
+ *
+ * Preparing the WPR LS blob requires information about all the LS firmwares
+ * (size, etc) to be known. This structure contains all the data of one LS
+ * firmware.
+ */
+struct ls_ucode_img {
+	struct list_head node;
+	enum nvkm_secboot_falcon falcon_id;
+
+	struct ls_ucode_img_desc ucode_desc;
+	u8 *ucode_data;
+	u32 ucode_size;
+
+	u8 *sig;
+	u32 sig_size;
+};
+
+/**
+ * struct fw_bin_header - header of firmware files
+ * @bin_magic:		always 0x3b1d14f0
+ * @bin_ver:		version of the bin format
+ * @bin_size:		entire image size including this header
+ * @header_offset:	offset of the firmware/bootloader header in the file
+ * @data_offset:	offset of the firmware/bootloader payload in the file
+ * @data_size:		size of the payload
+ *
+ * This header is located at the beginning of the HS firmware and HS bootloader
+ * files, to describe where the headers and data can be found.
+ */
+struct fw_bin_header {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+	u32 data_offset;
+	u32 data_size;
+};
+
+/**
+ * struct fw_bl_desc - firmware bootloader descriptor
+ * @start_tag:		starting tag of bootloader
+ * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
+ * @code_off:		offset of code section
+ * @code_size:		size of code section
+ * @data_off:		offset of data section
+ * @data_size:		size of data section
+ *
+ * This structure is embedded in bootloader firmware files at to describe the
+ * IMEM and DMEM layout expected by the bootloader.
+ */
+struct fw_bl_desc {
+	u32 start_tag;
+	u32 dmem_load_off;
+	u32 code_off;
+	u32 code_size;
+	u32 data_off;
+	u32 data_size;
+};
+
+int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *);
+int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *);
+
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
new file mode 100644
index 000000000000..40a6df77bb8a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "ls_ucode.h"
+#include "acr.h"
+
+#include <core/firmware.h>
+
+#define BL_DESC_BLK_SIZE 256
+/**
+ * Build a ucode image and descriptor from provided bootloader, code and data.
+ *
+ * @bl:		bootloader image, including 16-bytes descriptor
+ * @code:	LS firmware code segment
+ * @data:	LS firmware data segment
+ * @desc:	ucode descriptor to be written
+ *
+ * Return: allocated ucode image with corresponding descriptor information. desc
+ *         is also updated to contain the right offsets within returned image.
+ */
+static void *
+ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
+		   const struct firmware *data, struct ls_ucode_img_desc *desc)
+{
+	struct fw_bin_header *bin_hdr = (void *)bl->data;
+	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
+	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
+	u32 pos = 0;
+	void *image;
+
+	desc->bootloader_start_offset = pos;
+	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
+	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
+	desc->bootloader_entry_point = bl_desc->start_tag * 256;
+
+	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
+	desc->app_start_offset = pos;
+	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
+			 ALIGN(data->size, BL_DESC_BLK_SIZE);
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
+
+	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
+	desc->app_resident_data_offset = pos - desc->app_start_offset;
+	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
+
+	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
+			   desc->app_size;
+
+	image = kzalloc(desc->image_size, GFP_KERNEL);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(image + desc->bootloader_start_offset, bl_data,
+	       bl_desc->code_size);
+	memcpy(image + desc->app_start_offset, code->data, code->size);
+	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
+	       data->data, data->size);
+
+	return image;
+}
+
+/**
+ * ls_ucode_img_load_gr() - load and prepare a LS GR ucode image
+ *
+ * Load the LS microcode, bootloader and signature and pack them into a single
+ * blob. Also generate the corresponding ucode descriptor.
+ */
+static int
+ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
+		     const char *falcon_name)
+{
+	const struct firmware *bl, *code, *data, *sig;
+	char f[64];
+	int ret;
+
+	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &bl);
+	if (ret)
+		goto error;
+
+	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &code);
+	if (ret)
+		goto free_bl;
+
+	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &data);
+	if (ret)
+		goto free_inst;
+
+	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &sig);
+	if (ret)
+		goto free_data;
+	img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL);
+	if (!img->sig) {
+		ret = -ENOMEM;
+		goto free_sig;
+	}
+	img->sig_size = sig->size;
+
+	img->ucode_data = ls_ucode_img_build(bl, code, data,
+					     &img->ucode_desc);
+	if (IS_ERR(img->ucode_data)) {
+		ret = PTR_ERR(img->ucode_data);
+		goto free_data;
+	}
+	img->ucode_size = img->ucode_desc.image_size;
+
+free_sig:
+	nvkm_firmware_put(sig);
+free_data:
+	nvkm_firmware_put(data);
+free_inst:
+	nvkm_firmware_put(code);
+free_bl:
+	nvkm_firmware_put(bl);
+error:
+	return ret;
+}
+
+int
+acr_ls_ucode_load_fecs(const struct nvkm_subdev *subdev,
+		       struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "fecs");
+}
+
+int
+acr_ls_ucode_load_gpccs(const struct nvkm_subdev *subdev,
+			struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "gpccs");
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index a9a8a0e1017e..936a65f5658c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -27,20 +27,16 @@
 #include <subdev/mmu.h>
 
 struct nvkm_secboot_func {
-	int (*init)(struct nvkm_secboot *);
+	int (*oneinit)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-
-	/* ID of the falcon that will perform secure boot */
-	enum nvkm_secboot_falcon boot_falcon;
-	/* Bit-mask of IDs of managed falcons */
-	unsigned long managed_falcons;
+	int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *);
 };
 
-int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *,
-		      int index, struct nvkm_secboot *);
+extern const char *nvkm_secboot_falcon_name[];
+
+int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *,
+		      struct nvkm_device *, int, struct nvkm_secboot *);
 int nvkm_secboot_falcon_reset(struct nvkm_secboot *);
 int nvkm_secboot_falcon_run(struct nvkm_secboot *);
 
@@ -48,187 +44,20 @@ struct flcn_u64 {
 	u32 lo;
 	u32 hi;
 };
+
 static inline u64 flcn64_to_u64(const struct flcn_u64 f)
 {
 	return ((u64)f.hi) << 32 | f.lo;
 }
 
-/**
- * struct gm200_flcn_bl_desc - DMEM bootloader descriptor
- * @signature:		16B signature for secure code. 0s if no secure code
- * @ctx_dma:		DMA context to be used by BL while loading code/data
- * @code_dma_base:	256B-aligned Physical FB Address where code is located
- *			(falcon's $xcbase register)
- * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @non_sec_code_size:	the size of the nonSecure code part.
- * @sec_code_off:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @sec_code_size:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @code_entry_point:	code entry point which will be invoked by BL after
- *                      code is loaded.
- * @data_dma_base:	256B aligned Physical FB Address where data is located.
- *			(falcon's $xdbase register)
- * @data_size:		size of data block. Should be multiple of 256B
- *
- * Structure used by the bootloader to load the rest of the code. This has
- * to be filled by host and copied into DMEM at offset provided in the
- * hsflcn_bl_desc.bl_desc_dmem_load_off.
- */
-struct gm200_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	struct flcn_u64 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	struct flcn_u64 data_dma_base;
-	u32 data_size;
-};
-
-/**
- * struct hsflcn_acr_desc - data section of the HS firmware
- *
- * This header is to be copied at the beginning of DMEM by the HS bootloader.
- *
- * @signature:		signature of ACR ucode
- * @wpr_region_id:	region ID holding the WPR header and its details
- * @wpr_offset:		offset from the WPR region holding the wpr header
- * @regions:		region descriptors
- * @nonwpr_ucode_blob_size:	size of LS blob
- * @nonwpr_ucode_blob_start:	FB location of LS blob is
- */
-struct hsflcn_acr_desc {
-	union {
-		u8 reserved_dmem[0x200];
-		u32 signatures[4];
-	} ucode_reserved_space;
-	u32 wpr_region_id;
-	u32 wpr_offset;
-	u32 mmu_mem_range;
-#define FLCN_ACR_MAX_REGIONS 2
-	struct {
-		u32 no_regions;
-		struct {
-			u32 start_addr;
-			u32 end_addr;
-			u32 region_id;
-			u32 read_mask;
-			u32 write_mask;
-			u32 client_mask;
-		} region_props[FLCN_ACR_MAX_REGIONS];
-	} regions;
-	u32 ucode_blob_size;
-	u64 ucode_blob_base __aligned(8);
-	struct {
-		u32 vpr_enabled;
-		u32 vpr_start;
-		u32 vpr_end;
-		u32 hdcp_policies;
-	} vpr_desc;
-};
-
-/**
- * Contains the whole secure boot state, allowing it to be performed as needed
- * @wpr_addr:		physical address of the WPR region
- * @wpr_size:		size in bytes of the WPR region
- * @ls_blob:		LS blob of all the LS firmwares, signatures, bootloaders
- * @ls_blob_size:	size of the LS blob
- * @ls_blob_nb_regions:	number of LS firmwares that will be loaded
- * @acr_blob:		HS blob
- * @acr_blob_vma:	mapping of the HS blob into the secure falcon's VM
- * @acr_bl_desc:	bootloader descriptor of the HS blob
- * @hsbl_blob:		HS blob bootloader
- * @inst:		instance block for HS falcon
- * @pgd:		page directory for the HS falcon
- * @vm:			address space used by the HS falcon
- * @falcon_state:	current state of the managed falcons
- * @firmware_ok:	whether the firmware blobs have been created
- */
-struct gm200_secboot {
-	struct nvkm_secboot base;
-	const struct gm200_secboot_func *func;
-
-	/*
-	 * Address and size of the WPR region. On dGPU this will be the
-	 * address of the LS blob. On Tegra this is a fixed region set by the
-	 * bootloader
-	 */
-	u64 wpr_addr;
-	u32 wpr_size;
-
-	/*
-	 * HS FW - lock WPR region (dGPU only) and load LS FWs
-	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
-	 */
-	struct nvkm_gpuobj *acr_load_blob;
-	struct gm200_flcn_bl_desc acr_load_bl_desc;
-
-	/* HS FW - unlock WPR region (dGPU only) */
-	struct nvkm_gpuobj *acr_unload_blob;
-	struct gm200_flcn_bl_desc acr_unload_bl_desc;
-
-	/* HS bootloader */
-	void *hsbl_blob;
-
-	/* LS FWs, to be loaded by the HS ACR */
-	struct nvkm_gpuobj *ls_blob;
-
-	/* Instance block & address space used for HS FW execution */
-	struct nvkm_gpuobj *inst;
-	struct nvkm_gpuobj *pgd;
-	struct nvkm_vm *vm;
-
-	/* To keep track of the state of all managed falcons */
-	enum {
-		/* In non-secure state, no firmware loaded, no privileges*/
-		NON_SECURE = 0,
-		/* In low-secure mode and ready to be started */
-		RESET,
-		/* In low-secure mode and running */
-		RUNNING,
-	} falcon_state[NVKM_SECBOOT_FALCON_END];
-
-	bool firmware_ok;
-};
-#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
-
-/**
- * Contains functions we wish to abstract between GM200-like implementations
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
- * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
- * @prepare_blobs:	prepares the various blobs needed for secure booting
- */
-struct gm200_secboot_func {
-	/*
-	 * Size of the bootloader descriptor for this chip. A block of this
-	 * size is allocated before booting a falcon and the fixup_bl_desc
-	 * callback is called on it
-	 */
-	u32 bl_desc_size;
-	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
-
-	/*
-	 * Chip-specific modifications of the HS descriptor can be done here.
-	 * On dGPU this is used to fill the information about the WPR region
-	 * we want the HS FW to set up.
-	 */
-	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
-	int (*prepare_blobs)(struct gm200_secboot *);
-};
+static inline struct flcn_u64 u64_to_flcn64(u64 u)
+{
+	struct flcn_u64 ret;
 
-int gm200_secboot_init(struct nvkm_secboot *);
-void *gm200_secboot_dtor(struct nvkm_secboot *);
-int gm200_secboot_reset(struct nvkm_secboot *, u32);
-int gm200_secboot_start(struct nvkm_secboot *, u32);
+	ret.hi = upper_32_bits(u);
+	ret.lo = lower_32_bits(u);
 
-int gm20x_secboot_prepare_blobs(struct gm200_secboot *);
+	return ret;
+}
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index 8894fee30cbc..df949fa7d05d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -64,10 +64,9 @@ nvkm_therm_update_trip(struct nvkm_therm *therm)
 }
 
 static int
-nvkm_therm_update_linear(struct nvkm_therm *therm)
+nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
+                               u8 linear_max_temp)
 {
-	u8  linear_min_temp = therm->fan->bios.linear_min_temp;
-	u8  linear_max_temp = therm->fan->bios.linear_max_temp;
 	u8  temp = therm->func->temp_get(therm);
 	u16 duty;
 
@@ -85,6 +84,21 @@ nvkm_therm_update_linear(struct nvkm_therm *therm)
 	return duty;
 }
 
+static int
+nvkm_therm_update_linear(struct nvkm_therm *therm)
+{
+	u8  min = therm->fan->bios.linear_min_temp;
+	u8  max = therm->fan->bios.linear_max_temp;
+	return nvkm_therm_compute_linear_duty(therm, min, max);
+}
+
+static int
+nvkm_therm_update_linear_fallback(struct nvkm_therm *therm)
+{
+	u8 max = therm->bios_sensor.thrs_fan_boost.temp;
+	return nvkm_therm_compute_linear_duty(therm, 30, max);
+}
+
 static void
 nvkm_therm_update(struct nvkm_therm *therm, int mode)
 {
@@ -119,6 +133,8 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 		case NVBIOS_THERM_FAN_OTHER:
 			if (therm->cstate)
 				duty = therm->cstate;
+			else
+				duty = nvkm_therm_update_linear_fallback(therm);
 			poll = false;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index fe063d5728e2..67ada1d9a28c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -95,6 +95,20 @@ nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
 	return intr & ~handled;
 }
 
+int
+nvkm_top_fault_id(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	list_for_each_entry(info, &top->device, head) {
+		if (info->index == devidx && info->fault >= 0)
+			return info->fault;
+	}
+
+	return -ENOENT;
+}
+
 enum nvkm_devidx
 nvkm_top_fault(struct nvkm_device *device, int fault)
 {
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
index a2bb855a2851..ac5800c72cb4 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c
@@ -18,7 +18,7 @@
 #include <linux/jiffies.h>
 #include <linux/module.h>
 #include <linux/platform_device.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/slab.h>
 #include <linux/workqueue.h>
 #include <linux/of_device.h>
diff --git a/drivers/gpu/drm/omapdrm/dss/dispc.c b/drivers/gpu/drm/omapdrm/dss/dispc.c
index 5554b72cf56a..d956e6266368 100644
--- a/drivers/gpu/drm/omapdrm/dss/dispc.c
+++ b/drivers/gpu/drm/omapdrm/dss/dispc.c
@@ -2506,6 +2506,25 @@ static int dispc_ovl_calc_scaling_44xx(unsigned long pclk, unsigned long lclk,
 		return -EINVAL;
 	}
 
+	if (*decim_x > 4 && color_mode != OMAP_DSS_COLOR_NV12) {
+		/*
+		 * Let's disable all scaling that requires horizontal
+		 * decimation with higher factor than 4, until we have
+		 * better estimates of what we can and can not
+		 * do. However, NV12 color format appears to work Ok
+		 * with all decimation factors.
+		 *
+		 * When decimating horizontally by more that 4 the dss
+		 * is not able to fetch the data in burst mode. When
+		 * this happens it is hard to tell if there enough
+		 * bandwidth. Despite what theory says this appears to
+		 * be true also for 16-bit color formats.
+		 */
+		DSSERR("Not enough bandwidth, too much downscaling (x-decimation factor %d > 4)", *decim_x);
+
+		return -EINVAL;
+	}
+
 	*core_clk = dispc.feat->calc_core_clk(pclk, in_width, in_height,
 				out_width, out_height, mem_to_mem);
 	return 0;
diff --git a/drivers/gpu/drm/omapdrm/omap_crtc.c b/drivers/gpu/drm/omapdrm/omap_crtc.c
index dd47dc191e6b..b68c70eb395f 100644
--- a/drivers/gpu/drm/omapdrm/omap_crtc.c
+++ b/drivers/gpu/drm/omapdrm/omap_crtc.c
@@ -410,13 +410,7 @@ static void omap_crtc_atomic_flush(struct drm_crtc *crtc,
 		dispc_mgr_set_gamma(omap_crtc->channel, lut, length);
 	}
 
-	/*
-	 * Only flush the CRTC if it is currently enabled. CRTCs that require a
-	 * mode set are disabled prior plane updates and enabled afterwards.
-	 * They are thus not active (regardless of what their CRTC core state
-	 * reports) and the DRM core could thus call this function even though
-	 * the CRTC is currently disabled. Do nothing in that case.
-	 */
+	/* Only flush the CRTC if it is currently enabled. */
 	if (!omap_crtc->enabled)
 		return;
 
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.c b/drivers/gpu/drm/omapdrm/omap_drv.c
index afe8f05b927b..3f2554235225 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.c
+++ b/drivers/gpu/drm/omapdrm/omap_drv.c
@@ -96,9 +96,22 @@ static void omap_atomic_complete(struct omap_atomic_state_commit *commit)
 	dispc_runtime_get();
 
 	drm_atomic_helper_commit_modeset_disables(dev, old_state);
-	drm_atomic_helper_commit_planes(dev, old_state,
-					DRM_PLANE_COMMIT_ACTIVE_ONLY);
+
+	/* With the current dss dispc implementation we have to enable
+	 * the new modeset before we can commit planes. The dispc ovl
+	 * configuration relies on the video mode configuration been
+	 * written into the HW when the ovl configuration is
+	 * calculated.
+	 *
+	 * This approach is not ideal because after a mode change the
+	 * plane update is executed only after the first vblank
+	 * interrupt. The dispc implementation should be fixed so that
+	 * it is able use uncommitted drm state information.
+	 */
 	drm_atomic_helper_commit_modeset_enables(dev, old_state);
+	omap_atomic_wait_for_completion(dev, old_state);
+
+	drm_atomic_helper_commit_planes(dev, old_state, 0);
 
 	omap_atomic_wait_for_completion(dev, old_state);
 
diff --git a/drivers/gpu/drm/omapdrm/omap_drv.h b/drivers/gpu/drm/omapdrm/omap_drv.h
index 36d93ce84a29..65977982f15f 100644
--- a/drivers/gpu/drm/omapdrm/omap_drv.h
+++ b/drivers/gpu/drm/omapdrm/omap_drv.h
@@ -188,7 +188,7 @@ int omap_gem_dumb_create(struct drm_file *file, struct drm_device *dev,
 int omap_gem_mmap(struct file *filp, struct vm_area_struct *vma);
 int omap_gem_mmap_obj(struct drm_gem_object *obj,
 		struct vm_area_struct *vma);
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int omap_gem_fault(struct vm_fault *vmf);
 int omap_gem_op_start(struct drm_gem_object *obj, enum omap_gem_op op);
 int omap_gem_op_finish(struct drm_gem_object *obj, enum omap_gem_op op);
 int omap_gem_op_sync(struct drm_gem_object *obj, enum omap_gem_op op);
diff --git a/drivers/gpu/drm/omapdrm/omap_fbdev.c b/drivers/gpu/drm/omapdrm/omap_fbdev.c
index 2a839956dae6..942c4d483008 100644
--- a/drivers/gpu/drm/omapdrm/omap_fbdev.c
+++ b/drivers/gpu/drm/omapdrm/omap_fbdev.c
@@ -263,8 +263,7 @@ struct drm_fb_helper *omap_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, helper, &omap_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, helper,
-			priv->num_crtcs, priv->num_connectors);
+	ret = drm_fb_helper_init(dev, helper, priv->num_connectors);
 	if (ret) {
 		dev_err(dev->dev, "could not init fbdev: ret=%d\n", ret);
 		goto fail;
diff --git a/drivers/gpu/drm/omapdrm/omap_gem.c b/drivers/gpu/drm/omapdrm/omap_gem.c
index 4a90c690f09e..5d5a9f517c30 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@@ -518,7 +518,6 @@ static int fault_2d(struct drm_gem_object *obj,
 
 /**
  * omap_gem_fault		-	pagefault handler for GEM objects
- * @vma: the VMA of the GEM object
  * @vmf: fault detail
  *
  * Invoked when a fault occurs on an mmap of a GEM managed area. GEM
@@ -529,8 +528,9 @@ static int fault_2d(struct drm_gem_object *obj,
  * vma->vm_private_data points to the GEM object that is backing this
  * mapping.
  */
-int omap_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int omap_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *obj = vma->vm_private_data;
 	struct omap_gem_object *omap_obj = to_omap_bo(obj);
 	struct drm_device *dev = obj->dev;
@@ -1033,7 +1033,7 @@ void omap_gem_describe(struct drm_gem_object *obj, struct seq_file *m)
 	off = drm_vma_node_start(&obj->vma_node);
 
 	seq_printf(m, "%08x: %2d (%2d) %08llx %pad (%2d) %p %4d",
-			omap_obj->flags, obj->name, obj->refcount.refcount.counter,
+			omap_obj->flags, obj->name, kref_read(&obj->refcount),
 			off, &omap_obj->paddr, omap_obj->paddr_cnt,
 			omap_obj->vaddr, omap_obj->roll);
 
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 06aaf79de8c8..89eb0422821c 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -668,6 +668,48 @@ static const struct panel_desc avic_tm070ddh03 = {
 	},
 };
 
+static const struct drm_display_mode boe_nv101wxmn51_modes[] = {
+	{
+		.clock = 71900,
+		.hdisplay = 1280,
+		.hsync_start = 1280 + 48,
+		.hsync_end = 1280 + 48 + 32,
+		.htotal = 1280 + 48 + 32 + 80,
+		.vdisplay = 800,
+		.vsync_start = 800 + 3,
+		.vsync_end = 800 + 3 + 5,
+		.vtotal = 800 + 3 + 5 + 24,
+		.vrefresh = 60,
+	},
+	{
+		.clock = 57500,
+		.hdisplay = 1280,
+		.hsync_start = 1280 + 48,
+		.hsync_end = 1280 + 48 + 32,
+		.htotal = 1280 + 48 + 32 + 80,
+		.vdisplay = 800,
+		.vsync_start = 800 + 3,
+		.vsync_end = 800 + 3 + 5,
+		.vtotal = 800 + 3 + 5 + 24,
+		.vrefresh = 48,
+	},
+};
+
+static const struct panel_desc boe_nv101wxmn51 = {
+	.modes = boe_nv101wxmn51_modes,
+	.num_modes = ARRAY_SIZE(boe_nv101wxmn51_modes),
+	.bpc = 8,
+	.size = {
+		.width = 217,
+		.height = 136,
+	},
+	.delay = {
+		.prepare = 210,
+		.enable = 50,
+		.unprepare = 160,
+	},
+};
+
 static const struct drm_display_mode chunghwa_claa070wp03xg_mode = {
 	.clock = 66770,
 	.hdisplay = 800,
@@ -760,6 +802,8 @@ static const struct panel_desc edt_et057090dhu = {
 		.width = 115,
 		.height = 86,
 	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
 };
 
 static const struct drm_display_mode edt_etm0700g0dh6_mode = {
@@ -784,6 +828,8 @@ static const struct panel_desc edt_etm0700g0dh6 = {
 		.width = 152,
 		.height = 91,
 	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+	.bus_flags = DRM_BUS_FLAG_DE_HIGH | DRM_BUS_FLAG_PIXDATA_NEGEDGE,
 };
 
 static const struct drm_display_mode foxlink_fl500wvr00_a0t_mode = {
@@ -1277,6 +1323,29 @@ static const struct panel_desc nec_nl4827hc19_05b = {
 	.bus_flags = DRM_BUS_FLAG_PIXDATA_POSEDGE,
 };
 
+static const struct drm_display_mode netron_dy_e231732_mode = {
+	.clock = 66000,
+	.hdisplay = 1024,
+	.hsync_start = 1024 + 160,
+	.hsync_end = 1024 + 160 + 70,
+	.htotal = 1024 + 160 + 70 + 90,
+	.vdisplay = 600,
+	.vsync_start = 600 + 127,
+	.vsync_end = 600 + 127 + 20,
+	.vtotal = 600 + 127 + 20 + 3,
+	.vrefresh = 60,
+};
+
+static const struct panel_desc netron_dy_e231732 = {
+	.modes = &netron_dy_e231732_mode,
+	.num_modes = 1,
+	.size = {
+		.width = 154,
+		.height = 87,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
+};
+
 static const struct drm_display_mode nvd_9128_mode = {
 	.clock = 29500,
 	.hdisplay = 800,
@@ -1632,6 +1701,30 @@ static const struct panel_desc starry_kr122ea0sra = {
 	},
 };
 
+static const struct display_timing tianma_tm070jdhg30_timing = {
+	.pixelclock = { 62600000, 68200000, 78100000 },
+	.hactive = { 1280, 1280, 1280 },
+	.hfront_porch = { 15, 64, 159 },
+	.hback_porch = { 5, 5, 5 },
+	.hsync_len = { 1, 1, 256 },
+	.vactive = { 800, 800, 800 },
+	.vfront_porch = { 3, 40, 99 },
+	.vback_porch = { 2, 2, 2 },
+	.vsync_len = { 1, 1, 128 },
+	.flags = DISPLAY_FLAGS_DE_HIGH,
+};
+
+static const struct panel_desc tianma_tm070jdhg30 = {
+	.timings = &tianma_tm070jdhg30_timing,
+	.num_timings = 1,
+	.bpc = 8,
+	.size = {
+		.width = 151,
+		.height = 95,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X7X4_SPWG,
+};
+
 static const struct drm_display_mode tpk_f07a_0102_mode = {
 	.clock = 33260,
 	.hdisplay = 800,
@@ -1748,6 +1841,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "avic,tm070ddh03",
 		.data = &avic_tm070ddh03,
 	}, {
+		.compatible = "boe,nv101wxmn51",
+		.data = &boe_nv101wxmn51,
+	}, {
 		.compatible = "chunghwa,claa070wp03xg",
 		.data = &chunghwa_claa070wp03xg,
 	}, {
@@ -1826,6 +1922,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "nec,nl4827hc19-05b",
 		.data = &nec_nl4827hc19_05b,
 	}, {
+		.compatible = "netron-dy,e231732",
+		.data = &netron_dy_e231732,
+	}, {
 		.compatible = "nvd,9128",
 		.data = &nvd_9128,
 	}, {
@@ -1868,6 +1967,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "starry,kr122ea0sra",
 		.data = &starry_kr122ea0sra,
 	}, {
+		.compatible = "tianma,tm070jdhg30",
+		.data = &tianma_tm070jdhg30,
+	}, {
 		.compatible = "tpk,f07a-0102",
 		.data = &tpk_f07a_0102,
 	}, {
diff --git a/drivers/gpu/drm/qxl/qxl_debugfs.c b/drivers/gpu/drm/qxl/qxl_debugfs.c
index 057b2b547cac..d58751c94618 100644
--- a/drivers/gpu/drm/qxl/qxl_debugfs.c
+++ b/drivers/gpu/drm/qxl/qxl_debugfs.c
@@ -133,8 +133,8 @@ int qxl_debugfs_add_files(struct qxl_device *qdev,
 	qdev->debugfs_count = i;
 #if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_create_files(files, nfiles,
-				 qdev->ddev->primary->debugfs_root,
-				 qdev->ddev->primary);
+				 qdev->ddev.primary->debugfs_root,
+				 qdev->ddev.primary);
 #endif
 	return 0;
 }
@@ -147,7 +147,7 @@ void qxl_debugfs_remove_files(struct qxl_device *qdev)
 	for (i = 0; i < qdev->debugfs_count; i++) {
 		drm_debugfs_remove_files(qdev->debugfs[i].files,
 					 qdev->debugfs[i].num_files,
-					 qdev->ddev->primary);
+					 qdev->ddev.primary);
 	}
 #endif
 }
diff --git a/drivers/gpu/drm/qxl/qxl_display.c b/drivers/gpu/drm/qxl/qxl_display.c
index 416ade8566b7..1094cd33eb06 100644
--- a/drivers/gpu/drm/qxl/qxl_display.c
+++ b/drivers/gpu/drm/qxl/qxl_display.c
@@ -136,7 +136,7 @@ static int qxl_display_copy_rom_client_monitors_config(struct qxl_device *qdev)
 
 static void qxl_update_offset_props(struct qxl_device *qdev)
 {
-	struct drm_device *dev = qdev->ddev;
+	struct drm_device *dev = &qdev->ddev;
 	struct drm_connector *connector;
 	struct qxl_output *output;
 	struct qxl_head *head;
@@ -156,7 +156,7 @@ static void qxl_update_offset_props(struct qxl_device *qdev)
 void qxl_display_read_client_monitors_config(struct qxl_device *qdev)
 {
 
-	struct drm_device *dev = qdev->ddev;
+	struct drm_device *dev = &qdev->ddev;
 	int status;
 
 	status = qxl_display_copy_rom_client_monitors_config(qdev);
@@ -174,10 +174,10 @@ void qxl_display_read_client_monitors_config(struct qxl_device *qdev)
 	drm_modeset_lock_all(dev);
 	qxl_update_offset_props(qdev);
 	drm_modeset_unlock_all(dev);
-	if (!drm_helper_hpd_irq_event(qdev->ddev)) {
+	if (!drm_helper_hpd_irq_event(dev)) {
 		/* notify that the monitor configuration changed, to
 		   adjust at the arbitrary resolution */
-		drm_kms_helper_hotplug_event(qdev->ddev);
+		drm_kms_helper_hotplug_event(dev);
 	}
 }
 
@@ -1036,7 +1036,7 @@ static int qxl_mode_create_hotplug_mode_update_property(struct qxl_device *qdev)
 		return 0;
 
 	qdev->hotplug_mode_update_property =
-		drm_property_create_range(qdev->ddev, DRM_MODE_PROP_IMMUTABLE,
+		drm_property_create_range(&qdev->ddev, DRM_MODE_PROP_IMMUTABLE,
 					  "hotplug_mode_update", 0, 1);
 
 	return 0;
@@ -1175,28 +1175,28 @@ int qxl_modeset_init(struct qxl_device *qdev)
 	int i;
 	int ret;
 
-	drm_mode_config_init(qdev->ddev);
+	drm_mode_config_init(&qdev->ddev);
 
 	ret = qxl_create_monitors_object(qdev);
 	if (ret)
 		return ret;
 
-	qdev->ddev->mode_config.funcs = (void *)&qxl_mode_funcs;
+	qdev->ddev.mode_config.funcs = (void *)&qxl_mode_funcs;
 
 	/* modes will be validated against the framebuffer size */
-	qdev->ddev->mode_config.min_width = 320;
-	qdev->ddev->mode_config.min_height = 200;
-	qdev->ddev->mode_config.max_width = 8192;
-	qdev->ddev->mode_config.max_height = 8192;
+	qdev->ddev.mode_config.min_width = 320;
+	qdev->ddev.mode_config.min_height = 200;
+	qdev->ddev.mode_config.max_width = 8192;
+	qdev->ddev.mode_config.max_height = 8192;
 
-	qdev->ddev->mode_config.fb_base = qdev->vram_base;
+	qdev->ddev.mode_config.fb_base = qdev->vram_base;
 
-	drm_mode_create_suggested_offset_properties(qdev->ddev);
+	drm_mode_create_suggested_offset_properties(&qdev->ddev);
 	qxl_mode_create_hotplug_mode_update_property(qdev);
 
 	for (i = 0 ; i < qxl_num_crtc; ++i) {
-		qdev_crtc_init(qdev->ddev, i);
-		qdev_output_init(qdev->ddev, i);
+		qdev_crtc_init(&qdev->ddev, i);
+		qdev_output_init(&qdev->ddev, i);
 	}
 
 	qdev->mode_info.mode_config_initialized = true;
@@ -1214,7 +1214,7 @@ void qxl_modeset_fini(struct qxl_device *qdev)
 
 	qxl_destroy_monitors_object(qdev);
 	if (qdev->mode_info.mode_config_initialized) {
-		drm_mode_config_cleanup(qdev->ddev);
+		drm_mode_config_cleanup(&qdev->ddev);
 		qdev->mode_info.mode_config_initialized = false;
 	}
 }
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 6e0f8a2d8ac9..8e17c241e63c 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -62,7 +62,6 @@ static struct pci_driver qxl_pci_driver;
 static int
 qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	struct drm_device *drm;
 	struct qxl_device *qdev;
 	int ret;
 
@@ -72,29 +71,19 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		return -EINVAL; /* TODO: ENODEV ? */
 	}
 
-	drm = drm_dev_alloc(&qxl_driver, &pdev->dev);
-	if (IS_ERR(drm))
-		return -ENOMEM;
-
 	qdev = kzalloc(sizeof(struct qxl_device), GFP_KERNEL);
-	if (!qdev) {
-		ret = -ENOMEM;
-		goto free_drm_device;
-	}
+	if (!qdev)
+		return -ENOMEM;
 
 	ret = pci_enable_device(pdev);
 	if (ret)
-		goto free_drm_device;
-
-	drm->pdev = pdev;
-	pci_set_drvdata(pdev, drm);
-	drm->dev_private = qdev;
+		goto free_dev;
 
-	ret = qxl_device_init(qdev, drm, pdev, ent->driver_data);
+	ret = qxl_device_init(qdev, &qxl_driver, pdev, ent->driver_data);
 	if (ret)
 		goto disable_pci;
 
-	ret = drm_vblank_init(drm, 1);
+	ret = drm_vblank_init(&qdev->ddev, 1);
 	if (ret)
 		goto unload;
 
@@ -102,10 +91,10 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (ret)
 		goto vblank_cleanup;
 
-	drm_kms_helper_poll_init(qdev->ddev);
+	drm_kms_helper_poll_init(&qdev->ddev);
 
 	/* Complete initialization. */
-	ret = drm_dev_register(drm, ent->driver_data);
+	ret = drm_dev_register(&qdev->ddev, ent->driver_data);
 	if (ret)
 		goto modeset_cleanup;
 
@@ -114,14 +103,13 @@ qxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 modeset_cleanup:
 	qxl_modeset_fini(qdev);
 vblank_cleanup:
-	drm_vblank_cleanup(drm);
+	drm_vblank_cleanup(&qdev->ddev);
 unload:
 	qxl_device_fini(qdev);
 disable_pci:
 	pci_disable_device(pdev);
-free_drm_device:
+free_dev:
 	kfree(qdev);
-	kfree(drm);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/qxl/qxl_drv.h b/drivers/gpu/drm/qxl/qxl_drv.h
index 0d877fa61162..785c17b56f73 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.h
+++ b/drivers/gpu/drm/qxl/qxl_drv.h
@@ -242,9 +242,7 @@ void qxl_debugfs_remove_files(struct qxl_device *qdev);
 struct qxl_device;
 
 struct qxl_device {
-	struct device			*dev;
-	struct drm_device		*ddev;
-	struct pci_dev			*pdev;
+	struct drm_device ddev;
 	unsigned long flags;
 
 	resource_size_t vram_base, vram_size;
@@ -336,8 +334,8 @@ __printf(2,3) void qxl_io_log(struct qxl_device *qdev, const char *fmt, ...);
 extern const struct drm_ioctl_desc qxl_ioctls[];
 extern int qxl_max_ioctl;
 
-int qxl_device_init(struct qxl_device *qdev, struct drm_device *ddev,
-		    struct pci_dev *pdev,  unsigned long flags);
+int qxl_device_init(struct qxl_device *qdev, struct drm_driver *drv,
+		    struct pci_dev *pdev, unsigned long flags);
 void qxl_device_fini(struct qxl_device *qdev);
 
 int qxl_modeset_init(struct qxl_device *qdev);
diff --git a/drivers/gpu/drm/qxl/qxl_fb.c b/drivers/gpu/drm/qxl/qxl_fb.c
index e6ade6aab54c..d479b7a7abe4 100644
--- a/drivers/gpu/drm/qxl/qxl_fb.c
+++ b/drivers/gpu/drm/qxl/qxl_fb.c
@@ -268,7 +268,7 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev,
 
 	info->par = qfbdev;
 
-	qxl_framebuffer_init(qdev->ddev, &qfbdev->qfb, &mode_cmd, gobj,
+	qxl_framebuffer_init(&qdev->ddev, &qfbdev->qfb, &mode_cmd, gobj,
 			     &qxlfb_fb_funcs);
 
 	fb = &qfbdev->qfb.base;
@@ -297,7 +297,7 @@ static int qxlfb_create(struct qxl_fbdev *qfbdev,
 			       sizes->fb_height);
 
 	/* setup aperture base/size for vesafb takeover */
-	info->apertures->ranges[0].base = qdev->ddev->mode_config.fb_base;
+	info->apertures->ranges[0].base = qdev->ddev.mode_config.fb_base;
 	info->apertures->ranges[0].size = qdev->vram_size;
 
 	info->fix.mmio_start = 0;
@@ -395,11 +395,10 @@ int qxl_fbdev_init(struct qxl_device *qdev)
 	spin_lock_init(&qfbdev->delayed_ops_lock);
 	INIT_LIST_HEAD(&qfbdev->delayed_ops);
 
-	drm_fb_helper_prepare(qdev->ddev, &qfbdev->helper,
+	drm_fb_helper_prepare(&qdev->ddev, &qfbdev->helper,
 			      &qxl_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(qdev->ddev, &qfbdev->helper,
-				 qxl_num_crtc /* num_crtc - QXL supports just 1 */,
+	ret = drm_fb_helper_init(&qdev->ddev, &qfbdev->helper,
 				 QXLFB_CONN_LIMIT);
 	if (ret)
 		goto free;
@@ -426,7 +425,7 @@ void qxl_fbdev_fini(struct qxl_device *qdev)
 	if (!qdev->mode_info.qfbdev)
 		return;
 
-	qxl_fbdev_destroy(qdev->ddev, qdev->mode_info.qfbdev);
+	qxl_fbdev_destroy(&qdev->ddev, qdev->mode_info.qfbdev);
 	kfree(qdev->mode_info.qfbdev);
 	qdev->mode_info.qfbdev = NULL;
 }
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 5a4c8c492683..0b82a87916ae 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -64,7 +64,7 @@ static int qxl_map_ioctl(struct drm_device *dev, void *data,
 	struct qxl_device *qdev = dev->dev_private;
 	struct drm_qxl_map *qxl_map = data;
 
-	return qxl_mode_dumb_mmap(file_priv, qdev->ddev, qxl_map->handle,
+	return qxl_mode_dumb_mmap(file_priv, &qdev->ddev, qxl_map->handle,
 				  &qxl_map->offset);
 }
 
@@ -375,7 +375,7 @@ static int qxl_clientcap_ioctl(struct drm_device *dev, void *data,
 	byte = param->index / 8;
 	idx = param->index % 8;
 
-	if (qdev->pdev->revision < 4)
+	if (dev->pdev->revision < 4)
 		return -ENOSYS;
 
 	if (byte >= 58)
diff --git a/drivers/gpu/drm/qxl/qxl_irq.c b/drivers/gpu/drm/qxl/qxl_irq.c
index 0bf1e20c6e44..23a40106ab53 100644
--- a/drivers/gpu/drm/qxl/qxl_irq.c
+++ b/drivers/gpu/drm/qxl/qxl_irq.c
@@ -90,7 +90,7 @@ int qxl_irq_init(struct qxl_device *qdev)
 	atomic_set(&qdev->irq_received_cursor, 0);
 	atomic_set(&qdev->irq_received_io_cmd, 0);
 	qdev->irq_received_error = 0;
-	ret = drm_irq_install(qdev->ddev, qdev->ddev->pdev->irq);
+	ret = drm_irq_install(&qdev->ddev, qdev->ddev.pdev->irq);
 	qdev->ram_header->int_mask = QXL_INTERRUPT_MASK;
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Failed installing irq: %d\n", ret);
diff --git a/drivers/gpu/drm/qxl/qxl_kms.c b/drivers/gpu/drm/qxl/qxl_kms.c
index d0666f5dccd6..2dcd5c14cb56 100644
--- a/drivers/gpu/drm/qxl/qxl_kms.c
+++ b/drivers/gpu/drm/qxl/qxl_kms.c
@@ -116,15 +116,20 @@ static void qxl_gc_work(struct work_struct *work)
 }
 
 int qxl_device_init(struct qxl_device *qdev,
-		    struct drm_device *ddev,
+		    struct drm_driver *drv,
 		    struct pci_dev *pdev,
 		    unsigned long flags)
 {
 	int r, sb;
 
-	qdev->dev = &pdev->dev;
-	qdev->ddev = ddev;
-	qdev->pdev = pdev;
+	r = drm_dev_init(&qdev->ddev, drv, &pdev->dev);
+	if (r)
+		return r;
+
+	qdev->ddev.pdev = pdev;
+	pci_set_drvdata(pdev, &qdev->ddev);
+	qdev->ddev.dev_private = qdev;
+
 	qdev->flags = flags;
 
 	mutex_init(&qdev->gem.mutex);
diff --git a/drivers/gpu/drm/qxl/qxl_object.c b/drivers/gpu/drm/qxl/qxl_object.c
index fa5440dc9a19..dbc13510a1f8 100644
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@@ -93,7 +93,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 	if (bo == NULL)
 		return -ENOMEM;
 	size = roundup(size, PAGE_SIZE);
-	r = drm_gem_object_init(qdev->ddev, &bo->gem_base, size);
+	r = drm_gem_object_init(&qdev->ddev, &bo->gem_base, size);
 	if (unlikely(r)) {
 		kfree(bo);
 		return r;
@@ -113,7 +113,7 @@ int qxl_bo_create(struct qxl_device *qdev,
 			NULL, NULL, &qxl_ttm_bo_destroy);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS)
-			dev_err(qdev->dev,
+			dev_err(qdev->ddev.dev,
 				"object_init failed for (%lu, 0x%08X)\n",
 				size, domain);
 		return r;
@@ -223,7 +223,7 @@ struct qxl_bo *qxl_bo_ref(struct qxl_bo *bo)
 
 int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr)
 {
-	struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
+	struct drm_device *ddev = bo->gem_base.dev;
 	int r;
 
 	if (bo->pin_count) {
@@ -240,17 +240,17 @@ int qxl_bo_pin(struct qxl_bo *bo, u32 domain, u64 *gpu_addr)
 			*gpu_addr = qxl_bo_gpu_offset(bo);
 	}
 	if (unlikely(r != 0))
-		dev_err(qdev->dev, "%p pin failed\n", bo);
+		dev_err(ddev->dev, "%p pin failed\n", bo);
 	return r;
 }
 
 int qxl_bo_unpin(struct qxl_bo *bo)
 {
-	struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
+	struct drm_device *ddev = bo->gem_base.dev;
 	int r, i;
 
 	if (!bo->pin_count) {
-		dev_warn(qdev->dev, "%p unpin not necessary\n", bo);
+		dev_warn(ddev->dev, "%p unpin not necessary\n", bo);
 		return 0;
 	}
 	bo->pin_count--;
@@ -260,7 +260,7 @@ int qxl_bo_unpin(struct qxl_bo *bo)
 		bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
 	if (unlikely(r != 0))
-		dev_err(qdev->dev, "%p validate failed for unpin\n", bo);
+		dev_err(ddev->dev, "%p validate failed for unpin\n", bo);
 	return r;
 }
 
@@ -270,9 +270,9 @@ void qxl_bo_force_delete(struct qxl_device *qdev)
 
 	if (list_empty(&qdev->gem.objects))
 		return;
-	dev_err(qdev->dev, "Userspace still has active objects !\n");
+	dev_err(qdev->ddev.dev, "Userspace still has active objects !\n");
 	list_for_each_entry_safe(bo, n, &qdev->gem.objects, list) {
-		dev_err(qdev->dev, "%p %p %lu %lu force free\n",
+		dev_err(qdev->ddev.dev, "%p %p %lu %lu force free\n",
 			&bo->gem_base, bo, (unsigned long)bo->gem_base.size,
 			*((unsigned long *)&bo->gem_base.refcount));
 		mutex_lock(&qdev->gem.mutex);
diff --git a/drivers/gpu/drm/qxl/qxl_object.h b/drivers/gpu/drm/qxl/qxl_object.h
index 4d8311373ba3..0374fd93f4d6 100644
--- a/drivers/gpu/drm/qxl/qxl_object.h
+++ b/drivers/gpu/drm/qxl/qxl_object.h
@@ -34,8 +34,8 @@ static inline int qxl_bo_reserve(struct qxl_bo *bo, bool no_wait)
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, NULL);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS) {
-			struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
-			dev_err(qdev->dev, "%p reserve failed\n", bo);
+			struct drm_device *ddev = bo->gem_base.dev;
+			dev_err(ddev->dev, "%p reserve failed\n", bo);
 		}
 		return r;
 	}
@@ -70,8 +70,8 @@ static inline int qxl_bo_wait(struct qxl_bo *bo, u32 *mem_type,
 	r = ttm_bo_reserve(&bo->tbo, true, no_wait, NULL);
 	if (unlikely(r != 0)) {
 		if (r != -ERESTARTSYS) {
-			struct qxl_device *qdev = (struct qxl_device *)bo->gem_base.dev->dev_private;
-			dev_err(qdev->dev, "%p reserve failed for wait\n",
+			struct drm_device *ddev = bo->gem_base.dev;
+			dev_err(ddev->dev, "%p reserve failed for wait\n",
 				bo);
 		}
 		return r;
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 3dcc48431015..7d1cab57c89e 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -105,15 +105,15 @@ static void qxl_ttm_global_fini(struct qxl_device *qdev)
 static struct vm_operations_struct qxl_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops;
 
-static int qxl_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int qxl_ttm_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo;
 	int r;
 
-	bo = (struct ttm_buffer_object *)vma->vm_private_data;
+	bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
 	if (bo == NULL)
 		return VM_FAULT_NOPAGE;
-	r = ttm_vm_ops->fault(vma, vmf);
+	r = ttm_vm_ops->fault(vmf);
 	return r;
 }
 
@@ -408,7 +408,7 @@ int qxl_ttm_init(struct qxl_device *qdev)
 	r = ttm_bo_device_init(&qdev->mman.bdev,
 			       qdev->mman.bo_global_ref.ref.object,
 			       &qxl_bo_driver,
-			       qdev->ddev->anon_inode->i_mapping,
+			       qdev->ddev.anon_inode->i_mapping,
 			       DRM_FILE_PAGE_OFFSET, 0);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index ab89eed9ddd9..4b86e8b45009 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -181,7 +181,7 @@
 #define HW_ASSISTED_I2C_STATUS_FAILURE          2
 #define HW_ASSISTED_I2C_STATUS_SUCCESS          1
 
-#pragma pack(1)                                       /* BIOS data must use byte aligment */
+#pragma pack(1)                                       /* BIOS data must use byte alignment */
 
 /*  Define offset to location of ROM header. */
 
@@ -3883,7 +3883,7 @@ typedef struct _ATOM_GPIO_PIN_ASSIGNMENT
 }ATOM_GPIO_PIN_ASSIGNMENT;
 
 //ucGPIO_ID pre-define id for multiple usage
-//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC swithing feature is enable
+//from SMU7.x, if ucGPIO_ID=PP_AC_DC_SWITCH_GPIO_PINID in GPIO_LUTTable, AC/DC switching feature is enable
 #define PP_AC_DC_SWITCH_GPIO_PINID          60
 //from SMU7.x, if ucGPIO_ID=VDDC_REGULATOR_VRHOT_GPIO_PINID in GPIO_LUTable, VRHot feature is enable
 #define VDDC_VRHOT_GPIO_PINID               61
@@ -7909,7 +7909,7 @@ typedef struct  _ATOM_POWERPLAY_INFO_V3
 
 /*********************************************************************************/
 
-#pragma pack() // BIOS data must use byte aligment
+#pragma pack() // BIOS data must use byte alignment
 
 //
 // AMD ACPI Table
diff --git a/drivers/gpu/drm/radeon/radeon_bios.c b/drivers/gpu/drm/radeon/radeon_bios.c
index 00cfb5d2875f..04c0ed41374f 100644
--- a/drivers/gpu/drm/radeon/radeon_bios.c
+++ b/drivers/gpu/drm/radeon/radeon_bios.c
@@ -638,10 +638,8 @@ static bool radeon_acpi_vfct_bios(struct radeon_device *rdev)
 					     vhdr->ImageLength,
 					     GFP_KERNEL);
 
-			if (!rdev->bios) {
-				kfree(rdev->bios);
+			if (!rdev->bios)
 				return false;
-			}
 			return true;
 		}
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_cursor.c b/drivers/gpu/drm/radeon/radeon_cursor.c
index fb16070b266e..4a4f9533c53b 100644
--- a/drivers/gpu/drm/radeon/radeon_cursor.c
+++ b/drivers/gpu/drm/radeon/radeon_cursor.c
@@ -205,8 +205,8 @@ static int radeon_cursor_move_locked(struct drm_crtc *crtc, int x, int y)
 	}
 
 	if (x <= (crtc->x - w) || y <= (crtc->y - radeon_crtc->cursor_height) ||
-	    x >= (crtc->x + crtc->mode.crtc_hdisplay) ||
-	    y >= (crtc->y + crtc->mode.crtc_vdisplay))
+	    x >= (crtc->x + crtc->mode.hdisplay) ||
+	    y >= (crtc->y + crtc->mode.vdisplay))
 		goto out_of_bounds;
 
 	x += xorigin;
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index af3bbe82fd48..956c425e639e 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -97,9 +97,10 @@
  *   2.46.0 - Add PFP_SYNC_ME support on evergreen
  *   2.47.0 - Add UVD_NO_OP register support
  *   2.48.0 - TA_CS_BC_BASE_ADDR allowed on SI
+ *   2.49.0 - DRM_RADEON_GEM_INFO ioctl returns correct vram_size/visible values
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	48
+#define KMS_DRIVER_MINOR	49
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 void radeon_driver_unload_kms(struct drm_device *dev);
@@ -366,11 +367,10 @@ static void
 radeon_pci_shutdown(struct pci_dev *pdev)
 {
 	/* if we are running in a VM, make sure the device
-	 * torn down properly on reboot/shutdown.
-	 * unfortunately we can't detect certain
-	 * hypervisors so just do this all the time.
+	 * torn down properly on reboot/shutdown
 	 */
-	radeon_pci_remove(pdev);
+	if (radeon_device_is_virtual())
+		radeon_pci_remove(pdev);
 }
 
 static int radeon_pmops_suspend(struct device *dev)
diff --git a/drivers/gpu/drm/radeon/radeon_fb.c b/drivers/gpu/drm/radeon/radeon_fb.c
index 6c10a83f3362..2be4fe9c7217 100644
--- a/drivers/gpu/drm/radeon/radeon_fb.c
+++ b/drivers/gpu/drm/radeon/radeon_fb.c
@@ -366,7 +366,6 @@ int radeon_fbdev_init(struct radeon_device *rdev)
 			      &radeon_fb_helper_funcs);
 
 	ret = drm_fb_helper_init(rdev->ddev, &rfbdev->helper,
-				 rdev->num_crtc,
 				 RADEONFB_CONN_LIMIT);
 	if (ret)
 		goto free;
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 0bcffd8a7bd3..96683f5b2b1b 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -220,8 +220,8 @@ int radeon_gem_info_ioctl(struct drm_device *dev, void *data,
 
 	man = &rdev->mman.bdev.man[TTM_PL_VRAM];
 
-	args->vram_size = rdev->mc.real_vram_size;
-	args->vram_visible = (u64)man->size << PAGE_SHIFT;
+	args->vram_size = (u64)man->size << PAGE_SHIFT;
+	args->vram_visible = rdev->mc.visible_vram_size;
 	args->vram_visible -= rdev->vram_pin_size;
 	args->gart_size = rdev->mc.gtt_size;
 	args->gart_size -= rdev->gart_pin_size;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 7a10b3852970..684f1703aa5c 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -979,19 +979,19 @@ void radeon_ttm_set_active_vram_size(struct radeon_device *rdev, u64 size)
 static struct vm_operations_struct radeon_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops = NULL;
 
-static int radeon_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int radeon_ttm_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo;
 	struct radeon_device *rdev;
 	int r;
 
-	bo = (struct ttm_buffer_object *)vma->vm_private_data;	
+	bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
 	if (bo == NULL) {
 		return VM_FAULT_NOPAGE;
 	}
 	rdev = radeon_get_rdev(bo->bdev);
 	down_read(&rdev->pm.mclk_lock);
-	r = ttm_vm_ops->fault(vma, vmf);
+	r = ttm_vm_ops->fault(vmf);
 	up_read(&rdev->pm.mclk_lock);
 	return r;
 }
diff --git a/drivers/gpu/drm/radeon/vce_v1_0.c b/drivers/gpu/drm/radeon/vce_v1_0.c
index a01efe39a820..f541a4b5ac51 100644
--- a/drivers/gpu/drm/radeon/vce_v1_0.c
+++ b/drivers/gpu/drm/radeon/vce_v1_0.c
@@ -196,7 +196,7 @@ int vce_v1_0_load_fw(struct radeon_device *rdev, uint32_t *data)
 	memset(&data[5], 0, 44);
 	memcpy(&data[16], &sign[1], rdev->vce_fw->size - sizeof(*sign));
 
-	data += le32_to_cpu(data[4]) / 4;
+	data += (le32_to_cpu(sign->len) + 64) / 4;
 	data[0] = sign->val[i].sigval[0];
 	data[1] = sign->val[i].sigval[1];
 	data[2] = sign->val[i].sigval[2];
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_kms.c b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
index b5d3f16cfa12..ff61f6032f2c 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_kms.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_kms.c
@@ -662,7 +662,7 @@ int rcar_du_modeset_init(struct rcar_du_device *rcdu)
 	drm_kms_helper_poll_init(dev);
 
 	if (dev->mode_config.num_connector) {
-		fbdev = drm_fbdev_cma_init(dev, 32, dev->mode_config.num_crtc,
+		fbdev = drm_fbdev_cma_init(dev, 32,
 					   dev->mode_config.num_connector);
 		if (IS_ERR(fbdev))
 			return PTR_ERR(fbdev);
diff --git a/drivers/gpu/drm/rockchip/Kconfig b/drivers/gpu/drm/rockchip/Kconfig
index 6f7f9c59f05b..0e4eb845cbb0 100644
--- a/drivers/gpu/drm/rockchip/Kconfig
+++ b/drivers/gpu/drm/rockchip/Kconfig
@@ -21,6 +21,17 @@ config ROCKCHIP_ANALOGIX_DP
 	  for the Analogix Core DP driver. If you want to enable DP
 	  on RK3288 based SoC, you should selet this option.
 
+config ROCKCHIP_CDN_DP
+        tristate "Rockchip cdn DP"
+        depends on DRM_ROCKCHIP
+	depends on EXTCON
+	select SND_SOC_HDMI_CODEC if SND_SOC
+        help
+	  This selects support for Rockchip SoC specific extensions
+	  for the cdn DP driver. If you want to enable Dp on
+	  RK3399 based SoC, you should select this
+	  option.
+
 config ROCKCHIP_DW_HDMI
         tristate "Rockchip specific extensions for Synopsys DW HDMI"
         depends on DRM_ROCKCHIP
diff --git a/drivers/gpu/drm/rockchip/Makefile b/drivers/gpu/drm/rockchip/Makefile
index 9746365694ba..c931e2a7d8de 100644
--- a/drivers/gpu/drm/rockchip/Makefile
+++ b/drivers/gpu/drm/rockchip/Makefile
@@ -7,6 +7,8 @@ rockchipdrm-y := rockchip_drm_drv.o rockchip_drm_fb.o \
 rockchipdrm-$(CONFIG_DRM_FBDEV_EMULATION) += rockchip_drm_fbdev.o
 
 obj-$(CONFIG_ROCKCHIP_ANALOGIX_DP) += analogix_dp-rockchip.o
+obj-$(CONFIG_ROCKCHIP_CDN_DP) += cdn-dp.o
+cdn-dp-objs := cdn-dp-core.o cdn-dp-reg.o
 obj-$(CONFIG_ROCKCHIP_DW_HDMI) += dw_hdmi-rockchip.o
 obj-$(CONFIG_ROCKCHIP_DW_MIPI_DSI) += dw-mipi-dsi.o
 obj-$(CONFIG_ROCKCHIP_INNO_HDMI) += inno_hdmi.o
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
new file mode 100644
index 000000000000..fd79a70b8552
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -0,0 +1,1262 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_of.h>
+
+#include <linux/clk.h>
+#include <linux/component.h>
+#include <linux/extcon.h>
+#include <linux/firmware.h>
+#include <linux/regmap.h>
+#include <linux/reset.h>
+#include <linux/mfd/syscon.h>
+#include <linux/phy/phy.h>
+
+#include <sound/hdmi-codec.h>
+
+#include "cdn-dp-core.h"
+#include "cdn-dp-reg.h"
+#include "rockchip_drm_vop.h"
+
+#define connector_to_dp(c) \
+		container_of(c, struct cdn_dp_device, connector)
+
+#define encoder_to_dp(c) \
+		container_of(c, struct cdn_dp_device, encoder)
+
+#define GRF_SOC_CON9		0x6224
+#define DP_SEL_VOP_LIT		BIT(12)
+#define GRF_SOC_CON26		0x6268
+#define UPHY_SEL_BIT		3
+#define UPHY_SEL_MASK		BIT(19)
+#define DPTX_HPD_SEL		(3 << 12)
+#define DPTX_HPD_DEL		(2 << 12)
+#define DPTX_HPD_SEL_MASK	(3 << 28)
+
+#define CDN_FW_TIMEOUT_MS	(64 * 1000)
+#define CDN_DPCD_TIMEOUT_MS	5000
+#define CDN_DP_FIRMWARE		"rockchip/dptx.bin"
+
+struct cdn_dp_data {
+	u8 max_phy;
+};
+
+struct cdn_dp_data rk3399_cdn_dp = {
+	.max_phy = 2,
+};
+
+static const struct of_device_id cdn_dp_dt_ids[] = {
+	{ .compatible = "rockchip,rk3399-cdn-dp",
+		.data = (void *)&rk3399_cdn_dp },
+	{}
+};
+
+MODULE_DEVICE_TABLE(of, cdn_dp_dt_ids);
+
+static int cdn_dp_grf_write(struct cdn_dp_device *dp,
+			    unsigned int reg, unsigned int val)
+{
+	int ret;
+
+	ret = clk_prepare_enable(dp->grf_clk);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to prepare_enable grf clock\n");
+		return ret;
+	}
+
+	ret = regmap_write(dp->grf, reg, val);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Could not write to GRF: %d\n", ret);
+		return ret;
+	}
+
+	clk_disable_unprepare(dp->grf_clk);
+
+	return 0;
+}
+
+static int cdn_dp_clk_enable(struct cdn_dp_device *dp)
+{
+	int ret;
+	u32 rate;
+
+	ret = clk_prepare_enable(dp->pclk);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dp->dev, "cannot enable dp pclk %d\n", ret);
+		goto err_pclk;
+	}
+
+	ret = clk_prepare_enable(dp->core_clk);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dp->dev, "cannot enable core_clk %d\n", ret);
+		goto err_core_clk;
+	}
+
+	ret = pm_runtime_get_sync(dp->dev);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dp->dev, "cannot get pm runtime %d\n", ret);
+		goto err_pm_runtime_get;
+	}
+
+	reset_control_assert(dp->core_rst);
+	reset_control_assert(dp->dptx_rst);
+	reset_control_assert(dp->apb_rst);
+	reset_control_deassert(dp->core_rst);
+	reset_control_deassert(dp->dptx_rst);
+	reset_control_deassert(dp->apb_rst);
+
+	rate = clk_get_rate(dp->core_clk);
+	if (!rate) {
+		DRM_DEV_ERROR(dp->dev, "get clk rate failed: %d\n", rate);
+		goto err_set_rate;
+	}
+
+	cdn_dp_set_fw_clk(dp, rate);
+	cdn_dp_clock_reset(dp);
+
+	return 0;
+
+err_set_rate:
+	pm_runtime_put(dp->dev);
+err_pm_runtime_get:
+	clk_disable_unprepare(dp->core_clk);
+err_core_clk:
+	clk_disable_unprepare(dp->pclk);
+err_pclk:
+	return ret;
+}
+
+static void cdn_dp_clk_disable(struct cdn_dp_device *dp)
+{
+	pm_runtime_put_sync(dp->dev);
+	clk_disable_unprepare(dp->pclk);
+	clk_disable_unprepare(dp->core_clk);
+}
+
+static int cdn_dp_get_port_lanes(struct cdn_dp_port *port)
+{
+	struct extcon_dev *edev = port->extcon;
+	union extcon_property_value property;
+	int dptx;
+	u8 lanes;
+
+	dptx = extcon_get_state(edev, EXTCON_DISP_DP);
+	if (dptx > 0) {
+		extcon_get_property(edev, EXTCON_DISP_DP,
+				    EXTCON_PROP_USB_SS, &property);
+		if (property.intval)
+			lanes = 2;
+		else
+			lanes = 4;
+	} else {
+		lanes = 0;
+	}
+
+	return lanes;
+}
+
+static int cdn_dp_get_sink_count(struct cdn_dp_device *dp, u8 *sink_count)
+{
+	int ret;
+	u8 value;
+
+	*sink_count = 0;
+	ret = cdn_dp_dpcd_read(dp, DP_SINK_COUNT, &value, 1);
+	if (ret)
+		return ret;
+
+	*sink_count = DP_GET_SINK_COUNT(value);
+	return 0;
+}
+
+static struct cdn_dp_port *cdn_dp_connected_port(struct cdn_dp_device *dp)
+{
+	struct cdn_dp_port *port;
+	int i, lanes;
+
+	for (i = 0; i < dp->ports; i++) {
+		port = dp->port[i];
+		lanes = cdn_dp_get_port_lanes(port);
+		if (lanes)
+			return port;
+	}
+	return NULL;
+}
+
+static bool cdn_dp_check_sink_connection(struct cdn_dp_device *dp)
+{
+	unsigned long timeout = jiffies + msecs_to_jiffies(CDN_DPCD_TIMEOUT_MS);
+	struct cdn_dp_port *port;
+	u8 sink_count = 0;
+
+	if (dp->active_port < 0 || dp->active_port >= dp->ports) {
+		DRM_DEV_ERROR(dp->dev, "active_port is wrong!\n");
+		return false;
+	}
+
+	port = dp->port[dp->active_port];
+
+	/*
+	 * Attempt to read sink count, retry in case the sink may not be ready.
+	 *
+	 * Sinks are *supposed* to come up within 1ms from an off state, but
+	 * some docks need more time to power up.
+	 */
+	while (time_before(jiffies, timeout)) {
+		if (!extcon_get_state(port->extcon, EXTCON_DISP_DP))
+			return false;
+
+		if (!cdn_dp_get_sink_count(dp, &sink_count))
+			return sink_count ? true : false;
+
+		usleep_range(5000, 10000);
+	}
+
+	DRM_DEV_ERROR(dp->dev, "Get sink capability timed out\n");
+	return false;
+}
+
+static enum drm_connector_status
+cdn_dp_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct cdn_dp_device *dp = connector_to_dp(connector);
+	enum drm_connector_status status = connector_status_disconnected;
+
+	mutex_lock(&dp->lock);
+	if (dp->connected)
+		status = connector_status_connected;
+	mutex_unlock(&dp->lock);
+
+	return status;
+}
+
+static void cdn_dp_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static const struct drm_connector_funcs cdn_dp_atomic_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.detect = cdn_dp_connector_detect,
+	.destroy = cdn_dp_connector_destroy,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static int cdn_dp_connector_get_modes(struct drm_connector *connector)
+{
+	struct cdn_dp_device *dp = connector_to_dp(connector);
+	struct edid *edid;
+	int ret = 0;
+
+	mutex_lock(&dp->lock);
+	edid = dp->edid;
+	if (edid) {
+		DRM_DEV_DEBUG_KMS(dp->dev, "got edid: width[%d] x height[%d]\n",
+				  edid->width_cm, edid->height_cm);
+
+		dp->sink_has_audio = drm_detect_monitor_audio(edid);
+		ret = drm_add_edid_modes(connector, edid);
+		if (ret) {
+			drm_mode_connector_update_edid_property(connector,
+								edid);
+			drm_edid_to_eld(connector, edid);
+		}
+	}
+	mutex_unlock(&dp->lock);
+
+	return ret;
+}
+
+static struct drm_encoder *
+cdn_dp_connector_best_encoder(struct drm_connector *connector)
+{
+	struct cdn_dp_device *dp = connector_to_dp(connector);
+
+	return &dp->encoder;
+}
+
+static int cdn_dp_connector_mode_valid(struct drm_connector *connector,
+				       struct drm_display_mode *mode)
+{
+	struct cdn_dp_device *dp = connector_to_dp(connector);
+	struct drm_display_info *display_info = &dp->connector.display_info;
+	u32 requested, actual, rate, sink_max, source_max = 0;
+	u8 lanes, bpc;
+
+	/* If DP is disconnected, every mode is invalid */
+	if (!dp->connected)
+		return MODE_BAD;
+
+	switch (display_info->bpc) {
+	case 10:
+		bpc = 10;
+		break;
+	case 6:
+		bpc = 6;
+		break;
+	default:
+		bpc = 8;
+		break;
+	}
+
+	requested = mode->clock * bpc * 3 / 1000;
+
+	source_max = dp->lanes;
+	sink_max = drm_dp_max_lane_count(dp->dpcd);
+	lanes = min(source_max, sink_max);
+
+	source_max = drm_dp_bw_code_to_link_rate(CDN_DP_MAX_LINK_RATE);
+	sink_max = drm_dp_max_link_rate(dp->dpcd);
+	rate = min(source_max, sink_max);
+
+	actual = rate * lanes / 100;
+
+	/* efficiency is about 0.8 */
+	actual = actual * 8 / 10;
+
+	if (requested > actual) {
+		DRM_DEV_DEBUG_KMS(dp->dev,
+				  "requested=%d, actual=%d, clock=%d\n",
+				  requested, actual, mode->clock);
+		return MODE_CLOCK_HIGH;
+	}
+
+	return MODE_OK;
+}
+
+static struct drm_connector_helper_funcs cdn_dp_connector_helper_funcs = {
+	.get_modes = cdn_dp_connector_get_modes,
+	.best_encoder = cdn_dp_connector_best_encoder,
+	.mode_valid = cdn_dp_connector_mode_valid,
+};
+
+static int cdn_dp_firmware_init(struct cdn_dp_device *dp)
+{
+	int ret;
+	const u32 *iram_data, *dram_data;
+	const struct firmware *fw = dp->fw;
+	const struct cdn_firmware_header *hdr;
+
+	hdr = (struct cdn_firmware_header *)fw->data;
+	if (fw->size != le32_to_cpu(hdr->size_bytes)) {
+		DRM_DEV_ERROR(dp->dev, "firmware is invalid\n");
+		return -EINVAL;
+	}
+
+	iram_data = (const u32 *)(fw->data + hdr->header_size);
+	dram_data = (const u32 *)(fw->data + hdr->header_size + hdr->iram_size);
+
+	ret = cdn_dp_load_firmware(dp, iram_data, hdr->iram_size,
+				   dram_data, hdr->dram_size);
+	if (ret)
+		return ret;
+
+	ret = cdn_dp_set_firmware_active(dp, true);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "active ucpu failed: %d\n", ret);
+		return ret;
+	}
+
+	return cdn_dp_event_config(dp);
+}
+
+static int cdn_dp_get_sink_capability(struct cdn_dp_device *dp)
+{
+	int ret;
+
+	if (!cdn_dp_check_sink_connection(dp))
+		return -ENODEV;
+
+	ret = cdn_dp_dpcd_read(dp, DP_DPCD_REV, dp->dpcd,
+			       DP_RECEIVER_CAP_SIZE);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to get caps %d\n", ret);
+		return ret;
+	}
+
+	kfree(dp->edid);
+	dp->edid = drm_do_get_edid(&dp->connector,
+				   cdn_dp_get_edid_block, dp);
+	return 0;
+}
+
+static int cdn_dp_enable_phy(struct cdn_dp_device *dp, struct cdn_dp_port *port)
+{
+	union extcon_property_value property;
+	int ret;
+
+	ret = cdn_dp_grf_write(dp, GRF_SOC_CON26,
+			       (port->id << UPHY_SEL_BIT) | UPHY_SEL_MASK);
+	if (ret)
+		return ret;
+
+	if (!port->phy_enabled) {
+		ret = phy_power_on(port->phy);
+		if (ret) {
+			DRM_DEV_ERROR(dp->dev, "phy power on failed: %d\n",
+				      ret);
+			goto err_phy;
+		}
+		port->phy_enabled = true;
+	}
+
+	ret = cdn_dp_grf_write(dp, GRF_SOC_CON26,
+			       DPTX_HPD_SEL_MASK | DPTX_HPD_SEL);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to write HPD_SEL %d\n", ret);
+		goto err_power_on;
+	}
+
+	ret = cdn_dp_get_hpd_status(dp);
+	if (ret <= 0) {
+		if (!ret)
+			DRM_DEV_ERROR(dp->dev, "hpd does not exist\n");
+		goto err_power_on;
+	}
+
+	ret = extcon_get_property(port->extcon, EXTCON_DISP_DP,
+				  EXTCON_PROP_USB_TYPEC_POLARITY, &property);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "get property failed\n");
+		goto err_power_on;
+	}
+
+	port->lanes = cdn_dp_get_port_lanes(port);
+	ret = cdn_dp_set_host_cap(dp, port->lanes, property.intval);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "set host capabilities failed: %d\n",
+			      ret);
+		goto err_power_on;
+	}
+
+	dp->active_port = port->id;
+	return 0;
+
+err_power_on:
+	if (phy_power_off(port->phy))
+		DRM_DEV_ERROR(dp->dev, "phy power off failed: %d", ret);
+	else
+		port->phy_enabled = false;
+
+err_phy:
+	cdn_dp_grf_write(dp, GRF_SOC_CON26,
+			 DPTX_HPD_SEL_MASK | DPTX_HPD_DEL);
+	return ret;
+}
+
+static int cdn_dp_disable_phy(struct cdn_dp_device *dp,
+			      struct cdn_dp_port *port)
+{
+	int ret;
+
+	if (port->phy_enabled) {
+		ret = phy_power_off(port->phy);
+		if (ret) {
+			DRM_DEV_ERROR(dp->dev, "phy power off failed: %d", ret);
+			return ret;
+		}
+	}
+
+	port->phy_enabled = false;
+	port->lanes = 0;
+	dp->active_port = -1;
+	return 0;
+}
+
+static int cdn_dp_disable(struct cdn_dp_device *dp)
+{
+	int ret, i;
+
+	if (!dp->active)
+		return 0;
+
+	for (i = 0; i < dp->ports; i++)
+		cdn_dp_disable_phy(dp, dp->port[i]);
+
+	ret = cdn_dp_grf_write(dp, GRF_SOC_CON26,
+			       DPTX_HPD_SEL_MASK | DPTX_HPD_DEL);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to clear hpd sel %d\n",
+			      ret);
+		return ret;
+	}
+
+	cdn_dp_set_firmware_active(dp, false);
+	cdn_dp_clk_disable(dp);
+	dp->active = false;
+	dp->link.rate = 0;
+	dp->link.num_lanes = 0;
+	if (!dp->connected) {
+		kfree(dp->edid);
+		dp->edid = NULL;
+	}
+
+	return 0;
+}
+
+static int cdn_dp_enable(struct cdn_dp_device *dp)
+{
+	int ret, i, lanes;
+	struct cdn_dp_port *port;
+
+	port = cdn_dp_connected_port(dp);
+	if (!port) {
+		DRM_DEV_ERROR(dp->dev,
+			      "Can't enable without connection\n");
+		return -ENODEV;
+	}
+
+	if (dp->active)
+		return 0;
+
+	ret = cdn_dp_clk_enable(dp);
+	if (ret)
+		return ret;
+
+	ret = cdn_dp_firmware_init(dp);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "firmware init failed: %d", ret);
+		goto err_clk_disable;
+	}
+
+	/* only enable the port that connected with downstream device */
+	for (i = port->id; i < dp->ports; i++) {
+		port = dp->port[i];
+		lanes = cdn_dp_get_port_lanes(port);
+		if (lanes) {
+			ret = cdn_dp_enable_phy(dp, port);
+			if (ret)
+				continue;
+
+			ret = cdn_dp_get_sink_capability(dp);
+			if (ret) {
+				cdn_dp_disable_phy(dp, port);
+			} else {
+				dp->active = true;
+				dp->lanes = port->lanes;
+				return 0;
+			}
+		}
+	}
+
+err_clk_disable:
+	cdn_dp_clk_disable(dp);
+	return ret;
+}
+
+static void cdn_dp_encoder_mode_set(struct drm_encoder *encoder,
+				    struct drm_display_mode *mode,
+				    struct drm_display_mode *adjusted)
+{
+	struct cdn_dp_device *dp = encoder_to_dp(encoder);
+	struct drm_display_info *display_info = &dp->connector.display_info;
+	struct video_info *video = &dp->video_info;
+
+	switch (display_info->bpc) {
+	case 10:
+		video->color_depth = 10;
+		break;
+	case 6:
+		video->color_depth = 6;
+		break;
+	default:
+		video->color_depth = 8;
+		break;
+	}
+
+	video->color_fmt = PXL_RGB;
+	video->v_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NVSYNC);
+	video->h_sync_polarity = !!(mode->flags & DRM_MODE_FLAG_NHSYNC);
+
+	memcpy(&dp->mode, adjusted, sizeof(*mode));
+}
+
+static bool cdn_dp_check_link_status(struct cdn_dp_device *dp)
+{
+	u8 link_status[DP_LINK_STATUS_SIZE];
+	struct cdn_dp_port *port = cdn_dp_connected_port(dp);
+	u8 sink_lanes = drm_dp_max_lane_count(dp->dpcd);
+
+	if (!port || !dp->link.rate || !dp->link.num_lanes)
+		return false;
+
+	if (cdn_dp_dpcd_read(dp, DP_LANE0_1_STATUS, link_status,
+			     DP_LINK_STATUS_SIZE)) {
+		DRM_ERROR("Failed to get link status\n");
+		return false;
+	}
+
+	/* if link training is requested we should perform it always */
+	return drm_dp_channel_eq_ok(link_status, min(port->lanes, sink_lanes));
+}
+
+static void cdn_dp_encoder_enable(struct drm_encoder *encoder)
+{
+	struct cdn_dp_device *dp = encoder_to_dp(encoder);
+	int ret, val;
+	struct rockchip_crtc_state *state;
+
+	ret = drm_of_encoder_active_endpoint_id(dp->dev->of_node, encoder);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dp->dev, "Could not get vop id, %d", ret);
+		return;
+	}
+
+	DRM_DEV_DEBUG_KMS(dp->dev, "vop %s output to cdn-dp\n",
+			  (ret) ? "LIT" : "BIG");
+	state = to_rockchip_crtc_state(encoder->crtc->state);
+	if (ret) {
+		val = DP_SEL_VOP_LIT | (DP_SEL_VOP_LIT << 16);
+		state->output_mode = ROCKCHIP_OUT_MODE_P888;
+	} else {
+		val = DP_SEL_VOP_LIT << 16;
+		state->output_mode = ROCKCHIP_OUT_MODE_AAAA;
+	}
+
+	ret = cdn_dp_grf_write(dp, GRF_SOC_CON9, val);
+	if (ret)
+		return;
+
+	mutex_lock(&dp->lock);
+
+	ret = cdn_dp_enable(dp);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to enable encoder %d\n",
+			      ret);
+		goto out;
+	}
+	if (!cdn_dp_check_link_status(dp)) {
+		ret = cdn_dp_train_link(dp);
+		if (ret) {
+			DRM_DEV_ERROR(dp->dev, "Failed link train %d\n", ret);
+			goto out;
+		}
+	}
+
+	ret = cdn_dp_set_video_status(dp, CONTROL_VIDEO_IDLE);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to idle video %d\n", ret);
+		goto out;
+	}
+
+	ret = cdn_dp_config_video(dp);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to config video %d\n", ret);
+		goto out;
+	}
+
+	ret = cdn_dp_set_video_status(dp, CONTROL_VIDEO_VALID);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to valid video %d\n", ret);
+		goto out;
+	}
+out:
+	mutex_unlock(&dp->lock);
+}
+
+static void cdn_dp_encoder_disable(struct drm_encoder *encoder)
+{
+	struct cdn_dp_device *dp = encoder_to_dp(encoder);
+	int ret;
+
+	mutex_lock(&dp->lock);
+	if (dp->active) {
+		ret = cdn_dp_disable(dp);
+		if (ret) {
+			DRM_DEV_ERROR(dp->dev, "Failed to disable encoder %d\n",
+				      ret);
+		}
+	}
+	mutex_unlock(&dp->lock);
+
+	/*
+	 * In the following 2 cases, we need to run the event_work to re-enable
+	 * the DP:
+	 * 1. If there is not just one port device is connected, and remove one
+	 *    device from a port, the DP will be disabled here, at this case,
+	 *    run the event_work to re-open DP for the other port.
+	 * 2. If re-training or re-config failed, the DP will be disabled here.
+	 *    run the event_work to re-connect it.
+	 */
+	if (!dp->connected && cdn_dp_connected_port(dp))
+		schedule_work(&dp->event_work);
+}
+
+static int cdn_dp_encoder_atomic_check(struct drm_encoder *encoder,
+				       struct drm_crtc_state *crtc_state,
+				       struct drm_connector_state *conn_state)
+{
+	struct rockchip_crtc_state *s = to_rockchip_crtc_state(crtc_state);
+
+	s->output_mode = ROCKCHIP_OUT_MODE_AAAA;
+	s->output_type = DRM_MODE_CONNECTOR_DisplayPort;
+
+	return 0;
+}
+
+static const struct drm_encoder_helper_funcs cdn_dp_encoder_helper_funcs = {
+	.mode_set = cdn_dp_encoder_mode_set,
+	.enable = cdn_dp_encoder_enable,
+	.disable = cdn_dp_encoder_disable,
+	.atomic_check = cdn_dp_encoder_atomic_check,
+};
+
+static const struct drm_encoder_funcs cdn_dp_encoder_funcs = {
+	.destroy = drm_encoder_cleanup,
+};
+
+static int cdn_dp_parse_dt(struct cdn_dp_device *dp)
+{
+	struct device *dev = dp->dev;
+	struct device_node *np = dev->of_node;
+	struct platform_device *pdev = to_platform_device(dev);
+	struct resource *res;
+
+	dp->grf = syscon_regmap_lookup_by_phandle(np, "rockchip,grf");
+	if (IS_ERR(dp->grf)) {
+		DRM_DEV_ERROR(dev, "cdn-dp needs rockchip,grf property\n");
+		return PTR_ERR(dp->grf);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	dp->regs = devm_ioremap_resource(dev, res);
+	if (IS_ERR(dp->regs)) {
+		DRM_DEV_ERROR(dev, "ioremap reg failed\n");
+		return PTR_ERR(dp->regs);
+	}
+
+	dp->core_clk = devm_clk_get(dev, "core-clk");
+	if (IS_ERR(dp->core_clk)) {
+		DRM_DEV_ERROR(dev, "cannot get core_clk_dp\n");
+		return PTR_ERR(dp->core_clk);
+	}
+
+	dp->pclk = devm_clk_get(dev, "pclk");
+	if (IS_ERR(dp->pclk)) {
+		DRM_DEV_ERROR(dev, "cannot get pclk\n");
+		return PTR_ERR(dp->pclk);
+	}
+
+	dp->spdif_clk = devm_clk_get(dev, "spdif");
+	if (IS_ERR(dp->spdif_clk)) {
+		DRM_DEV_ERROR(dev, "cannot get spdif_clk\n");
+		return PTR_ERR(dp->spdif_clk);
+	}
+
+	dp->grf_clk = devm_clk_get(dev, "grf");
+	if (IS_ERR(dp->grf_clk)) {
+		DRM_DEV_ERROR(dev, "cannot get grf clk\n");
+		return PTR_ERR(dp->grf_clk);
+	}
+
+	dp->spdif_rst = devm_reset_control_get(dev, "spdif");
+	if (IS_ERR(dp->spdif_rst)) {
+		DRM_DEV_ERROR(dev, "no spdif reset control found\n");
+		return PTR_ERR(dp->spdif_rst);
+	}
+
+	dp->dptx_rst = devm_reset_control_get(dev, "dptx");
+	if (IS_ERR(dp->dptx_rst)) {
+		DRM_DEV_ERROR(dev, "no uphy reset control found\n");
+		return PTR_ERR(dp->dptx_rst);
+	}
+
+	dp->core_rst = devm_reset_control_get(dev, "core");
+	if (IS_ERR(dp->core_rst)) {
+		DRM_DEV_ERROR(dev, "no core reset control found\n");
+		return PTR_ERR(dp->core_rst);
+	}
+
+	dp->apb_rst = devm_reset_control_get(dev, "apb");
+	if (IS_ERR(dp->apb_rst)) {
+		DRM_DEV_ERROR(dev, "no apb reset control found\n");
+		return PTR_ERR(dp->apb_rst);
+	}
+
+	return 0;
+}
+
+static int cdn_dp_audio_hw_params(struct device *dev,  void *data,
+				  struct hdmi_codec_daifmt *daifmt,
+				  struct hdmi_codec_params *params)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	struct audio_info audio = {
+		.sample_width = params->sample_width,
+		.sample_rate = params->sample_rate,
+		.channels = params->channels,
+	};
+	int ret;
+
+	mutex_lock(&dp->lock);
+	if (!dp->active) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	switch (daifmt->fmt) {
+	case HDMI_I2S:
+		audio.format = AFMT_I2S;
+		break;
+	case HDMI_SPDIF:
+		audio.format = AFMT_SPDIF;
+		break;
+	default:
+		DRM_DEV_ERROR(dev, "Invalid format %d\n", daifmt->fmt);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = cdn_dp_audio_config(dp, &audio);
+	if (!ret)
+		dp->audio_info = audio;
+
+out:
+	mutex_unlock(&dp->lock);
+	return ret;
+}
+
+static void cdn_dp_audio_shutdown(struct device *dev, void *data)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	int ret;
+
+	mutex_lock(&dp->lock);
+	if (!dp->active)
+		goto out;
+
+	ret = cdn_dp_audio_stop(dp, &dp->audio_info);
+	if (!ret)
+		dp->audio_info.format = AFMT_UNUSED;
+out:
+	mutex_unlock(&dp->lock);
+}
+
+static int cdn_dp_audio_digital_mute(struct device *dev, void *data,
+				     bool enable)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	int ret;
+
+	mutex_lock(&dp->lock);
+	if (!dp->active) {
+		ret = -ENODEV;
+		goto out;
+	}
+
+	ret = cdn_dp_audio_mute(dp, enable);
+
+out:
+	mutex_unlock(&dp->lock);
+	return ret;
+}
+
+static int cdn_dp_audio_get_eld(struct device *dev, void *data,
+				u8 *buf, size_t len)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+
+	memcpy(buf, dp->connector.eld, min(sizeof(dp->connector.eld), len));
+
+	return 0;
+}
+
+static const struct hdmi_codec_ops audio_codec_ops = {
+	.hw_params = cdn_dp_audio_hw_params,
+	.audio_shutdown = cdn_dp_audio_shutdown,
+	.digital_mute = cdn_dp_audio_digital_mute,
+	.get_eld = cdn_dp_audio_get_eld,
+};
+
+static int cdn_dp_audio_codec_init(struct cdn_dp_device *dp,
+				   struct device *dev)
+{
+	struct hdmi_codec_pdata codec_data = {
+		.i2s = 1,
+		.spdif = 1,
+		.ops = &audio_codec_ops,
+		.max_i2s_channels = 8,
+	};
+
+	dp->audio_pdev = platform_device_register_data(
+			 dev, HDMI_CODEC_DRV_NAME, PLATFORM_DEVID_AUTO,
+			 &codec_data, sizeof(codec_data));
+
+	return PTR_ERR_OR_ZERO(dp->audio_pdev);
+}
+
+static int cdn_dp_request_firmware(struct cdn_dp_device *dp)
+{
+	int ret;
+	unsigned long timeout = jiffies + msecs_to_jiffies(CDN_FW_TIMEOUT_MS);
+	unsigned long sleep = 1000;
+
+	WARN_ON(!mutex_is_locked(&dp->lock));
+
+	if (dp->fw_loaded)
+		return 0;
+
+	/* Drop the lock before getting the firmware to avoid blocking boot */
+	mutex_unlock(&dp->lock);
+
+	while (time_before(jiffies, timeout)) {
+		ret = request_firmware(&dp->fw, CDN_DP_FIRMWARE, dp->dev);
+		if (ret == -ENOENT) {
+			msleep(sleep);
+			sleep *= 2;
+			continue;
+		} else if (ret) {
+			DRM_DEV_ERROR(dp->dev,
+				      "failed to request firmware: %d\n", ret);
+			goto out;
+		}
+
+		dp->fw_loaded = true;
+		ret = 0;
+		goto out;
+	}
+
+	DRM_DEV_ERROR(dp->dev, "Timed out trying to load firmware\n");
+	ret = -ETIMEDOUT;
+out:
+	mutex_lock(&dp->lock);
+	return ret;
+}
+
+static void cdn_dp_pd_event_work(struct work_struct *work)
+{
+	struct cdn_dp_device *dp = container_of(work, struct cdn_dp_device,
+						event_work);
+	struct drm_connector *connector = &dp->connector;
+	enum drm_connector_status old_status;
+
+	int ret;
+
+	mutex_lock(&dp->lock);
+
+	if (dp->suspended)
+		goto out;
+
+	ret = cdn_dp_request_firmware(dp);
+	if (ret)
+		goto out;
+
+	dp->connected = true;
+
+	/* Not connected, notify userspace to disable the block */
+	if (!cdn_dp_connected_port(dp)) {
+		DRM_DEV_INFO(dp->dev, "Not connected. Disabling cdn\n");
+		dp->connected = false;
+
+	/* Connected but not enabled, enable the block */
+	} else if (!dp->active) {
+		DRM_DEV_INFO(dp->dev, "Connected, not enabled. Enabling cdn\n");
+		ret = cdn_dp_enable(dp);
+		if (ret) {
+			DRM_DEV_ERROR(dp->dev, "Enable dp failed %d\n", ret);
+			dp->connected = false;
+		}
+
+	/* Enabled and connected to a dongle without a sink, notify userspace */
+	} else if (!cdn_dp_check_sink_connection(dp)) {
+		DRM_DEV_INFO(dp->dev, "Connected without sink. Assert hpd\n");
+		dp->connected = false;
+
+	/* Enabled and connected with a sink, re-train if requested */
+	} else if (!cdn_dp_check_link_status(dp)) {
+		unsigned int rate = dp->link.rate;
+		unsigned int lanes = dp->link.num_lanes;
+		struct drm_display_mode *mode = &dp->mode;
+
+		DRM_DEV_INFO(dp->dev, "Connected with sink. Re-train link\n");
+		ret = cdn_dp_train_link(dp);
+		if (ret) {
+			dp->connected = false;
+			DRM_DEV_ERROR(dp->dev, "Train link failed %d\n", ret);
+			goto out;
+		}
+
+		/* If training result is changed, update the video config */
+		if (mode->clock &&
+		    (rate != dp->link.rate || lanes != dp->link.num_lanes)) {
+			ret = cdn_dp_config_video(dp);
+			if (ret) {
+				dp->connected = false;
+				DRM_DEV_ERROR(dp->dev,
+					      "Failed to config video %d\n",
+					      ret);
+			}
+		}
+	}
+
+out:
+	mutex_unlock(&dp->lock);
+
+	old_status = connector->status;
+	connector->status = connector->funcs->detect(connector, false);
+	if (old_status != connector->status)
+		drm_kms_helper_hotplug_event(dp->drm_dev);
+}
+
+static int cdn_dp_pd_event(struct notifier_block *nb,
+			   unsigned long event, void *priv)
+{
+	struct cdn_dp_port *port = container_of(nb, struct cdn_dp_port,
+						event_nb);
+	struct cdn_dp_device *dp = port->dp;
+
+	/*
+	 * It would be nice to be able to just do the work inline right here.
+	 * However, we need to make a bunch of calls that might sleep in order
+	 * to turn on the block/phy, so use a worker instead.
+	 */
+	schedule_work(&dp->event_work);
+
+	return NOTIFY_DONE;
+}
+
+static int cdn_dp_bind(struct device *dev, struct device *master, void *data)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	struct cdn_dp_port *port;
+	struct drm_device *drm_dev = data;
+	int ret, i;
+
+	ret = cdn_dp_parse_dt(dp);
+	if (ret < 0)
+		return ret;
+
+	dp->drm_dev = drm_dev;
+	dp->connected = false;
+	dp->active = false;
+	dp->active_port = -1;
+
+	INIT_WORK(&dp->event_work, cdn_dp_pd_event_work);
+
+	encoder = &dp->encoder;
+
+	encoder->possible_crtcs = drm_of_find_possible_crtcs(drm_dev,
+							     dev->of_node);
+	DRM_DEBUG_KMS("possible_crtcs = 0x%x\n", encoder->possible_crtcs);
+
+	ret = drm_encoder_init(drm_dev, encoder, &cdn_dp_encoder_funcs,
+			       DRM_MODE_ENCODER_TMDS, NULL);
+	if (ret) {
+		DRM_ERROR("failed to initialize encoder with drm\n");
+		return ret;
+	}
+
+	drm_encoder_helper_add(encoder, &cdn_dp_encoder_helper_funcs);
+
+	connector = &dp->connector;
+	connector->polled = DRM_CONNECTOR_POLL_HPD;
+	connector->dpms = DRM_MODE_DPMS_OFF;
+
+	ret = drm_connector_init(drm_dev, connector,
+				 &cdn_dp_atomic_connector_funcs,
+				 DRM_MODE_CONNECTOR_DisplayPort);
+	if (ret) {
+		DRM_ERROR("failed to initialize connector with drm\n");
+		goto err_free_encoder;
+	}
+
+	drm_connector_helper_add(connector, &cdn_dp_connector_helper_funcs);
+
+	ret = drm_mode_connector_attach_encoder(connector, encoder);
+	if (ret) {
+		DRM_ERROR("failed to attach connector and encoder\n");
+		goto err_free_connector;
+	}
+
+	cdn_dp_audio_codec_init(dp, dev);
+
+	for (i = 0; i < dp->ports; i++) {
+		port = dp->port[i];
+
+		port->event_nb.notifier_call = cdn_dp_pd_event;
+		ret = devm_extcon_register_notifier(dp->dev, port->extcon,
+						    EXTCON_DISP_DP,
+						    &port->event_nb);
+		if (ret) {
+			DRM_DEV_ERROR(dev,
+				      "register EXTCON_DISP_DP notifier err\n");
+			goto err_free_connector;
+		}
+	}
+
+	pm_runtime_enable(dev);
+
+	schedule_work(&dp->event_work);
+
+	return 0;
+
+err_free_connector:
+	drm_connector_cleanup(connector);
+err_free_encoder:
+	drm_encoder_cleanup(encoder);
+	return ret;
+}
+
+static void cdn_dp_unbind(struct device *dev, struct device *master, void *data)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	struct drm_encoder *encoder = &dp->encoder;
+	struct drm_connector *connector = &dp->connector;
+
+	cancel_work_sync(&dp->event_work);
+	platform_device_unregister(dp->audio_pdev);
+	cdn_dp_encoder_disable(encoder);
+	encoder->funcs->destroy(encoder);
+	connector->funcs->destroy(connector);
+
+	pm_runtime_disable(dev);
+	release_firmware(dp->fw);
+	kfree(dp->edid);
+	dp->edid = NULL;
+}
+
+static const struct component_ops cdn_dp_component_ops = {
+	.bind = cdn_dp_bind,
+	.unbind = cdn_dp_unbind,
+};
+
+int cdn_dp_suspend(struct device *dev)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+	int ret = 0;
+
+	mutex_lock(&dp->lock);
+	if (dp->active)
+		ret = cdn_dp_disable(dp);
+	dp->suspended = true;
+	mutex_unlock(&dp->lock);
+
+	return ret;
+}
+
+int cdn_dp_resume(struct device *dev)
+{
+	struct cdn_dp_device *dp = dev_get_drvdata(dev);
+
+	mutex_lock(&dp->lock);
+	dp->suspended = false;
+	if (dp->fw_loaded)
+		schedule_work(&dp->event_work);
+	mutex_unlock(&dp->lock);
+
+	return 0;
+}
+
+static int cdn_dp_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	const struct of_device_id *match;
+	struct cdn_dp_data *dp_data;
+	struct cdn_dp_port *port;
+	struct cdn_dp_device *dp;
+	struct extcon_dev *extcon;
+	struct phy *phy;
+	int i;
+
+	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
+	if (!dp)
+		return -ENOMEM;
+	dp->dev = dev;
+
+	match = of_match_node(cdn_dp_dt_ids, pdev->dev.of_node);
+	dp_data = (struct cdn_dp_data *)match->data;
+
+	for (i = 0; i < dp_data->max_phy; i++) {
+		extcon = extcon_get_edev_by_phandle(dev, i);
+		phy = devm_of_phy_get_by_index(dev, dev->of_node, i);
+
+		if (PTR_ERR(extcon) == -EPROBE_DEFER ||
+		    PTR_ERR(phy) == -EPROBE_DEFER)
+			return -EPROBE_DEFER;
+
+		if (IS_ERR(extcon) || IS_ERR(phy))
+			continue;
+
+		port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+		if (!dp)
+			return -ENOMEM;
+
+		port->extcon = extcon;
+		port->phy = phy;
+		port->dp = dp;
+		port->id = i;
+		dp->port[dp->ports++] = port;
+	}
+
+	if (!dp->ports) {
+		DRM_DEV_ERROR(dev, "missing extcon or phy\n");
+		return -EINVAL;
+	}
+
+	mutex_init(&dp->lock);
+	dev_set_drvdata(dev, dp);
+
+	return component_add(dev, &cdn_dp_component_ops);
+}
+
+static int cdn_dp_remove(struct platform_device *pdev)
+{
+	struct cdn_dp_device *dp = platform_get_drvdata(pdev);
+
+	cdn_dp_suspend(dp->dev);
+	component_del(&pdev->dev, &cdn_dp_component_ops);
+
+	return 0;
+}
+
+static void cdn_dp_shutdown(struct platform_device *pdev)
+{
+	struct cdn_dp_device *dp = platform_get_drvdata(pdev);
+
+	cdn_dp_suspend(dp->dev);
+}
+
+static const struct dev_pm_ops cdn_dp_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(cdn_dp_suspend,
+				cdn_dp_resume)
+};
+
+static struct platform_driver cdn_dp_driver = {
+	.probe = cdn_dp_probe,
+	.remove = cdn_dp_remove,
+	.shutdown = cdn_dp_shutdown,
+	.driver = {
+		   .name = "cdn-dp",
+		   .owner = THIS_MODULE,
+		   .of_match_table = of_match_ptr(cdn_dp_dt_ids),
+		   .pm = &cdn_dp_pm_ops,
+	},
+};
+
+module_platform_driver(cdn_dp_driver);
+
+MODULE_AUTHOR("Chris Zhong <zyw@rock-chips.com>");
+MODULE_DESCRIPTION("cdn DP Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.h b/drivers/gpu/drm/rockchip/cdn-dp-core.h
new file mode 100644
index 000000000000..f57e296401b8
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.h
@@ -0,0 +1,112 @@
+/*
+ * Copyright (C) 2016 Chris Zhong <zyw@rock-chips.com>
+ * Copyright (C) 2016 ROCKCHIP, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CDN_DP_CORE_H
+#define _CDN_DP_CORE_H
+
+#include <drm/drmP.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_dp_helper.h>
+#include <drm/drm_panel.h>
+#include "rockchip_drm_drv.h"
+
+#define MAX_PHY		2
+
+enum audio_format {
+	AFMT_I2S = 0,
+	AFMT_SPDIF = 1,
+	AFMT_UNUSED,
+};
+
+struct audio_info {
+	enum audio_format format;
+	int sample_rate;
+	int channels;
+	int sample_width;
+};
+
+enum vic_pxl_encoding_format {
+	PXL_RGB = 0x1,
+	YCBCR_4_4_4 = 0x2,
+	YCBCR_4_2_2 = 0x4,
+	YCBCR_4_2_0 = 0x8,
+	Y_ONLY = 0x10,
+};
+
+struct video_info {
+	bool h_sync_polarity;
+	bool v_sync_polarity;
+	bool interlaced;
+	int color_depth;
+	enum vic_pxl_encoding_format color_fmt;
+};
+
+struct cdn_firmware_header {
+	u32 size_bytes; /* size of the entire header+image(s) in bytes */
+	u32 header_size; /* size of just the header in bytes */
+	u32 iram_size; /* size of iram */
+	u32 dram_size; /* size of dram */
+};
+
+struct cdn_dp_port {
+	struct cdn_dp_device *dp;
+	struct notifier_block event_nb;
+	struct extcon_dev *extcon;
+	struct phy *phy;
+	u8 lanes;
+	bool phy_enabled;
+	u8 id;
+};
+
+struct cdn_dp_device {
+	struct device *dev;
+	struct drm_device *drm_dev;
+	struct drm_connector connector;
+	struct drm_encoder encoder;
+	struct drm_display_mode mode;
+	struct platform_device *audio_pdev;
+	struct work_struct event_work;
+	struct edid *edid;
+
+	struct mutex lock;
+	bool connected;
+	bool active;
+	bool suspended;
+
+	const struct firmware *fw;	/* cdn dp firmware */
+	unsigned int fw_version;	/* cdn fw version */
+	bool fw_loaded;
+
+	void __iomem *regs;
+	struct regmap *grf;
+	struct clk *core_clk;
+	struct clk *pclk;
+	struct clk *spdif_clk;
+	struct clk *grf_clk;
+	struct reset_control *spdif_rst;
+	struct reset_control *dptx_rst;
+	struct reset_control *apb_rst;
+	struct reset_control *core_rst;
+	struct audio_info audio_info;
+	struct video_info video_info;
+	struct drm_dp_link link;
+	struct cdn_dp_port *port[MAX_PHY];
+	u8 ports;
+	u8 lanes;
+	int active_port;
+
+	u8 dpcd[DP_RECEIVER_CAP_SIZE];
+	bool sink_has_audio;
+};
+#endif  /* _CDN_DP_CORE_H */
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.c b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
new file mode 100644
index 000000000000..319dbbaa3609
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.c
@@ -0,0 +1,979 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/reset.h>
+
+#include "cdn-dp-core.h"
+#include "cdn-dp-reg.h"
+
+#define CDN_DP_SPDIF_CLK		200000000
+#define FW_ALIVE_TIMEOUT_US		1000000
+#define MAILBOX_RETRY_US		1000
+#define MAILBOX_TIMEOUT_US		5000000
+#define LINK_TRAINING_RETRY_MS		20
+#define LINK_TRAINING_TIMEOUT_MS	500
+
+void cdn_dp_set_fw_clk(struct cdn_dp_device *dp, u32 clk)
+{
+	writel(clk / 1000000, dp->regs + SW_CLK_H);
+}
+
+void cdn_dp_clock_reset(struct cdn_dp_device *dp)
+{
+	u32 val;
+
+	val = DPTX_FRMR_DATA_CLK_RSTN_EN |
+	      DPTX_FRMR_DATA_CLK_EN |
+	      DPTX_PHY_DATA_RSTN_EN |
+	      DPTX_PHY_DATA_CLK_EN |
+	      DPTX_PHY_CHAR_RSTN_EN |
+	      DPTX_PHY_CHAR_CLK_EN |
+	      SOURCE_AUX_SYS_CLK_RSTN_EN |
+	      SOURCE_AUX_SYS_CLK_EN |
+	      DPTX_SYS_CLK_RSTN_EN |
+	      DPTX_SYS_CLK_EN |
+	      CFG_DPTX_VIF_CLK_RSTN_EN |
+	      CFG_DPTX_VIF_CLK_EN;
+	writel(val, dp->regs + SOURCE_DPTX_CAR);
+
+	val = SOURCE_PHY_RSTN_EN | SOURCE_PHY_CLK_EN;
+	writel(val, dp->regs + SOURCE_PHY_CAR);
+
+	val = SOURCE_PKT_SYS_RSTN_EN |
+	      SOURCE_PKT_SYS_CLK_EN |
+	      SOURCE_PKT_DATA_RSTN_EN |
+	      SOURCE_PKT_DATA_CLK_EN;
+	writel(val, dp->regs + SOURCE_PKT_CAR);
+
+	val = SPDIF_CDR_CLK_RSTN_EN |
+	      SPDIF_CDR_CLK_EN |
+	      SOURCE_AIF_SYS_RSTN_EN |
+	      SOURCE_AIF_SYS_CLK_EN |
+	      SOURCE_AIF_CLK_RSTN_EN |
+	      SOURCE_AIF_CLK_EN;
+	writel(val, dp->regs + SOURCE_AIF_CAR);
+
+	val = SOURCE_CIPHER_SYSTEM_CLK_RSTN_EN |
+	      SOURCE_CIPHER_SYS_CLK_EN |
+	      SOURCE_CIPHER_CHAR_CLK_RSTN_EN |
+	      SOURCE_CIPHER_CHAR_CLK_EN;
+	writel(val, dp->regs + SOURCE_CIPHER_CAR);
+
+	val = SOURCE_CRYPTO_SYS_CLK_RSTN_EN |
+	      SOURCE_CRYPTO_SYS_CLK_EN;
+	writel(val, dp->regs + SOURCE_CRYPTO_CAR);
+
+	/* enable Mailbox and PIF interrupt */
+	writel(0, dp->regs + APB_INT_MASK);
+}
+
+static int cdn_dp_mailbox_read(struct cdn_dp_device *dp)
+{
+	int val, ret;
+
+	ret = readx_poll_timeout(readl, dp->regs + MAILBOX_EMPTY_ADDR,
+				 val, !val, MAILBOX_RETRY_US,
+				 MAILBOX_TIMEOUT_US);
+	if (ret < 0)
+		return ret;
+
+	return readl(dp->regs + MAILBOX0_RD_DATA) & 0xff;
+}
+
+static int cdp_dp_mailbox_write(struct cdn_dp_device *dp, u8 val)
+{
+	int ret, full;
+
+	ret = readx_poll_timeout(readl, dp->regs + MAILBOX_FULL_ADDR,
+				 full, !full, MAILBOX_RETRY_US,
+				 MAILBOX_TIMEOUT_US);
+	if (ret < 0)
+		return ret;
+
+	writel(val, dp->regs + MAILBOX0_WR_DATA);
+
+	return 0;
+}
+
+static int cdn_dp_mailbox_validate_receive(struct cdn_dp_device *dp,
+					   u8 module_id, u8 opcode,
+					   u8 req_size)
+{
+	u32 mbox_size, i;
+	u8 header[4];
+	int ret;
+
+	/* read the header of the message */
+	for (i = 0; i < 4; i++) {
+		ret = cdn_dp_mailbox_read(dp);
+		if (ret < 0)
+			return ret;
+
+		header[i] = ret;
+	}
+
+	mbox_size = (header[2] << 8) | header[3];
+
+	if (opcode != header[0] || module_id != header[1] ||
+	    req_size != mbox_size) {
+		/*
+		 * If the message in mailbox is not what we want, we need to
+		 * clear the mailbox by reading its contents.
+		 */
+		for (i = 0; i < mbox_size; i++)
+			if (cdn_dp_mailbox_read(dp) < 0)
+				break;
+
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int cdn_dp_mailbox_read_receive(struct cdn_dp_device *dp,
+				       u8 *buff, u8 buff_size)
+{
+	u32 i;
+	int ret;
+
+	for (i = 0; i < buff_size; i++) {
+		ret = cdn_dp_mailbox_read(dp);
+		if (ret < 0)
+			return ret;
+
+		buff[i] = ret;
+	}
+
+	return 0;
+}
+
+static int cdn_dp_mailbox_send(struct cdn_dp_device *dp, u8 module_id,
+			       u8 opcode, u16 size, u8 *message)
+{
+	u8 header[4];
+	int ret, i;
+
+	header[0] = opcode;
+	header[1] = module_id;
+	header[2] = (size >> 8) & 0xff;
+	header[3] = size & 0xff;
+
+	for (i = 0; i < 4; i++) {
+		ret = cdp_dp_mailbox_write(dp, header[i]);
+		if (ret)
+			return ret;
+	}
+
+	for (i = 0; i < size; i++) {
+		ret = cdp_dp_mailbox_write(dp, message[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static int cdn_dp_reg_write(struct cdn_dp_device *dp, u16 addr, u32 val)
+{
+	u8 msg[6];
+
+	msg[0] = (addr >> 8) & 0xff;
+	msg[1] = addr & 0xff;
+	msg[2] = (val >> 24) & 0xff;
+	msg[3] = (val >> 16) & 0xff;
+	msg[4] = (val >> 8) & 0xff;
+	msg[5] = val & 0xff;
+	return cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_WRITE_REGISTER,
+				   sizeof(msg), msg);
+}
+
+static int cdn_dp_reg_write_bit(struct cdn_dp_device *dp, u16 addr,
+				u8 start_bit, u8 bits_no, u32 val)
+{
+	u8 field[8];
+
+	field[0] = (addr >> 8) & 0xff;
+	field[1] = addr & 0xff;
+	field[2] = start_bit;
+	field[3] = bits_no;
+	field[4] = (val >> 24) & 0xff;
+	field[5] = (val >> 16) & 0xff;
+	field[6] = (val >> 8) & 0xff;
+	field[7] = val & 0xff;
+
+	return cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_WRITE_FIELD,
+				   sizeof(field), field);
+}
+
+int cdn_dp_dpcd_read(struct cdn_dp_device *dp, u32 addr, u8 *data, u16 len)
+{
+	u8 msg[5], reg[5];
+	int ret;
+
+	msg[0] = (len >> 8) & 0xff;
+	msg[1] = len & 0xff;
+	msg[2] = (addr >> 16) & 0xff;
+	msg[3] = (addr >> 8) & 0xff;
+	msg[4] = addr & 0xff;
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_READ_DPCD,
+				  sizeof(msg), msg);
+	if (ret)
+		goto err_dpcd_read;
+
+	ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+					      DPTX_READ_DPCD,
+					      sizeof(reg) + len);
+	if (ret)
+		goto err_dpcd_read;
+
+	ret = cdn_dp_mailbox_read_receive(dp, reg, sizeof(reg));
+	if (ret)
+		goto err_dpcd_read;
+
+	ret = cdn_dp_mailbox_read_receive(dp, data, len);
+
+err_dpcd_read:
+	return ret;
+}
+
+int cdn_dp_dpcd_write(struct cdn_dp_device *dp, u32 addr, u8 value)
+{
+	u8 msg[6], reg[5];
+	int ret;
+
+	msg[0] = 0;
+	msg[1] = 1;
+	msg[2] = (addr >> 16) & 0xff;
+	msg[3] = (addr >> 8) & 0xff;
+	msg[4] = addr & 0xff;
+	msg[5] = value;
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_WRITE_DPCD,
+				  sizeof(msg), msg);
+	if (ret)
+		goto err_dpcd_write;
+
+	ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+					      DPTX_WRITE_DPCD, sizeof(reg));
+	if (ret)
+		goto err_dpcd_write;
+
+	ret = cdn_dp_mailbox_read_receive(dp, reg, sizeof(reg));
+	if (ret)
+		goto err_dpcd_write;
+
+	if (addr != (reg[2] << 16 | reg[3] << 8 | reg[4]))
+		ret = -EINVAL;
+
+err_dpcd_write:
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "dpcd write failed: %d\n", ret);
+	return ret;
+}
+
+int cdn_dp_load_firmware(struct cdn_dp_device *dp, const u32 *i_mem,
+			 u32 i_size, const u32 *d_mem, u32 d_size)
+{
+	u32 reg;
+	int i, ret;
+
+	/* reset ucpu before load firmware*/
+	writel(APB_IRAM_PATH | APB_DRAM_PATH | APB_XT_RESET,
+	       dp->regs + APB_CTRL);
+
+	for (i = 0; i < i_size; i += 4)
+		writel(*i_mem++, dp->regs + ADDR_IMEM + i);
+
+	for (i = 0; i < d_size; i += 4)
+		writel(*d_mem++, dp->regs + ADDR_DMEM + i);
+
+	/* un-reset ucpu */
+	writel(0, dp->regs + APB_CTRL);
+
+	/* check the keep alive register to make sure fw working */
+	ret = readx_poll_timeout(readl, dp->regs + KEEP_ALIVE,
+				 reg, reg, 2000, FW_ALIVE_TIMEOUT_US);
+	if (ret < 0) {
+		DRM_DEV_ERROR(dp->dev, "failed to loaded the FW reg = %x\n",
+			      reg);
+		return -EINVAL;
+	}
+
+	reg = readl(dp->regs + VER_L) & 0xff;
+	dp->fw_version = reg;
+	reg = readl(dp->regs + VER_H) & 0xff;
+	dp->fw_version |= reg << 8;
+	reg = readl(dp->regs + VER_LIB_L_ADDR) & 0xff;
+	dp->fw_version |= reg << 16;
+	reg = readl(dp->regs + VER_LIB_H_ADDR) & 0xff;
+	dp->fw_version |= reg << 24;
+
+	dev_dbg(dp->dev, "firmware version: %x\n", dp->fw_version);
+
+	return 0;
+}
+
+int cdn_dp_set_firmware_active(struct cdn_dp_device *dp, bool enable)
+{
+	u8 msg[5];
+	int ret, i;
+
+	msg[0] = GENERAL_MAIN_CONTROL;
+	msg[1] = MB_MODULE_ID_GENERAL;
+	msg[2] = 0;
+	msg[3] = 1;
+	msg[4] = enable ? FW_ACTIVE : FW_STANDBY;
+
+	for (i = 0; i < sizeof(msg); i++) {
+		ret = cdp_dp_mailbox_write(dp, msg[i]);
+		if (ret)
+			goto err_set_firmware_active;
+	}
+
+	/* read the firmware state */
+	for (i = 0; i < sizeof(msg); i++)  {
+		ret = cdn_dp_mailbox_read(dp);
+		if (ret < 0)
+			goto err_set_firmware_active;
+
+		msg[i] = ret;
+	}
+
+	ret = 0;
+
+err_set_firmware_active:
+	if (ret < 0)
+		DRM_DEV_ERROR(dp->dev, "set firmware active failed\n");
+	return ret;
+}
+
+int cdn_dp_set_host_cap(struct cdn_dp_device *dp, u8 lanes, bool flip)
+{
+	u8 msg[8];
+	int ret;
+
+	msg[0] = CDN_DP_MAX_LINK_RATE;
+	msg[1] = lanes | SCRAMBLER_EN;
+	msg[2] = VOLTAGE_LEVEL_2;
+	msg[3] = PRE_EMPHASIS_LEVEL_3;
+	msg[4] = PTS1 | PTS2 | PTS3 | PTS4;
+	msg[5] = FAST_LT_NOT_SUPPORT;
+	msg[6] = flip ? LANE_MAPPING_FLIPPED : LANE_MAPPING_NORMAL;
+	msg[7] = ENHANCED;
+
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX,
+				  DPTX_SET_HOST_CAPABILITIES,
+				  sizeof(msg), msg);
+	if (ret)
+		goto err_set_host_cap;
+
+	ret = cdn_dp_reg_write(dp, DP_AUX_SWAP_INVERSION_CONTROL,
+			       AUX_HOST_INVERT);
+
+err_set_host_cap:
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "set host cap failed: %d\n", ret);
+	return ret;
+}
+
+int cdn_dp_event_config(struct cdn_dp_device *dp)
+{
+	u8 msg[5];
+	int ret;
+
+	memset(msg, 0, sizeof(msg));
+
+	msg[0] = DPTX_EVENT_ENABLE_HPD | DPTX_EVENT_ENABLE_TRAINING;
+
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_ENABLE_EVENT,
+				  sizeof(msg), msg);
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "set event config failed: %d\n", ret);
+
+	return ret;
+}
+
+u32 cdn_dp_get_event(struct cdn_dp_device *dp)
+{
+	return readl(dp->regs + SW_EVENTS0);
+}
+
+int cdn_dp_get_hpd_status(struct cdn_dp_device *dp)
+{
+	u8 status;
+	int ret;
+
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_HPD_STATE,
+				  0, NULL);
+	if (ret)
+		goto err_get_hpd;
+
+	ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+					      DPTX_HPD_STATE, sizeof(status));
+	if (ret)
+		goto err_get_hpd;
+
+	ret = cdn_dp_mailbox_read_receive(dp, &status, sizeof(status));
+	if (ret)
+		goto err_get_hpd;
+
+	return status;
+
+err_get_hpd:
+	DRM_DEV_ERROR(dp->dev, "get hpd status failed: %d\n", ret);
+	return ret;
+}
+
+int cdn_dp_get_edid_block(void *data, u8 *edid,
+			  unsigned int block, size_t length)
+{
+	struct cdn_dp_device *dp = data;
+	u8 msg[2], reg[2], i;
+	int ret;
+
+	for (i = 0; i < 4; i++) {
+		msg[0] = block / 2;
+		msg[1] = block % 2;
+
+		ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_GET_EDID,
+					  sizeof(msg), msg);
+		if (ret)
+			continue;
+
+		ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+						      DPTX_GET_EDID,
+						      sizeof(reg) + length);
+		if (ret)
+			continue;
+
+		ret = cdn_dp_mailbox_read_receive(dp, reg, sizeof(reg));
+		if (ret)
+			continue;
+
+		ret = cdn_dp_mailbox_read_receive(dp, edid, length);
+		if (ret)
+			continue;
+
+		if (reg[0] == length && reg[1] == block / 2)
+			break;
+	}
+
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "get block[%d] edid failed: %d\n", block,
+			      ret);
+
+	return ret;
+}
+
+static int cdn_dp_training_start(struct cdn_dp_device *dp)
+{
+	unsigned long timeout;
+	u8 msg, event[2];
+	int ret;
+
+	msg = LINK_TRAINING_RUN;
+
+	/* start training */
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_TRAINING_CONTROL,
+				  sizeof(msg), &msg);
+	if (ret)
+		goto err_training_start;
+
+	timeout = jiffies + msecs_to_jiffies(LINK_TRAINING_TIMEOUT_MS);
+	while (time_before(jiffies, timeout)) {
+		msleep(LINK_TRAINING_RETRY_MS);
+		ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX,
+					  DPTX_READ_EVENT, 0, NULL);
+		if (ret)
+			goto err_training_start;
+
+		ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+						      DPTX_READ_EVENT,
+						      sizeof(event));
+		if (ret)
+			goto err_training_start;
+
+		ret = cdn_dp_mailbox_read_receive(dp, event, sizeof(event));
+		if (ret)
+			goto err_training_start;
+
+		if (event[1] & EQ_PHASE_FINISHED)
+			return 0;
+	}
+
+	ret = -ETIMEDOUT;
+
+err_training_start:
+	DRM_DEV_ERROR(dp->dev, "training failed: %d\n", ret);
+	return ret;
+}
+
+static int cdn_dp_get_training_status(struct cdn_dp_device *dp)
+{
+	u8 status[10];
+	int ret;
+
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_READ_LINK_STAT,
+				  0, NULL);
+	if (ret)
+		goto err_get_training_status;
+
+	ret = cdn_dp_mailbox_validate_receive(dp, MB_MODULE_ID_DP_TX,
+					      DPTX_READ_LINK_STAT,
+					      sizeof(status));
+	if (ret)
+		goto err_get_training_status;
+
+	ret = cdn_dp_mailbox_read_receive(dp, status, sizeof(status));
+	if (ret)
+		goto err_get_training_status;
+
+	dp->link.rate = status[0];
+	dp->link.num_lanes = status[1];
+
+err_get_training_status:
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "get training status failed: %d\n", ret);
+	return ret;
+}
+
+int cdn_dp_train_link(struct cdn_dp_device *dp)
+{
+	int ret;
+
+	ret = cdn_dp_training_start(dp);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to start training %d\n", ret);
+		return ret;
+	}
+
+	ret = cdn_dp_get_training_status(dp);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "Failed to get training stat %d\n", ret);
+		return ret;
+	}
+
+	DRM_DEV_DEBUG_KMS(dp->dev, "rate:0x%x, lanes:%d\n", dp->link.rate,
+			  dp->link.num_lanes);
+	return ret;
+}
+
+int cdn_dp_set_video_status(struct cdn_dp_device *dp, int active)
+{
+	u8 msg;
+	int ret;
+
+	msg = !!active;
+
+	ret = cdn_dp_mailbox_send(dp, MB_MODULE_ID_DP_TX, DPTX_SET_VIDEO,
+				  sizeof(msg), &msg);
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "set video status failed: %d\n", ret);
+
+	return ret;
+}
+
+static int cdn_dp_get_msa_misc(struct video_info *video,
+			       struct drm_display_mode *mode)
+{
+	u32 msa_misc;
+	u8 val[2] = {0};
+
+	switch (video->color_fmt) {
+	case PXL_RGB:
+	case Y_ONLY:
+		val[0] = 0;
+		break;
+	/* set YUV default color space conversion to BT601 */
+	case YCBCR_4_4_4:
+		val[0] = 6 + BT_601 * 8;
+		break;
+	case YCBCR_4_2_2:
+		val[0] = 5 + BT_601 * 8;
+		break;
+	case YCBCR_4_2_0:
+		val[0] = 5;
+		break;
+	};
+
+	switch (video->color_depth) {
+	case 6:
+		val[1] = 0;
+		break;
+	case 8:
+		val[1] = 1;
+		break;
+	case 10:
+		val[1] = 2;
+		break;
+	case 12:
+		val[1] = 3;
+		break;
+	case 16:
+		val[1] = 4;
+		break;
+	};
+
+	msa_misc = 2 * val[0] + 32 * val[1] +
+		   ((video->color_fmt == Y_ONLY) ? (1 << 14) : 0);
+
+	return msa_misc;
+}
+
+int cdn_dp_config_video(struct cdn_dp_device *dp)
+{
+	struct video_info *video = &dp->video_info;
+	struct drm_display_mode *mode = &dp->mode;
+	u64 symbol;
+	u32 val, link_rate, rem;
+	u8 bit_per_pix, tu_size_reg = TU_SIZE;
+	int ret;
+
+	bit_per_pix = (video->color_fmt == YCBCR_4_2_2) ?
+		      (video->color_depth * 2) : (video->color_depth * 3);
+
+	link_rate = drm_dp_bw_code_to_link_rate(dp->link.rate) / 1000;
+
+	ret = cdn_dp_reg_write(dp, BND_HSYNC2VSYNC, VIF_BYPASS_INTERLACE);
+	if (ret)
+		goto err_config_video;
+
+	ret = cdn_dp_reg_write(dp, HSYNC2VSYNC_POL_CTRL, 0);
+	if (ret)
+		goto err_config_video;
+
+	/*
+	 * get a best tu_size and valid symbol:
+	 * 1. chose Lclk freq(162Mhz, 270Mhz, 540Mhz), set TU to 32
+	 * 2. calculate VS(valid symbol) = TU * Pclk * Bpp / (Lclk * Lanes)
+	 * 3. if VS > *.85 or VS < *.1 or VS < 2 or TU < VS + 4, then set
+	 *    TU += 2 and repeat 2nd step.
+	 */
+	do {
+		tu_size_reg += 2;
+		symbol = tu_size_reg * mode->clock * bit_per_pix;
+		do_div(symbol, dp->link.num_lanes * link_rate * 8);
+		rem = do_div(symbol, 1000);
+		if (tu_size_reg > 64) {
+			ret = -EINVAL;
+			goto err_config_video;
+		}
+	} while ((symbol <= 1) || (tu_size_reg - symbol < 4) ||
+		 (rem > 850) || (rem < 100));
+
+	val = symbol + (tu_size_reg << 8);
+	val |= TU_CNT_RST_EN;
+	ret = cdn_dp_reg_write(dp, DP_FRAMER_TU, val);
+	if (ret)
+		goto err_config_video;
+
+	/* set the FIFO Buffer size */
+	val = div_u64(mode->clock * (symbol + 1), 1000) + link_rate;
+	val /= (dp->link.num_lanes * link_rate);
+	val = div_u64(8 * (symbol + 1), bit_per_pix) - val;
+	val += 2;
+	ret = cdn_dp_reg_write(dp, DP_VC_TABLE(15), val);
+
+	switch (video->color_depth) {
+	case 6:
+		val = BCS_6;
+		break;
+	case 8:
+		val = BCS_8;
+		break;
+	case 10:
+		val = BCS_10;
+		break;
+	case 12:
+		val = BCS_12;
+		break;
+	case 16:
+		val = BCS_16;
+		break;
+	};
+
+	val += video->color_fmt << 8;
+	ret = cdn_dp_reg_write(dp, DP_FRAMER_PXL_REPR, val);
+	if (ret)
+		goto err_config_video;
+
+	val = video->h_sync_polarity ? DP_FRAMER_SP_HSP : 0;
+	val |= video->v_sync_polarity ? DP_FRAMER_SP_VSP : 0;
+	ret = cdn_dp_reg_write(dp, DP_FRAMER_SP, val);
+	if (ret)
+		goto err_config_video;
+
+	val = (mode->hsync_start - mode->hdisplay) << 16;
+	val |= mode->htotal - mode->hsync_end;
+	ret = cdn_dp_reg_write(dp, DP_FRONT_BACK_PORCH, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->hdisplay * bit_per_pix / 8;
+	ret = cdn_dp_reg_write(dp, DP_BYTE_COUNT, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->htotal | ((mode->htotal - mode->hsync_start) << 16);
+	ret = cdn_dp_reg_write(dp, MSA_HORIZONTAL_0, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->hsync_end - mode->hsync_start;
+	val |= (mode->hdisplay << 16) | (video->h_sync_polarity << 15);
+	ret = cdn_dp_reg_write(dp, MSA_HORIZONTAL_1, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->vtotal;
+	val |= (mode->vtotal - mode->vsync_start) << 16;
+	ret = cdn_dp_reg_write(dp, MSA_VERTICAL_0, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->vsync_end - mode->vsync_start;
+	val |= (mode->vdisplay << 16) | (video->v_sync_polarity << 15);
+	ret = cdn_dp_reg_write(dp, MSA_VERTICAL_1, val);
+	if (ret)
+		goto err_config_video;
+
+	val = cdn_dp_get_msa_misc(video, mode);
+	ret = cdn_dp_reg_write(dp, MSA_MISC, val);
+	if (ret)
+		goto err_config_video;
+
+	ret = cdn_dp_reg_write(dp, STREAM_CONFIG, 1);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->hsync_end - mode->hsync_start;
+	val |= mode->hdisplay << 16;
+	ret = cdn_dp_reg_write(dp, DP_HORIZONTAL, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->vdisplay;
+	val |= (mode->vtotal - mode->vsync_start) << 16;
+	ret = cdn_dp_reg_write(dp, DP_VERTICAL_0, val);
+	if (ret)
+		goto err_config_video;
+
+	val = mode->vtotal;
+	ret = cdn_dp_reg_write(dp, DP_VERTICAL_1, val);
+	if (ret)
+		goto err_config_video;
+
+	ret = cdn_dp_reg_write_bit(dp, DP_VB_ID, 2, 1, 0);
+
+err_config_video:
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "config video failed: %d\n", ret);
+	return ret;
+}
+
+int cdn_dp_audio_stop(struct cdn_dp_device *dp, struct audio_info *audio)
+{
+	u32 val;
+	int ret;
+
+	ret = cdn_dp_reg_write(dp, AUDIO_PACK_CONTROL, 0);
+	if (ret) {
+		DRM_DEV_ERROR(dp->dev, "audio stop failed: %d\n", ret);
+		return ret;
+	}
+
+	val = SPDIF_AVG_SEL | SPDIF_JITTER_BYPASS;
+	val |= SPDIF_FIFO_MID_RANGE(0xe0);
+	val |= SPDIF_JITTER_THRSH(0xe0);
+	val |= SPDIF_JITTER_AVG_WIN(7);
+	writel(val, dp->regs + SPDIF_CTRL_ADDR);
+
+	/* clearn the audio config and reset */
+	writel(0, dp->regs + AUDIO_SRC_CNTL);
+	writel(0, dp->regs + AUDIO_SRC_CNFG);
+	writel(AUDIO_SW_RST, dp->regs + AUDIO_SRC_CNTL);
+	writel(0, dp->regs + AUDIO_SRC_CNTL);
+
+	/* reset smpl2pckt component  */
+	writel(0, dp->regs + SMPL2PKT_CNTL);
+	writel(AUDIO_SW_RST, dp->regs + SMPL2PKT_CNTL);
+	writel(0, dp->regs + SMPL2PKT_CNTL);
+
+	/* reset FIFO */
+	writel(AUDIO_SW_RST, dp->regs + FIFO_CNTL);
+	writel(0, dp->regs + FIFO_CNTL);
+
+	if (audio->format == AFMT_SPDIF)
+		clk_disable_unprepare(dp->spdif_clk);
+
+	return 0;
+}
+
+int cdn_dp_audio_mute(struct cdn_dp_device *dp, bool enable)
+{
+	int ret;
+
+	ret = cdn_dp_reg_write_bit(dp, DP_VB_ID, 4, 1, enable);
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "audio mute failed: %d\n", ret);
+
+	return ret;
+}
+
+static void cdn_dp_audio_config_i2s(struct cdn_dp_device *dp,
+				    struct audio_info *audio)
+{
+	int sub_pckt_num = 1, i2s_port_en_val = 0xf, i;
+	u32 val;
+
+	if (audio->channels == 2) {
+		if (dp->link.num_lanes == 1)
+			sub_pckt_num = 2;
+		else
+			sub_pckt_num = 4;
+
+		i2s_port_en_val = 1;
+	} else if (audio->channels == 4) {
+		i2s_port_en_val = 3;
+	}
+
+	writel(0x0, dp->regs + SPDIF_CTRL_ADDR);
+
+	writel(SYNC_WR_TO_CH_ZERO, dp->regs + FIFO_CNTL);
+
+	val = MAX_NUM_CH(audio->channels);
+	val |= NUM_OF_I2S_PORTS(audio->channels);
+	val |= AUDIO_TYPE_LPCM;
+	val |= CFG_SUB_PCKT_NUM(sub_pckt_num);
+	writel(val, dp->regs + SMPL2PKT_CNFG);
+
+	if (audio->sample_width == 16)
+		val = 0;
+	else if (audio->sample_width == 24)
+		val = 1 << 9;
+	else
+		val = 2 << 9;
+
+	val |= AUDIO_CH_NUM(audio->channels);
+	val |= I2S_DEC_PORT_EN(i2s_port_en_val);
+	val |= TRANS_SMPL_WIDTH_32;
+	writel(val, dp->regs + AUDIO_SRC_CNFG);
+
+	for (i = 0; i < (audio->channels + 1) / 2; i++) {
+		if (audio->sample_width == 16)
+			val = (0x02 << 8) | (0x02 << 20);
+		else if (audio->sample_width == 24)
+			val = (0x0b << 8) | (0x0b << 20);
+
+		val |= ((2 * i) << 4) | ((2 * i + 1) << 16);
+		writel(val, dp->regs + STTS_BIT_CH(i));
+	}
+
+	switch (audio->sample_rate) {
+	case 32000:
+		val = SAMPLING_FREQ(3) |
+		      ORIGINAL_SAMP_FREQ(0xc);
+		break;
+	case 44100:
+		val = SAMPLING_FREQ(0) |
+		      ORIGINAL_SAMP_FREQ(0xf);
+		break;
+	case 48000:
+		val = SAMPLING_FREQ(2) |
+		      ORIGINAL_SAMP_FREQ(0xd);
+		break;
+	case 88200:
+		val = SAMPLING_FREQ(8) |
+		      ORIGINAL_SAMP_FREQ(0x7);
+		break;
+	case 96000:
+		val = SAMPLING_FREQ(0xa) |
+		      ORIGINAL_SAMP_FREQ(5);
+		break;
+	case 176400:
+		val = SAMPLING_FREQ(0xc) |
+		      ORIGINAL_SAMP_FREQ(3);
+		break;
+	case 192000:
+		val = SAMPLING_FREQ(0xe) |
+		      ORIGINAL_SAMP_FREQ(1);
+		break;
+	}
+	val |= 4;
+	writel(val, dp->regs + COM_CH_STTS_BITS);
+
+	writel(SMPL2PKT_EN, dp->regs + SMPL2PKT_CNTL);
+	writel(I2S_DEC_START, dp->regs + AUDIO_SRC_CNTL);
+}
+
+static void cdn_dp_audio_config_spdif(struct cdn_dp_device *dp)
+{
+	u32 val;
+
+	val = SPDIF_AVG_SEL | SPDIF_JITTER_BYPASS;
+	val |= SPDIF_FIFO_MID_RANGE(0xe0);
+	val |= SPDIF_JITTER_THRSH(0xe0);
+	val |= SPDIF_JITTER_AVG_WIN(7);
+	writel(val, dp->regs + SPDIF_CTRL_ADDR);
+
+	writel(SYNC_WR_TO_CH_ZERO, dp->regs + FIFO_CNTL);
+
+	val = MAX_NUM_CH(2) | AUDIO_TYPE_LPCM | CFG_SUB_PCKT_NUM(4);
+	writel(val, dp->regs + SMPL2PKT_CNFG);
+	writel(SMPL2PKT_EN, dp->regs + SMPL2PKT_CNTL);
+
+	val = SPDIF_ENABLE | SPDIF_AVG_SEL | SPDIF_JITTER_BYPASS;
+	val |= SPDIF_FIFO_MID_RANGE(0xe0);
+	val |= SPDIF_JITTER_THRSH(0xe0);
+	val |= SPDIF_JITTER_AVG_WIN(7);
+	writel(val, dp->regs + SPDIF_CTRL_ADDR);
+
+	clk_prepare_enable(dp->spdif_clk);
+	clk_set_rate(dp->spdif_clk, CDN_DP_SPDIF_CLK);
+}
+
+int cdn_dp_audio_config(struct cdn_dp_device *dp, struct audio_info *audio)
+{
+	int ret;
+
+	/* reset the spdif clk before config */
+	if (audio->format == AFMT_SPDIF) {
+		reset_control_assert(dp->spdif_rst);
+		reset_control_deassert(dp->spdif_rst);
+	}
+
+	ret = cdn_dp_reg_write(dp, CM_LANE_CTRL, LANE_REF_CYC);
+	if (ret)
+		goto err_audio_config;
+
+	ret = cdn_dp_reg_write(dp, CM_CTRL, 0);
+	if (ret)
+		goto err_audio_config;
+
+	if (audio->format == AFMT_I2S)
+		cdn_dp_audio_config_i2s(dp, audio);
+	else if (audio->format == AFMT_SPDIF)
+		cdn_dp_audio_config_spdif(dp);
+
+	ret = cdn_dp_reg_write(dp, AUDIO_PACK_CONTROL, AUDIO_PACK_EN);
+
+err_audio_config:
+	if (ret)
+		DRM_DEV_ERROR(dp->dev, "audio config failed: %d\n", ret);
+	return ret;
+}
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-reg.h b/drivers/gpu/drm/rockchip/cdn-dp-reg.h
new file mode 100644
index 000000000000..b5f215324694
--- /dev/null
+++ b/drivers/gpu/drm/rockchip/cdn-dp-reg.h
@@ -0,0 +1,483 @@
+/*
+ * Copyright (C) Fuzhou Rockchip Electronics Co.Ltd
+ * Author: Chris Zhong <zyw@rock-chips.com>
+ *
+ * This software is licensed under the terms of the GNU General Public
+ * License version 2, as published by the Free Software Foundation, and
+ * may be copied, distributed, and modified under those terms.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _CDN_DP_REG_H
+#define _CDN_DP_REG_H
+
+#include <linux/bitops.h>
+
+#define ADDR_IMEM		0x10000
+#define ADDR_DMEM		0x20000
+
+/* APB CFG addr */
+#define APB_CTRL			0
+#define XT_INT_CTRL			0x04
+#define MAILBOX_FULL_ADDR		0x08
+#define MAILBOX_EMPTY_ADDR		0x0c
+#define MAILBOX0_WR_DATA		0x10
+#define MAILBOX0_RD_DATA		0x14
+#define KEEP_ALIVE			0x18
+#define VER_L				0x1c
+#define VER_H				0x20
+#define VER_LIB_L_ADDR			0x24
+#define VER_LIB_H_ADDR			0x28
+#define SW_DEBUG_L			0x2c
+#define SW_DEBUG_H			0x30
+#define MAILBOX_INT_MASK		0x34
+#define MAILBOX_INT_STATUS		0x38
+#define SW_CLK_L			0x3c
+#define SW_CLK_H			0x40
+#define SW_EVENTS0			0x44
+#define SW_EVENTS1			0x48
+#define SW_EVENTS2			0x4c
+#define SW_EVENTS3			0x50
+#define XT_OCD_CTRL			0x60
+#define APB_INT_MASK			0x6c
+#define APB_STATUS_MASK			0x70
+
+/* audio decoder addr */
+#define AUDIO_SRC_CNTL			0x30000
+#define AUDIO_SRC_CNFG			0x30004
+#define COM_CH_STTS_BITS		0x30008
+#define STTS_BIT_CH(x)			(0x3000c + ((x) << 2))
+#define SPDIF_CTRL_ADDR			0x3004c
+#define SPDIF_CH1_CS_3100_ADDR		0x30050
+#define SPDIF_CH1_CS_6332_ADDR		0x30054
+#define SPDIF_CH1_CS_9564_ADDR		0x30058
+#define SPDIF_CH1_CS_12796_ADDR		0x3005c
+#define SPDIF_CH1_CS_159128_ADDR	0x30060
+#define SPDIF_CH1_CS_191160_ADDR	0x30064
+#define SPDIF_CH2_CS_3100_ADDR		0x30068
+#define SPDIF_CH2_CS_6332_ADDR		0x3006c
+#define SPDIF_CH2_CS_9564_ADDR		0x30070
+#define SPDIF_CH2_CS_12796_ADDR		0x30074
+#define SPDIF_CH2_CS_159128_ADDR	0x30078
+#define SPDIF_CH2_CS_191160_ADDR	0x3007c
+#define SMPL2PKT_CNTL			0x30080
+#define SMPL2PKT_CNFG			0x30084
+#define FIFO_CNTL			0x30088
+#define FIFO_STTS			0x3008c
+
+/* source pif addr */
+#define SOURCE_PIF_WR_ADDR		0x30800
+#define SOURCE_PIF_WR_REQ		0x30804
+#define SOURCE_PIF_RD_ADDR		0x30808
+#define SOURCE_PIF_RD_REQ		0x3080c
+#define SOURCE_PIF_DATA_WR		0x30810
+#define SOURCE_PIF_DATA_RD		0x30814
+#define SOURCE_PIF_FIFO1_FLUSH		0x30818
+#define SOURCE_PIF_FIFO2_FLUSH		0x3081c
+#define SOURCE_PIF_STATUS		0x30820
+#define SOURCE_PIF_INTERRUPT_SOURCE	0x30824
+#define SOURCE_PIF_INTERRUPT_MASK	0x30828
+#define SOURCE_PIF_PKT_ALLOC_REG	0x3082c
+#define SOURCE_PIF_PKT_ALLOC_WR_EN	0x30830
+#define SOURCE_PIF_SW_RESET		0x30834
+
+/* bellow registers need access by mailbox */
+/* source car addr */
+#define SOURCE_HDTX_CAR			0x0900
+#define SOURCE_DPTX_CAR			0x0904
+#define SOURCE_PHY_CAR			0x0908
+#define SOURCE_CEC_CAR			0x090c
+#define SOURCE_CBUS_CAR			0x0910
+#define SOURCE_PKT_CAR			0x0918
+#define SOURCE_AIF_CAR			0x091c
+#define SOURCE_CIPHER_CAR		0x0920
+#define SOURCE_CRYPTO_CAR		0x0924
+
+/* clock meters addr */
+#define CM_CTRL				0x0a00
+#define CM_I2S_CTRL			0x0a04
+#define CM_SPDIF_CTRL			0x0a08
+#define CM_VID_CTRL			0x0a0c
+#define CM_LANE_CTRL			0x0a10
+#define I2S_NM_STABLE			0x0a14
+#define I2S_NCTS_STABLE			0x0a18
+#define SPDIF_NM_STABLE			0x0a1c
+#define SPDIF_NCTS_STABLE		0x0a20
+#define NMVID_MEAS_STABLE		0x0a24
+#define I2S_MEAS			0x0a40
+#define SPDIF_MEAS			0x0a80
+#define NMVID_MEAS			0x0ac0
+
+/* source vif addr */
+#define BND_HSYNC2VSYNC			0x0b00
+#define HSYNC2VSYNC_F1_L1		0x0b04
+#define HSYNC2VSYNC_F2_L1		0x0b08
+#define HSYNC2VSYNC_STATUS		0x0b0c
+#define HSYNC2VSYNC_POL_CTRL		0x0b10
+
+/* dptx phy addr */
+#define DP_TX_PHY_CONFIG_REG		0x2000
+#define DP_TX_PHY_STATUS_REG		0x2004
+#define DP_TX_PHY_SW_RESET		0x2008
+#define DP_TX_PHY_SCRAMBLER_SEED	0x200c
+#define DP_TX_PHY_TRAINING_01_04	0x2010
+#define DP_TX_PHY_TRAINING_05_08	0x2014
+#define DP_TX_PHY_TRAINING_09_10	0x2018
+#define TEST_COR			0x23fc
+
+/* dptx hpd addr */
+#define HPD_IRQ_DET_MIN_TIMER		0x2100
+#define HPD_IRQ_DET_MAX_TIMER		0x2104
+#define HPD_UNPLGED_DET_MIN_TIMER	0x2108
+#define HPD_STABLE_TIMER		0x210c
+#define HPD_FILTER_TIMER		0x2110
+#define HPD_EVENT_MASK			0x211c
+#define HPD_EVENT_DET			0x2120
+
+/* dpyx framer addr */
+#define DP_FRAMER_GLOBAL_CONFIG		0x2200
+#define DP_SW_RESET			0x2204
+#define DP_FRAMER_TU			0x2208
+#define DP_FRAMER_PXL_REPR		0x220c
+#define DP_FRAMER_SP			0x2210
+#define AUDIO_PACK_CONTROL		0x2214
+#define DP_VC_TABLE(x)			(0x2218 + ((x) << 2))
+#define DP_VB_ID			0x2258
+#define DP_MTPH_LVP_CONTROL		0x225c
+#define DP_MTPH_SYMBOL_VALUES		0x2260
+#define DP_MTPH_ECF_CONTROL		0x2264
+#define DP_MTPH_ACT_CONTROL		0x2268
+#define DP_MTPH_STATUS			0x226c
+#define DP_INTERRUPT_SOURCE		0x2270
+#define DP_INTERRUPT_MASK		0x2274
+#define DP_FRONT_BACK_PORCH		0x2278
+#define DP_BYTE_COUNT			0x227c
+
+/* dptx stream addr */
+#define MSA_HORIZONTAL_0		0x2280
+#define MSA_HORIZONTAL_1		0x2284
+#define MSA_VERTICAL_0			0x2288
+#define MSA_VERTICAL_1			0x228c
+#define MSA_MISC			0x2290
+#define STREAM_CONFIG			0x2294
+#define AUDIO_PACK_STATUS		0x2298
+#define VIF_STATUS			0x229c
+#define PCK_STUFF_STATUS_0		0x22a0
+#define PCK_STUFF_STATUS_1		0x22a4
+#define INFO_PACK_STATUS		0x22a8
+#define RATE_GOVERNOR_STATUS		0x22ac
+#define DP_HORIZONTAL			0x22b0
+#define DP_VERTICAL_0			0x22b4
+#define DP_VERTICAL_1			0x22b8
+#define DP_BLOCK_SDP			0x22bc
+
+/* dptx glbl addr */
+#define DPTX_LANE_EN			0x2300
+#define DPTX_ENHNCD			0x2304
+#define DPTX_INT_MASK			0x2308
+#define DPTX_INT_STATUS			0x230c
+
+/* dp aux addr */
+#define DP_AUX_HOST_CONTROL		0x2800
+#define DP_AUX_INTERRUPT_SOURCE		0x2804
+#define DP_AUX_INTERRUPT_MASK		0x2808
+#define DP_AUX_SWAP_INVERSION_CONTROL	0x280c
+#define DP_AUX_SEND_NACK_TRANSACTION	0x2810
+#define DP_AUX_CLEAR_RX			0x2814
+#define DP_AUX_CLEAR_TX			0x2818
+#define DP_AUX_TIMER_STOP		0x281c
+#define DP_AUX_TIMER_CLEAR		0x2820
+#define DP_AUX_RESET_SW			0x2824
+#define DP_AUX_DIVIDE_2M		0x2828
+#define DP_AUX_TX_PREACHARGE_LENGTH	0x282c
+#define DP_AUX_FREQUENCY_1M_MAX		0x2830
+#define DP_AUX_FREQUENCY_1M_MIN		0x2834
+#define DP_AUX_RX_PRE_MIN		0x2838
+#define DP_AUX_RX_PRE_MAX		0x283c
+#define DP_AUX_TIMER_PRESET		0x2840
+#define DP_AUX_NACK_FORMAT		0x2844
+#define DP_AUX_TX_DATA			0x2848
+#define DP_AUX_RX_DATA			0x284c
+#define DP_AUX_TX_STATUS		0x2850
+#define DP_AUX_RX_STATUS		0x2854
+#define DP_AUX_RX_CYCLE_COUNTER		0x2858
+#define DP_AUX_MAIN_STATES		0x285c
+#define DP_AUX_MAIN_TIMER		0x2860
+#define DP_AUX_AFE_OUT			0x2864
+
+/* crypto addr */
+#define CRYPTO_HDCP_REVISION		0x5800
+#define HDCP_CRYPTO_CONFIG		0x5804
+#define CRYPTO_INTERRUPT_SOURCE		0x5808
+#define CRYPTO_INTERRUPT_MASK		0x580c
+#define CRYPTO22_CONFIG			0x5818
+#define CRYPTO22_STATUS			0x581c
+#define SHA_256_DATA_IN			0x583c
+#define SHA_256_DATA_OUT_(x)		(0x5850 + ((x) << 2))
+#define AES_32_KEY_(x)			(0x5870 + ((x) << 2))
+#define AES_32_DATA_IN			0x5880
+#define AES_32_DATA_OUT_(x)		(0x5884 + ((x) << 2))
+#define CRYPTO14_CONFIG			0x58a0
+#define CRYPTO14_STATUS			0x58a4
+#define CRYPTO14_PRNM_OUT		0x58a8
+#define CRYPTO14_KM_0			0x58ac
+#define CRYPTO14_KM_1			0x58b0
+#define CRYPTO14_AN_0			0x58b4
+#define CRYPTO14_AN_1			0x58b8
+#define CRYPTO14_YOUR_KSV_0		0x58bc
+#define CRYPTO14_YOUR_KSV_1		0x58c0
+#define CRYPTO14_MI_0			0x58c4
+#define CRYPTO14_MI_1			0x58c8
+#define CRYPTO14_TI_0			0x58cc
+#define CRYPTO14_KI_0			0x58d0
+#define CRYPTO14_KI_1			0x58d4
+#define CRYPTO14_BLOCKS_NUM		0x58d8
+#define CRYPTO14_KEY_MEM_DATA_0		0x58dc
+#define CRYPTO14_KEY_MEM_DATA_1		0x58e0
+#define CRYPTO14_SHA1_MSG_DATA		0x58e4
+#define CRYPTO14_SHA1_V_VALUE_(x)	(0x58e8 + ((x) << 2))
+#define TRNG_CTRL			0x58fc
+#define TRNG_DATA_RDY			0x5900
+#define TRNG_DATA			0x5904
+
+/* cipher addr */
+#define HDCP_REVISION			0x60000
+#define INTERRUPT_SOURCE		0x60004
+#define INTERRUPT_MASK			0x60008
+#define HDCP_CIPHER_CONFIG		0x6000c
+#define AES_128_KEY_0			0x60010
+#define AES_128_KEY_1			0x60014
+#define AES_128_KEY_2			0x60018
+#define AES_128_KEY_3			0x6001c
+#define AES_128_RANDOM_0		0x60020
+#define AES_128_RANDOM_1		0x60024
+#define CIPHER14_KM_0			0x60028
+#define CIPHER14_KM_1			0x6002c
+#define CIPHER14_STATUS			0x60030
+#define CIPHER14_RI_PJ_STATUS		0x60034
+#define CIPHER_MODE			0x60038
+#define CIPHER14_AN_0			0x6003c
+#define CIPHER14_AN_1			0x60040
+#define CIPHER22_AUTH			0x60044
+#define CIPHER14_R0_DP_STATUS		0x60048
+#define CIPHER14_BOOTSTRAP		0x6004c
+
+#define DPTX_FRMR_DATA_CLK_RSTN_EN	BIT(11)
+#define DPTX_FRMR_DATA_CLK_EN		BIT(10)
+#define DPTX_PHY_DATA_RSTN_EN		BIT(9)
+#define DPTX_PHY_DATA_CLK_EN		BIT(8)
+#define DPTX_PHY_CHAR_RSTN_EN		BIT(7)
+#define DPTX_PHY_CHAR_CLK_EN		BIT(6)
+#define SOURCE_AUX_SYS_CLK_RSTN_EN	BIT(5)
+#define SOURCE_AUX_SYS_CLK_EN		BIT(4)
+#define DPTX_SYS_CLK_RSTN_EN		BIT(3)
+#define DPTX_SYS_CLK_EN			BIT(2)
+#define CFG_DPTX_VIF_CLK_RSTN_EN	BIT(1)
+#define CFG_DPTX_VIF_CLK_EN		BIT(0)
+
+#define SOURCE_PHY_RSTN_EN		BIT(1)
+#define SOURCE_PHY_CLK_EN		BIT(0)
+
+#define SOURCE_PKT_SYS_RSTN_EN		BIT(3)
+#define SOURCE_PKT_SYS_CLK_EN		BIT(2)
+#define SOURCE_PKT_DATA_RSTN_EN		BIT(1)
+#define SOURCE_PKT_DATA_CLK_EN		BIT(0)
+
+#define SPDIF_CDR_CLK_RSTN_EN		BIT(5)
+#define SPDIF_CDR_CLK_EN		BIT(4)
+#define SOURCE_AIF_SYS_RSTN_EN		BIT(3)
+#define SOURCE_AIF_SYS_CLK_EN		BIT(2)
+#define SOURCE_AIF_CLK_RSTN_EN		BIT(1)
+#define SOURCE_AIF_CLK_EN		BIT(0)
+
+#define SOURCE_CIPHER_SYSTEM_CLK_RSTN_EN	BIT(3)
+#define SOURCE_CIPHER_SYS_CLK_EN		BIT(2)
+#define SOURCE_CIPHER_CHAR_CLK_RSTN_EN		BIT(1)
+#define SOURCE_CIPHER_CHAR_CLK_EN		BIT(0)
+
+#define SOURCE_CRYPTO_SYS_CLK_RSTN_EN	BIT(1)
+#define SOURCE_CRYPTO_SYS_CLK_EN	BIT(0)
+
+#define APB_IRAM_PATH			BIT(2)
+#define APB_DRAM_PATH			BIT(1)
+#define APB_XT_RESET			BIT(0)
+
+#define MAILBOX_INT_MASK_BIT		BIT(1)
+#define PIF_INT_MASK_BIT		BIT(0)
+#define ALL_INT_MASK			3
+
+/* mailbox */
+#define MB_OPCODE_ID			0
+#define MB_MODULE_ID			1
+#define MB_SIZE_MSB_ID			2
+#define MB_SIZE_LSB_ID			3
+#define MB_DATA_ID			4
+
+#define MB_MODULE_ID_DP_TX		0x01
+#define MB_MODULE_ID_HDCP_TX		0x07
+#define MB_MODULE_ID_HDCP_RX		0x08
+#define MB_MODULE_ID_HDCP_GENERAL	0x09
+#define MB_MODULE_ID_GENERAL		0x0a
+
+/* general opcode */
+#define GENERAL_MAIN_CONTROL            0x01
+#define GENERAL_TEST_ECHO               0x02
+#define GENERAL_BUS_SETTINGS            0x03
+#define GENERAL_TEST_ACCESS             0x04
+
+#define DPTX_SET_POWER_MNG			0x00
+#define DPTX_SET_HOST_CAPABILITIES		0x01
+#define DPTX_GET_EDID				0x02
+#define DPTX_READ_DPCD				0x03
+#define DPTX_WRITE_DPCD				0x04
+#define DPTX_ENABLE_EVENT			0x05
+#define DPTX_WRITE_REGISTER			0x06
+#define DPTX_READ_REGISTER			0x07
+#define DPTX_WRITE_FIELD			0x08
+#define DPTX_TRAINING_CONTROL			0x09
+#define DPTX_READ_EVENT				0x0a
+#define DPTX_READ_LINK_STAT			0x0b
+#define DPTX_SET_VIDEO				0x0c
+#define DPTX_SET_AUDIO				0x0d
+#define DPTX_GET_LAST_AUX_STAUS			0x0e
+#define DPTX_SET_LINK_BREAK_POINT		0x0f
+#define DPTX_FORCE_LANES			0x10
+#define DPTX_HPD_STATE				0x11
+
+#define FW_STANDBY				0
+#define FW_ACTIVE				1
+
+#define DPTX_EVENT_ENABLE_HPD			BIT(0)
+#define DPTX_EVENT_ENABLE_TRAINING		BIT(1)
+
+#define LINK_TRAINING_NOT_ACTIVE		0
+#define LINK_TRAINING_RUN			1
+#define LINK_TRAINING_RESTART			2
+
+#define CONTROL_VIDEO_IDLE			0
+#define CONTROL_VIDEO_VALID			1
+
+#define TU_CNT_RST_EN				BIT(15)
+#define VIF_BYPASS_INTERLACE			BIT(13)
+#define INTERLACE_FMT_DET			BIT(12)
+#define INTERLACE_DTCT_WIN			0x20
+
+#define DP_FRAMER_SP_INTERLACE_EN		BIT(2)
+#define DP_FRAMER_SP_HSP			BIT(1)
+#define DP_FRAMER_SP_VSP			BIT(0)
+
+/* capability */
+#define AUX_HOST_INVERT				3
+#define	FAST_LT_SUPPORT				1
+#define FAST_LT_NOT_SUPPORT			0
+#define LANE_MAPPING_NORMAL			0x1b
+#define LANE_MAPPING_FLIPPED			0xe4
+#define ENHANCED				1
+#define SCRAMBLER_EN				BIT(4)
+
+#define	FULL_LT_STARTED				BIT(0)
+#define FASE_LT_STARTED				BIT(1)
+#define CLK_RECOVERY_FINISHED			BIT(2)
+#define EQ_PHASE_FINISHED			BIT(3)
+#define FASE_LT_START_FINISHED			BIT(4)
+#define CLK_RECOVERY_FAILED			BIT(5)
+#define EQ_PHASE_FAILED				BIT(6)
+#define FASE_LT_FAILED				BIT(7)
+
+#define DPTX_HPD_EVENT				BIT(0)
+#define DPTX_TRAINING_EVENT			BIT(1)
+#define HDCP_TX_STATUS_EVENT			BIT(4)
+#define HDCP2_TX_IS_KM_STORED_EVENT		BIT(5)
+#define HDCP2_TX_STORE_KM_EVENT			BIT(6)
+#define HDCP_TX_IS_RECEIVER_ID_VALID_EVENT	BIT(7)
+
+#define TU_SIZE					30
+#define CDN_DP_MAX_LINK_RATE			DP_LINK_BW_5_4
+
+/* audio */
+#define AUDIO_PACK_EN				BIT(8)
+#define SAMPLING_FREQ(x)			(((x) & 0xf) << 16)
+#define ORIGINAL_SAMP_FREQ(x)			(((x) & 0xf) << 24)
+#define SYNC_WR_TO_CH_ZERO			BIT(1)
+#define I2S_DEC_START				BIT(1)
+#define AUDIO_SW_RST				BIT(0)
+#define SMPL2PKT_EN				BIT(1)
+#define MAX_NUM_CH(x)				(((x) & 0x1f) - 1)
+#define NUM_OF_I2S_PORTS(x)			((((x) / 2 - 1) & 0x3) << 5)
+#define AUDIO_TYPE_LPCM				(2 << 7)
+#define CFG_SUB_PCKT_NUM(x)			((((x) - 1) & 0x7) << 11)
+#define AUDIO_CH_NUM(x)				((((x) - 1) & 0x1f) << 2)
+#define TRANS_SMPL_WIDTH_16			0
+#define TRANS_SMPL_WIDTH_24			BIT(11)
+#define TRANS_SMPL_WIDTH_32			(2 << 11)
+#define I2S_DEC_PORT_EN(x)			(((x) & 0xf) << 17)
+#define SPDIF_ENABLE				BIT(21)
+#define SPDIF_AVG_SEL				BIT(20)
+#define SPDIF_JITTER_BYPASS			BIT(19)
+#define SPDIF_FIFO_MID_RANGE(x)			(((x) & 0xff) << 11)
+#define SPDIF_JITTER_THRSH(x)			(((x) & 0xff) << 3)
+#define SPDIF_JITTER_AVG_WIN(x)			((x) & 0x7)
+
+/* Reference cycles when using lane clock as reference */
+#define LANE_REF_CYC				0x8000
+
+enum voltage_swing_level {
+	VOLTAGE_LEVEL_0,
+	VOLTAGE_LEVEL_1,
+	VOLTAGE_LEVEL_2,
+	VOLTAGE_LEVEL_3,
+};
+
+enum pre_emphasis_level {
+	PRE_EMPHASIS_LEVEL_0,
+	PRE_EMPHASIS_LEVEL_1,
+	PRE_EMPHASIS_LEVEL_2,
+	PRE_EMPHASIS_LEVEL_3,
+};
+
+enum pattern_set {
+	PTS1		= BIT(0),
+	PTS2		= BIT(1),
+	PTS3		= BIT(2),
+	PTS4		= BIT(3),
+	DP_NONE		= BIT(4)
+};
+
+enum vic_color_depth {
+	BCS_6 = 0x1,
+	BCS_8 = 0x2,
+	BCS_10 = 0x4,
+	BCS_12 = 0x8,
+	BCS_16 = 0x10,
+};
+
+enum vic_bt_type {
+	BT_601 = 0x0,
+	BT_709 = 0x1,
+};
+
+void cdn_dp_clock_reset(struct cdn_dp_device *dp);
+
+void cdn_dp_set_fw_clk(struct cdn_dp_device *dp, u32 clk);
+int cdn_dp_load_firmware(struct cdn_dp_device *dp, const u32 *i_mem,
+			 u32 i_size, const u32 *d_mem, u32 d_size);
+int cdn_dp_set_firmware_active(struct cdn_dp_device *dp, bool enable);
+int cdn_dp_set_host_cap(struct cdn_dp_device *dp, u8 lanes, bool flip);
+int cdn_dp_event_config(struct cdn_dp_device *dp);
+u32 cdn_dp_get_event(struct cdn_dp_device *dp);
+int cdn_dp_get_hpd_status(struct cdn_dp_device *dp);
+int cdn_dp_dpcd_write(struct cdn_dp_device *dp, u32 addr, u8 value);
+int cdn_dp_dpcd_read(struct cdn_dp_device *dp, u32 addr, u8 *data, u16 len);
+int cdn_dp_get_edid_block(void *dp, u8 *edid,
+			  unsigned int block, size_t length);
+int cdn_dp_train_link(struct cdn_dp_device *dp);
+int cdn_dp_set_video_status(struct cdn_dp_device *dp, int active);
+int cdn_dp_config_video(struct cdn_dp_device *dp);
+int cdn_dp_audio_stop(struct cdn_dp_device *dp, struct audio_info *audio);
+int cdn_dp_audio_mute(struct cdn_dp_device *dp, bool enable);
+int cdn_dp_audio_config(struct cdn_dp_device *dp, struct audio_info *audio);
+#endif /* _CDN_DP_REG_H */
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index c30d649cb147..b360e6251836 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -14,19 +14,19 @@
  * GNU General Public License for more details.
  */
 
-#include <asm/dma-iommu.h>
-
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_of.h>
 #include <linux/dma-mapping.h>
+#include <linux/dma-iommu.h>
 #include <linux/pm_runtime.h>
 #include <linux/module.h>
 #include <linux/of_graph.h>
 #include <linux/component.h>
 #include <linux/console.h>
+#include <linux/iommu.h>
 
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_fb.h"
@@ -50,28 +50,31 @@ static struct drm_driver rockchip_drm_driver;
 int rockchip_drm_dma_attach_device(struct drm_device *drm_dev,
 				   struct device *dev)
 {
-	struct dma_iommu_mapping *mapping = drm_dev->dev->archdata.mapping;
+	struct rockchip_drm_private *private = drm_dev->dev_private;
 	int ret;
 
 	if (!is_support_iommu)
 		return 0;
 
-	ret = dma_set_coherent_mask(dev, DMA_BIT_MASK(32));
-	if (ret)
+	ret = iommu_attach_device(private->domain, dev);
+	if (ret) {
+		dev_err(dev, "Failed to attach iommu device\n");
 		return ret;
+	}
 
-	dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
-
-	return arm_iommu_attach_device(dev, mapping);
+	return 0;
 }
 
 void rockchip_drm_dma_detach_device(struct drm_device *drm_dev,
 				    struct device *dev)
 {
+	struct rockchip_drm_private *private = drm_dev->dev_private;
+	struct iommu_domain *domain = private->domain;
+
 	if (!is_support_iommu)
 		return;
 
-	arm_iommu_detach_device(dev);
+	iommu_detach_device(domain, dev);
 }
 
 int rockchip_register_crtc_funcs(struct drm_crtc *crtc,
@@ -123,11 +126,46 @@ static void rockchip_drm_crtc_disable_vblank(struct drm_device *dev,
 		priv->crtc_funcs[pipe]->disable_vblank(crtc);
 }
 
+static int rockchip_drm_init_iommu(struct drm_device *drm_dev)
+{
+	struct rockchip_drm_private *private = drm_dev->dev_private;
+	struct iommu_domain_geometry *geometry;
+	u64 start, end;
+
+	if (!is_support_iommu)
+		return 0;
+
+	private->domain = iommu_domain_alloc(&platform_bus_type);
+	if (!private->domain)
+		return -ENOMEM;
+
+	geometry = &private->domain->geometry;
+	start = geometry->aperture_start;
+	end = geometry->aperture_end;
+
+	DRM_DEBUG("IOMMU context initialized (aperture: %#llx-%#llx)\n",
+		  start, end);
+	drm_mm_init(&private->mm, start, end - start + 1);
+	mutex_init(&private->mm_lock);
+
+	return 0;
+}
+
+static void rockchip_iommu_cleanup(struct drm_device *drm_dev)
+{
+	struct rockchip_drm_private *private = drm_dev->dev_private;
+
+	if (!is_support_iommu)
+		return;
+
+	drm_mm_takedown(&private->mm);
+	iommu_domain_free(private->domain);
+}
+
 static int rockchip_drm_bind(struct device *dev)
 {
 	struct drm_device *drm_dev;
 	struct rockchip_drm_private *private;
-	struct dma_iommu_mapping *mapping = NULL;
 	int ret;
 
 	drm_dev = drm_dev_alloc(&rockchip_drm_driver, dev);
@@ -151,38 +189,14 @@ static int rockchip_drm_bind(struct device *dev)
 
 	rockchip_drm_mode_config_init(drm_dev);
 
-	dev->dma_parms = devm_kzalloc(dev, sizeof(*dev->dma_parms),
-				      GFP_KERNEL);
-	if (!dev->dma_parms) {
-		ret = -ENOMEM;
+	ret = rockchip_drm_init_iommu(drm_dev);
+	if (ret)
 		goto err_config_cleanup;
-	}
-
-	if (is_support_iommu) {
-		/* TODO(djkurtz): fetch the mapping start/size from somewhere */
-		mapping = arm_iommu_create_mapping(&platform_bus_type,
-						   0x00000000,
-						   SZ_2G);
-		if (IS_ERR(mapping)) {
-			ret = PTR_ERR(mapping);
-			goto err_config_cleanup;
-		}
-
-		ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
-		if (ret)
-			goto err_release_mapping;
-
-		dma_set_max_seg_size(dev, DMA_BIT_MASK(32));
-
-		ret = arm_iommu_attach_device(dev, mapping);
-		if (ret)
-			goto err_release_mapping;
-	}
 
 	/* Try to bind all sub drivers. */
 	ret = component_bind_all(dev, drm_dev);
 	if (ret)
-		goto err_detach_device;
+		goto err_iommu_cleanup;
 
 	/* init kms poll for handling hpd */
 	drm_kms_helper_poll_init(drm_dev);
@@ -207,8 +221,6 @@ static int rockchip_drm_bind(struct device *dev)
 	if (ret)
 		goto err_fbdev_fini;
 
-	if (is_support_iommu)
-		arm_iommu_release_mapping(mapping);
 	return 0;
 err_fbdev_fini:
 	rockchip_drm_fbdev_fini(drm_dev);
@@ -217,12 +229,8 @@ err_vblank_cleanup:
 err_kms_helper_poll_fini:
 	drm_kms_helper_poll_fini(drm_dev);
 	component_unbind_all(dev, drm_dev);
-err_detach_device:
-	if (is_support_iommu)
-		arm_iommu_detach_device(dev);
-err_release_mapping:
-	if (is_support_iommu)
-		arm_iommu_release_mapping(mapping);
+err_iommu_cleanup:
+	rockchip_iommu_cleanup(drm_dev);
 err_config_cleanup:
 	drm_mode_config_cleanup(drm_dev);
 	drm_dev->dev_private = NULL;
@@ -239,8 +247,7 @@ static void rockchip_drm_unbind(struct device *dev)
 	drm_vblank_cleanup(drm_dev);
 	drm_kms_helper_poll_fini(drm_dev);
 	component_unbind_all(dev, drm_dev);
-	if (is_support_iommu)
-		arm_iommu_detach_device(dev);
+	rockchip_iommu_cleanup(drm_dev);
 	drm_mode_config_cleanup(drm_dev);
 	drm_dev->dev_private = NULL;
 	drm_dev_unregister(drm_dev);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
index fb6226cf84b7..adc39302bec5 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.h
@@ -30,6 +30,7 @@
 
 struct drm_device;
 struct drm_connector;
+struct iommu_domain;
 
 /*
  * Rockchip drm private crtc funcs.
@@ -60,7 +61,10 @@ struct rockchip_drm_private {
 	struct drm_gem_object *fbdev_bo;
 	const struct rockchip_crtc_funcs *crtc_funcs[ROCKCHIP_MAX_CRTC];
 	struct drm_atomic_state *state;
-
+	struct iommu_domain *domain;
+	/* protect drm_mm on multi-threads */
+	struct mutex mm_lock;
+	struct drm_mm mm;
 	struct list_head psr_list;
 	spinlock_t psr_list_lock;
 };
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
index d5e1f8627d38..c9ccdf8f44bb 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fb.c
@@ -213,7 +213,7 @@ rockchip_drm_framebuffer_init(struct drm_device *dev,
 
 	rockchip_fb = rockchip_fb_alloc(dev, mode_cmd, &obj, 1);
 	if (IS_ERR(rockchip_fb))
-		return NULL;
+		return ERR_CAST(rockchip_fb);
 
 	return &rockchip_fb->fb;
 }
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
index 52d1fdf9f9da..70ad50dd594d 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_fbdev.c
@@ -129,19 +129,16 @@ int rockchip_drm_fbdev_init(struct drm_device *dev)
 {
 	struct rockchip_drm_private *private = dev->dev_private;
 	struct drm_fb_helper *helper;
-	unsigned int num_crtc;
 	int ret;
 
 	if (!dev->mode_config.num_crtc || !dev->mode_config.num_connector)
 		return -EINVAL;
 
-	num_crtc = dev->mode_config.num_crtc;
-
 	helper = &private->fbdev_helper;
 
 	drm_fb_helper_prepare(dev, helper, &rockchip_drm_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, helper, num_crtc, ROCKCHIP_MAX_CONNECTOR);
+	ret = drm_fb_helper_init(dev, helper, ROCKCHIP_MAX_CONNECTOR);
 	if (ret < 0) {
 		dev_err(dev->dev, "Failed to initialize drm fb helper - %d.\n",
 			ret);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
index b70f9423379c..df9e57064f19 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.c
@@ -16,11 +16,146 @@
 #include <drm/drmP.h>
 #include <drm/drm_gem.h>
 #include <drm/drm_vma_manager.h>
+#include <linux/iommu.h>
 
 #include "rockchip_drm_drv.h"
 #include "rockchip_drm_gem.h"
 
-static int rockchip_gem_alloc_buf(struct rockchip_gem_object *rk_obj,
+static int rockchip_gem_iommu_map(struct rockchip_gem_object *rk_obj)
+{
+	struct drm_device *drm = rk_obj->base.dev;
+	struct rockchip_drm_private *private = drm->dev_private;
+	int prot = IOMMU_READ | IOMMU_WRITE;
+	ssize_t ret;
+
+	mutex_lock(&private->mm_lock);
+
+	ret = drm_mm_insert_node_generic(&private->mm, &rk_obj->mm,
+					 rk_obj->base.size, PAGE_SIZE,
+					 0, 0);
+
+	mutex_unlock(&private->mm_lock);
+	if (ret < 0) {
+		DRM_ERROR("out of I/O virtual memory: %zd\n", ret);
+		return ret;
+	}
+
+	rk_obj->dma_addr = rk_obj->mm.start;
+
+	ret = iommu_map_sg(private->domain, rk_obj->dma_addr, rk_obj->sgt->sgl,
+			   rk_obj->sgt->nents, prot);
+	if (ret < rk_obj->base.size) {
+		DRM_ERROR("failed to map buffer: size=%zd request_size=%zd\n",
+			  ret, rk_obj->base.size);
+		ret = -ENOMEM;
+		goto err_remove_node;
+	}
+
+	rk_obj->size = ret;
+
+	return 0;
+
+err_remove_node:
+	drm_mm_remove_node(&rk_obj->mm);
+
+	return ret;
+}
+
+static int rockchip_gem_iommu_unmap(struct rockchip_gem_object *rk_obj)
+{
+	struct drm_device *drm = rk_obj->base.dev;
+	struct rockchip_drm_private *private = drm->dev_private;
+
+	iommu_unmap(private->domain, rk_obj->dma_addr, rk_obj->size);
+
+	mutex_lock(&private->mm_lock);
+
+	drm_mm_remove_node(&rk_obj->mm);
+
+	mutex_unlock(&private->mm_lock);
+
+	return 0;
+}
+
+static int rockchip_gem_get_pages(struct rockchip_gem_object *rk_obj)
+{
+	struct drm_device *drm = rk_obj->base.dev;
+	int ret, i;
+	struct scatterlist *s;
+
+	rk_obj->pages = drm_gem_get_pages(&rk_obj->base);
+	if (IS_ERR(rk_obj->pages))
+		return PTR_ERR(rk_obj->pages);
+
+	rk_obj->num_pages = rk_obj->base.size >> PAGE_SHIFT;
+
+	rk_obj->sgt = drm_prime_pages_to_sg(rk_obj->pages, rk_obj->num_pages);
+	if (IS_ERR(rk_obj->sgt)) {
+		ret = PTR_ERR(rk_obj->sgt);
+		goto err_put_pages;
+	}
+
+	/*
+	 * Fake up the SG table so that dma_sync_sg_for_device() can be used
+	 * to flush the pages associated with it.
+	 *
+	 * TODO: Replace this by drm_clflush_sg() once it can be implemented
+	 * without relying on symbols that are not exported.
+	 */
+	for_each_sg(rk_obj->sgt->sgl, s, rk_obj->sgt->nents, i)
+		sg_dma_address(s) = sg_phys(s);
+
+	dma_sync_sg_for_device(drm->dev, rk_obj->sgt->sgl, rk_obj->sgt->nents,
+			       DMA_TO_DEVICE);
+
+	return 0;
+
+err_put_pages:
+	drm_gem_put_pages(&rk_obj->base, rk_obj->pages, false, false);
+	return ret;
+}
+
+static void rockchip_gem_put_pages(struct rockchip_gem_object *rk_obj)
+{
+	sg_free_table(rk_obj->sgt);
+	kfree(rk_obj->sgt);
+	drm_gem_put_pages(&rk_obj->base, rk_obj->pages, true, true);
+}
+
+static int rockchip_gem_alloc_iommu(struct rockchip_gem_object *rk_obj,
+				    bool alloc_kmap)
+{
+	int ret;
+
+	ret = rockchip_gem_get_pages(rk_obj);
+	if (ret < 0)
+		return ret;
+
+	ret = rockchip_gem_iommu_map(rk_obj);
+	if (ret < 0)
+		goto err_free;
+
+	if (alloc_kmap) {
+		rk_obj->kvaddr = vmap(rk_obj->pages, rk_obj->num_pages, VM_MAP,
+				      pgprot_writecombine(PAGE_KERNEL));
+		if (!rk_obj->kvaddr) {
+			DRM_ERROR("failed to vmap() buffer\n");
+			ret = -ENOMEM;
+			goto err_unmap;
+		}
+	}
+
+	return 0;
+
+err_unmap:
+	rockchip_gem_iommu_unmap(rk_obj);
+err_free:
+	rockchip_gem_put_pages(rk_obj);
+
+	return ret;
+}
+
+static int rockchip_gem_alloc_dma(struct rockchip_gem_object *rk_obj,
 				  bool alloc_kmap)
 {
 	struct drm_gem_object *obj = &rk_obj->base;
@@ -42,7 +177,27 @@ static int rockchip_gem_alloc_buf(struct rockchip_gem_object *rk_obj,
 	return 0;
 }
 
-static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj)
+static int rockchip_gem_alloc_buf(struct rockchip_gem_object *rk_obj,
+				  bool alloc_kmap)
+{
+	struct drm_gem_object *obj = &rk_obj->base;
+	struct drm_device *drm = obj->dev;
+	struct rockchip_drm_private *private = drm->dev_private;
+
+	if (private->domain)
+		return rockchip_gem_alloc_iommu(rk_obj, alloc_kmap);
+	else
+		return rockchip_gem_alloc_dma(rk_obj, alloc_kmap);
+}
+
+static void rockchip_gem_free_iommu(struct rockchip_gem_object *rk_obj)
+{
+	vunmap(rk_obj->kvaddr);
+	rockchip_gem_iommu_unmap(rk_obj);
+	rockchip_gem_put_pages(rk_obj);
+}
+
+static void rockchip_gem_free_dma(struct rockchip_gem_object *rk_obj)
 {
 	struct drm_gem_object *obj = &rk_obj->base;
 	struct drm_device *drm = obj->dev;
@@ -51,23 +206,68 @@ static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj)
 		       rk_obj->dma_attrs);
 }
 
-static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj,
-					struct vm_area_struct *vma)
+static void rockchip_gem_free_buf(struct rockchip_gem_object *rk_obj)
+{
+	if (rk_obj->pages)
+		rockchip_gem_free_iommu(rk_obj);
+	else
+		rockchip_gem_free_dma(rk_obj);
+}
 
+static int rockchip_drm_gem_object_mmap_iommu(struct drm_gem_object *obj,
+					      struct vm_area_struct *vma)
 {
+	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
+	unsigned int i, count = obj->size >> PAGE_SHIFT;
+	unsigned long user_count = (vma->vm_end - vma->vm_start) >> PAGE_SHIFT;
+	unsigned long uaddr = vma->vm_start;
+	unsigned long offset = vma->vm_pgoff;
+	unsigned long end = user_count + offset;
 	int ret;
+
+	if (user_count == 0)
+		return -ENXIO;
+	if (end > count)
+		return -ENXIO;
+
+	for (i = offset; i < end; i++) {
+		ret = vm_insert_page(vma, uaddr, rk_obj->pages[i]);
+		if (ret)
+			return ret;
+		uaddr += PAGE_SIZE;
+	}
+
+	return 0;
+}
+
+static int rockchip_drm_gem_object_mmap_dma(struct drm_gem_object *obj,
+					    struct vm_area_struct *vma)
+{
 	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
 	struct drm_device *drm = obj->dev;
 
+	return dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr,
+			      obj->size, rk_obj->dma_attrs);
+}
+
+static int rockchip_drm_gem_object_mmap(struct drm_gem_object *obj,
+					struct vm_area_struct *vma)
+{
+	int ret;
+	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
+
 	/*
-	 * dma_alloc_attrs() allocated a struct page table for rk_obj, so clear
+	 * We allocated a struct page table for rk_obj, so clear
 	 * VM_PFNMAP flag that was set by drm_gem_mmap_obj()/drm_gem_mmap().
 	 */
 	vma->vm_flags &= ~VM_PFNMAP;
 	vma->vm_pgoff = 0;
 
-	ret = dma_mmap_attrs(drm->dev, vma, rk_obj->kvaddr, rk_obj->dma_addr,
-			     obj->size, rk_obj->dma_attrs);
+	if (rk_obj->pages)
+		ret = rockchip_drm_gem_object_mmap_iommu(obj, vma);
+	else
+		ret = rockchip_drm_gem_object_mmap_dma(obj, vma);
+
 	if (ret)
 		drm_gem_vm_close(vma);
 
@@ -101,6 +301,12 @@ int rockchip_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	return rockchip_drm_gem_object_mmap(obj, vma);
 }
 
+static void rockchip_gem_release_object(struct rockchip_gem_object *rk_obj)
+{
+	drm_gem_object_release(&rk_obj->base);
+	kfree(rk_obj);
+}
+
 struct rockchip_gem_object *
 	rockchip_gem_create_object(struct drm_device *drm, unsigned int size,
 				   bool alloc_kmap)
@@ -117,7 +323,7 @@ struct rockchip_gem_object *
 
 	obj = &rk_obj->base;
 
-	drm_gem_private_object_init(drm, obj, size);
+	drm_gem_object_init(drm, obj, size);
 
 	ret = rockchip_gem_alloc_buf(rk_obj, alloc_kmap);
 	if (ret)
@@ -126,7 +332,7 @@ struct rockchip_gem_object *
 	return rk_obj;
 
 err_free_rk_obj:
-	kfree(rk_obj);
+	rockchip_gem_release_object(rk_obj);
 	return ERR_PTR(ret);
 }
 
@@ -138,13 +344,11 @@ void rockchip_gem_free_object(struct drm_gem_object *obj)
 {
 	struct rockchip_gem_object *rk_obj;
 
-	drm_gem_free_mmap_offset(obj);
-
 	rk_obj = to_rockchip_obj(obj);
 
 	rockchip_gem_free_buf(rk_obj);
 
-	kfree(rk_obj);
+	rockchip_gem_release_object(rk_obj);
 }
 
 /*
@@ -253,6 +457,9 @@ struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj)
 	struct sg_table *sgt;
 	int ret;
 
+	if (rk_obj->pages)
+		return drm_prime_pages_to_sg(rk_obj->pages, rk_obj->num_pages);
+
 	sgt = kzalloc(sizeof(*sgt), GFP_KERNEL);
 	if (!sgt)
 		return ERR_PTR(-ENOMEM);
@@ -273,6 +480,10 @@ void *rockchip_gem_prime_vmap(struct drm_gem_object *obj)
 {
 	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
 
+	if (rk_obj->pages)
+		return vmap(rk_obj->pages, rk_obj->num_pages, VM_MAP,
+			    pgprot_writecombine(PAGE_KERNEL));
+
 	if (rk_obj->dma_attrs & DMA_ATTR_NO_KERNEL_MAPPING)
 		return NULL;
 
@@ -281,5 +492,12 @@ void *rockchip_gem_prime_vmap(struct drm_gem_object *obj)
 
 void rockchip_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
 {
-	/* Nothing to do */
+	struct rockchip_gem_object *rk_obj = to_rockchip_obj(obj);
+
+	if (rk_obj->pages) {
+		vunmap(vaddr);
+		return;
+	}
+
+	/* Nothing to do if allocated by DMA mapping API. */
 }
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h
index 18b3488db4ec..3f6ea4d18a5c 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_gem.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_gem.h
@@ -23,7 +23,15 @@ struct rockchip_gem_object {
 
 	void *kvaddr;
 	dma_addr_t dma_addr;
+	/* Used when IOMMU is disabled */
 	unsigned long dma_attrs;
+
+	/* Used when IOMMU is enabled */
+	struct drm_mm_node mm;
+	unsigned long num_pages;
+	struct page **pages;
+	struct sg_table *sgt;
+	size_t size;
 };
 
 struct sg_table *rockchip_gem_prime_get_sg_table(struct drm_gem_object *obj);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index fb5f001f51c3..76c79ac57df0 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -531,6 +531,8 @@ static int vop_enable(struct drm_crtc *crtc)
 	}
 
 	memcpy(vop->regs, vop->regsbak, vop->len);
+	vop_cfg_done(vop);
+
 	/*
 	 * At here, vop clock & iommu is enable, R/W vop regs would be safe.
 	 */
@@ -582,6 +584,8 @@ static void vop_crtc_disable(struct drm_crtc *crtc)
 		spin_unlock(&vop->reg_lock);
 	}
 
+	vop_cfg_done(vop);
+
 	drm_crtc_vblank_off(crtc);
 
 	/*
@@ -932,9 +936,11 @@ static void vop_crtc_enable(struct drm_crtc *crtc)
 		vop_dsp_hold_valid_irq_disable(vop);
 	}
 
-	pin_pol = 0x8;
-	pin_pol |= (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) ? 0 : 1;
-	pin_pol |= (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) ? 0 : (1 << 1);
+	pin_pol = BIT(DCLK_INVERT);
+	pin_pol |= (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC) ?
+		   0 : BIT(HSYNC_POSITIVE);
+	pin_pol |= (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC) ?
+		   0 : BIT(VSYNC_POSITIVE);
 	VOP_CTRL_SET(vop, pin_pol, pin_pol);
 
 	switch (s->output_type) {
@@ -954,6 +960,11 @@ static void vop_crtc_enable(struct drm_crtc *crtc)
 		VOP_CTRL_SET(vop, mipi_pin_pol, pin_pol);
 		VOP_CTRL_SET(vop, mipi_en, 1);
 		break;
+	case DRM_MODE_CONNECTOR_DisplayPort:
+		pin_pol &= ~BIT(DCLK_INVERT);
+		VOP_CTRL_SET(vop, dp_pin_pol, pin_pol);
+		VOP_CTRL_SET(vop, dp_en, 1);
+		break;
 	default:
 		DRM_DEV_ERROR(vop->dev, "unsupported connector_type [%d]\n",
 			      s->output_type);
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
index 1dbc52615257..5a4faa85dbd2 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.h
@@ -45,6 +45,7 @@ struct vop_ctrl {
 	struct vop_reg edp_en;
 	struct vop_reg hdmi_en;
 	struct vop_reg mipi_en;
+	struct vop_reg dp_en;
 	struct vop_reg out_mode;
 	struct vop_reg dither_down;
 	struct vop_reg dither_up;
@@ -53,6 +54,7 @@ struct vop_ctrl {
 	struct vop_reg hdmi_pin_pol;
 	struct vop_reg edp_pin_pol;
 	struct vop_reg mipi_pin_pol;
+	struct vop_reg dp_pin_pol;
 
 	struct vop_reg htotal_pw;
 	struct vop_reg hact_st_end;
@@ -244,6 +246,13 @@ enum scale_down_mode {
 	SCALE_DOWN_AVG = 0x1
 };
 
+enum vop_pol {
+	HSYNC_POSITIVE = 0,
+	VSYNC_POSITIVE = 1,
+	DEN_NEGATIVE   = 2,
+	DCLK_INVERT    = 3
+};
+
 #define FRAC_16_16(mult, div)    (((mult) << 16) / (div))
 #define SCL_FT_DEFAULT_FIXPOINT_SHIFT	12
 #define SCL_MAX_VSKIPLINES		4
diff --git a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
index 35c51f3402f2..91fbc7b52147 100644
--- a/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
+++ b/drivers/gpu/drm/rockchip/rockchip_vop_reg.c
@@ -284,6 +284,7 @@ static const struct vop_data rk3288_vop = {
 static const struct vop_ctrl rk3399_ctrl_data = {
 	.standby = VOP_REG(RK3399_SYS_CTRL, 0x1, 22),
 	.gate_en = VOP_REG(RK3399_SYS_CTRL, 0x1, 23),
+	.dp_en = VOP_REG(RK3399_SYS_CTRL, 0x1, 11),
 	.rgb_en = VOP_REG(RK3399_SYS_CTRL, 0x1, 12),
 	.hdmi_en = VOP_REG(RK3399_SYS_CTRL, 0x1, 13),
 	.edp_en = VOP_REG(RK3399_SYS_CTRL, 0x1, 14),
@@ -293,6 +294,7 @@ static const struct vop_ctrl rk3399_ctrl_data = {
 	.data_blank = VOP_REG(RK3399_DSP_CTRL0, 0x1, 19),
 	.out_mode = VOP_REG(RK3399_DSP_CTRL0, 0xf, 0),
 	.rgb_pin_pol = VOP_REG(RK3399_DSP_CTRL1, 0xf, 16),
+	.dp_pin_pol = VOP_REG(RK3399_DSP_CTRL1, 0xf, 16),
 	.hdmi_pin_pol = VOP_REG(RK3399_DSP_CTRL1, 0xf, 20),
 	.edp_pin_pol = VOP_REG(RK3399_DSP_CTRL1, 0xf, 24),
 	.mipi_pin_pol = VOP_REG(RK3399_DSP_CTRL1, 0xf, 28),
diff --git a/drivers/gpu/drm/selftests/drm_mm_selftests.h b/drivers/gpu/drm/selftests/drm_mm_selftests.h
index 6a4575fdc1c0..37bbdac52896 100644
--- a/drivers/gpu/drm/selftests/drm_mm_selftests.h
+++ b/drivers/gpu/drm/selftests/drm_mm_selftests.h
@@ -17,6 +17,7 @@ selftest(align32, igt_align32)
 selftest(align64, igt_align64)
 selftest(evict, igt_evict)
 selftest(evict_range, igt_evict_range)
+selftest(bottomup, igt_bottomup)
 selftest(topdown, igt_topdown)
 selftest(color, igt_color)
 selftest(color_evict, igt_color_evict)
diff --git a/drivers/gpu/drm/selftests/test-drm_mm.c b/drivers/gpu/drm/selftests/test-drm_mm.c
index 6df53e6c1308..1e71bc182ca9 100644
--- a/drivers/gpu/drm/selftests/test-drm_mm.c
+++ b/drivers/gpu/drm/selftests/test-drm_mm.c
@@ -22,23 +22,24 @@ static unsigned int max_iterations = 8192;
 static unsigned int max_prime = 128;
 
 enum {
-	DEFAULT,
-	TOPDOWN,
 	BEST,
+	BOTTOMUP,
+	TOPDOWN,
+	EVICT,
 };
 
 static const struct insert_mode {
 	const char *name;
-	unsigned int search_flags;
-	unsigned int create_flags;
+	enum drm_mm_insert_mode mode;
 } insert_modes[] = {
-	[DEFAULT] = { "default", DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT },
-	[TOPDOWN] = { "top-down", DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP },
-	[BEST] = { "best", DRM_MM_SEARCH_BEST, DRM_MM_CREATE_DEFAULT },
+	[BEST] = { "best", DRM_MM_INSERT_BEST },
+	[BOTTOMUP] = { "bottom-up", DRM_MM_INSERT_LOW },
+	[TOPDOWN] = { "top-down", DRM_MM_INSERT_HIGH },
+	[EVICT] = { "evict", DRM_MM_INSERT_EVICT },
 	{}
 }, evict_modes[] = {
-	{ "default", DRM_MM_SEARCH_DEFAULT, DRM_MM_CREATE_DEFAULT },
-	{ "top-down", DRM_MM_SEARCH_BELOW, DRM_MM_CREATE_TOP },
+	{ "bottom-up", DRM_MM_INSERT_LOW },
+	{ "top-down", DRM_MM_INSERT_HIGH },
 	{}
 };
 
@@ -526,8 +527,7 @@ static bool expect_insert(struct drm_mm *mm, struct drm_mm_node *node,
 
 	err = drm_mm_insert_node_generic(mm, node,
 					 size, alignment, color,
-					 mode->search_flags,
-					 mode->create_flags);
+					 mode->mode);
 	if (err) {
 		pr_err("insert (size=%llu, alignment=%llu, color=%lu, mode=%s) failed with err=%d\n",
 		       size, alignment, color, mode->name, err);
@@ -547,7 +547,7 @@ static bool expect_insert_fail(struct drm_mm *mm, u64 size)
 	struct drm_mm_node tmp = {};
 	int err;
 
-	err = drm_mm_insert_node(mm, &tmp, size, 0, DRM_MM_SEARCH_DEFAULT);
+	err = drm_mm_insert_node(mm, &tmp, size);
 	if (likely(err == -ENOSPC))
 		return true;
 
@@ -753,11 +753,10 @@ static bool expect_insert_in_range(struct drm_mm *mm, struct drm_mm_node *node,
 {
 	int err;
 
-	err = drm_mm_insert_node_in_range_generic(mm, node,
-						  size, alignment, color,
-						  range_start, range_end,
-						  mode->search_flags,
-						  mode->create_flags);
+	err = drm_mm_insert_node_in_range(mm, node,
+					  size, alignment, color,
+					  range_start, range_end,
+					  mode->mode);
 	if (err) {
 		pr_err("insert (size=%llu, alignment=%llu, color=%lu, mode=%s) nto range [%llx, %llx] failed with err=%d\n",
 		       size, alignment, color, mode->name,
@@ -781,11 +780,10 @@ static bool expect_insert_in_range_fail(struct drm_mm *mm,
 	struct drm_mm_node tmp = {};
 	int err;
 
-	err = drm_mm_insert_node_in_range_generic(mm, &tmp,
-						  size, 0, 0,
-						  range_start, range_end,
-						  DRM_MM_SEARCH_DEFAULT,
-						  DRM_MM_CREATE_DEFAULT);
+	err = drm_mm_insert_node_in_range(mm, &tmp,
+					  size, 0, 0,
+					  range_start, range_end,
+					  0);
 	if (likely(err == -ENOSPC))
 		return true;
 
@@ -1324,7 +1322,7 @@ static int evict_something(struct drm_mm *mm,
 	drm_mm_scan_init_with_range(&scan, mm,
 				    size, alignment, 0,
 				    range_start, range_end,
-				    mode->create_flags);
+				    mode->mode);
 	if (!evict_nodes(&scan,
 			 nodes, order, count, false,
 			 &evict_list))
@@ -1332,8 +1330,7 @@ static int evict_something(struct drm_mm *mm,
 
 	memset(&tmp, 0, sizeof(tmp));
 	err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, 0,
-					 mode->search_flags,
-					 mode->create_flags);
+					 DRM_MM_INSERT_EVICT);
 	if (err) {
 		pr_err("Failed to insert into eviction hole: size=%d, align=%d\n",
 		       size, alignment);
@@ -1408,8 +1405,7 @@ static int igt_evict(void *ignored)
 	ret = -EINVAL;
 	drm_mm_init(&mm, 0, size);
 	for (n = 0; n < size; n++) {
-		err = drm_mm_insert_node(&mm, &nodes[n].node, 1, 0,
-					 DRM_MM_SEARCH_DEFAULT);
+		err = drm_mm_insert_node(&mm, &nodes[n].node, 1);
 		if (err) {
 			pr_err("insert failed, step %d\n", n);
 			ret = err;
@@ -1517,8 +1513,7 @@ static int igt_evict_range(void *ignored)
 	ret = -EINVAL;
 	drm_mm_init(&mm, 0, size);
 	for (n = 0; n < size; n++) {
-		err = drm_mm_insert_node(&mm, &nodes[n].node, 1, 0,
-					 DRM_MM_SEARCH_DEFAULT);
+		err = drm_mm_insert_node(&mm, &nodes[n].node, 1);
 		if (err) {
 			pr_err("insert failed, step %d\n", n);
 			ret = err;
@@ -1702,6 +1697,106 @@ err:
 	return ret;
 }
 
+static int igt_bottomup(void *ignored)
+{
+	const struct insert_mode *bottomup = &insert_modes[BOTTOMUP];
+	DRM_RND_STATE(prng, random_seed);
+	const unsigned int count = 8192;
+	unsigned int size;
+	unsigned long *bitmap;
+	struct drm_mm mm;
+	struct drm_mm_node *nodes, *node, *next;
+	unsigned int *order, n, m, o = 0;
+	int ret;
+
+	/* Like igt_topdown, but instead of searching for the last hole,
+	 * we search for the first.
+	 */
+
+	ret = -ENOMEM;
+	nodes = vzalloc(count * sizeof(*nodes));
+	if (!nodes)
+		goto err;
+
+	bitmap = kzalloc(count / BITS_PER_LONG * sizeof(unsigned long),
+			 GFP_TEMPORARY);
+	if (!bitmap)
+		goto err_nodes;
+
+	order = drm_random_order(count, &prng);
+	if (!order)
+		goto err_bitmap;
+
+	ret = -EINVAL;
+	for (size = 1; size <= 64; size <<= 1) {
+		drm_mm_init(&mm, 0, size*count);
+		for (n = 0; n < count; n++) {
+			if (!expect_insert(&mm, &nodes[n],
+					   size, 0, n,
+					   bottomup)) {
+				pr_err("bottomup insert failed, size %u step %d\n", size, n);
+				goto out;
+			}
+
+			if (!assert_one_hole(&mm, size*(n + 1), size*count))
+				goto out;
+		}
+
+		if (!assert_continuous(&mm, size))
+			goto out;
+
+		drm_random_reorder(order, count, &prng);
+		for_each_prime_number_from(n, 1, min(count, max_prime)) {
+			for (m = 0; m < n; m++) {
+				node = &nodes[order[(o + m) % count]];
+				drm_mm_remove_node(node);
+				__set_bit(node_index(node), bitmap);
+			}
+
+			for (m = 0; m < n; m++) {
+				unsigned int first;
+
+				node = &nodes[order[(o + m) % count]];
+				if (!expect_insert(&mm, node,
+						   size, 0, 0,
+						   bottomup)) {
+					pr_err("insert failed, step %d/%d\n", m, n);
+					goto out;
+				}
+
+				first = find_first_bit(bitmap, count);
+				if (node_index(node) != first) {
+					pr_err("node %d/%d not inserted into bottom hole, expected %d, found %d\n",
+					       m, n, first, node_index(node));
+					goto out;
+				}
+				__clear_bit(first, bitmap);
+			}
+
+			DRM_MM_BUG_ON(find_first_bit(bitmap, count) != count);
+
+			o += n;
+		}
+
+		drm_mm_for_each_node_safe(node, next, &mm)
+			drm_mm_remove_node(node);
+		DRM_MM_BUG_ON(!drm_mm_clean(&mm));
+	}
+
+	ret = 0;
+out:
+	drm_mm_for_each_node_safe(node, next, &mm)
+		drm_mm_remove_node(node);
+	drm_mm_takedown(&mm);
+	kfree(order);
+err_bitmap:
+	kfree(bitmap);
+err_nodes:
+	vfree(nodes);
+err:
+	return ret;
+}
+
 static void separate_adjacent_colors(const struct drm_mm_node *node,
 				     unsigned long color,
 				     u64 *start,
@@ -1904,7 +1999,7 @@ static int evict_color(struct drm_mm *mm,
 	drm_mm_scan_init_with_range(&scan, mm,
 				    size, alignment, color,
 				    range_start, range_end,
-				    mode->create_flags);
+				    mode->mode);
 	if (!evict_nodes(&scan,
 			 nodes, order, count, true,
 			 &evict_list))
@@ -1912,8 +2007,7 @@ static int evict_color(struct drm_mm *mm,
 
 	memset(&tmp, 0, sizeof(tmp));
 	err = drm_mm_insert_node_generic(mm, &tmp, size, alignment, color,
-					 mode->search_flags,
-					 mode->create_flags);
+					 DRM_MM_INSERT_EVICT);
 	if (err) {
 		pr_err("Failed to insert into eviction hole: size=%d, align=%d, color=%lu, err=%d\n",
 		       size, alignment, color, err);
diff --git a/drivers/gpu/drm/sis/sis_mm.c b/drivers/gpu/drm/sis/sis_mm.c
index 03defda77766..1622db24cd39 100644
--- a/drivers/gpu/drm/sis/sis_mm.c
+++ b/drivers/gpu/drm/sis/sis_mm.c
@@ -109,8 +109,7 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file,
 	if (pool == AGP_TYPE) {
 		retval = drm_mm_insert_node(&dev_priv->agp_mm,
 					    &item->mm_node,
-					    mem->size, 0,
-					    DRM_MM_SEARCH_DEFAULT);
+					    mem->size);
 		offset = item->mm_node.start;
 	} else {
 #if defined(CONFIG_FB_SIS) || defined(CONFIG_FB_SIS_MODULE)
@@ -122,8 +121,7 @@ static int sis_drm_alloc(struct drm_device *dev, struct drm_file *file,
 #else
 		retval = drm_mm_insert_node(&dev_priv->vram_mm,
 					    &item->mm_node,
-					    mem->size, 0,
-					    DRM_MM_SEARCH_DEFAULT);
+					    mem->size);
 		offset = item->mm_node.start;
 #endif
 	}
diff --git a/drivers/gpu/drm/sti/sti_crtc.c b/drivers/gpu/drm/sti/sti_crtc.c
index e992bed98dcb..d45a4335df5d 100644
--- a/drivers/gpu/drm/sti/sti_crtc.c
+++ b/drivers/gpu/drm/sti/sti_crtc.c
@@ -134,21 +134,6 @@ sti_crtc_mode_set_nofb(struct drm_crtc *crtc)
 	sti_crtc_mode_set(crtc, &crtc->state->adjusted_mode);
 }
 
-static void sti_crtc_atomic_begin(struct drm_crtc *crtc,
-				  struct drm_crtc_state *old_crtc_state)
-{
-	struct sti_mixer *mixer = to_sti_mixer(crtc);
-
-	if (crtc->state->event) {
-		crtc->state->event->pipe = drm_crtc_index(crtc);
-
-		WARN_ON(drm_crtc_vblank_get(crtc) != 0);
-
-		mixer->pending_event = crtc->state->event;
-		crtc->state->event = NULL;
-	}
-}
-
 static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 				  struct drm_crtc_state *old_crtc_state)
 {
@@ -156,6 +141,8 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct sti_mixer *mixer = to_sti_mixer(crtc);
 	struct sti_compositor *compo = dev_get_drvdata(mixer->dev);
 	struct drm_plane *p;
+	struct drm_pending_vblank_event *event;
+	unsigned long flags;
 
 	DRM_DEBUG_DRIVER("\n");
 
@@ -220,13 +207,24 @@ static void sti_crtc_atomic_flush(struct drm_crtc *crtc,
 			break;
 		}
 	}
+
+	event = crtc->state->event;
+	if (event) {
+		crtc->state->event = NULL;
+
+		spin_lock_irqsave(&crtc->dev->event_lock, flags);
+		if (drm_crtc_vblank_get(crtc) == 0)
+			drm_crtc_arm_vblank_event(crtc, event);
+		else
+			drm_crtc_send_vblank_event(crtc, event);
+		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+	}
 }
 
 static const struct drm_crtc_helper_funcs sti_crtc_helper_funcs = {
 	.enable = sti_crtc_enable,
 	.disable = sti_crtc_disabling,
 	.mode_set_nofb = sti_crtc_mode_set_nofb,
-	.atomic_begin = sti_crtc_atomic_begin,
 	.atomic_flush = sti_crtc_atomic_flush,
 };
 
@@ -250,7 +248,6 @@ int sti_crtc_vblank_cb(struct notifier_block *nb,
 	struct sti_compositor *compo;
 	struct drm_crtc *crtc = data;
 	struct sti_mixer *mixer;
-	unsigned long flags;
 	struct sti_private *priv;
 	unsigned int pipe;
 
@@ -267,14 +264,6 @@ int sti_crtc_vblank_cb(struct notifier_block *nb,
 
 	drm_crtc_handle_vblank(crtc);
 
-	spin_lock_irqsave(&crtc->dev->event_lock, flags);
-	if (mixer->pending_event) {
-		drm_crtc_send_vblank_event(crtc, mixer->pending_event);
-		drm_crtc_vblank_put(crtc);
-		mixer->pending_event = NULL;
-	}
-	spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
-
 	if (mixer->status == STI_MIXER_DISABLING) {
 		struct drm_plane *p;
 
@@ -317,19 +306,12 @@ void sti_crtc_disable_vblank(struct drm_device *drm_dev, unsigned int pipe)
 	struct sti_private *priv = drm_dev->dev_private;
 	struct sti_compositor *compo = priv->compo;
 	struct notifier_block *vtg_vblank_nb = &compo->vtg_vblank_nb[pipe];
-	struct drm_crtc *crtc = &compo->mixer[pipe]->drm_crtc;
 	struct sti_vtg *vtg = compo->vtg[pipe];
 
 	DRM_DEBUG_DRIVER("\n");
 
 	if (sti_vtg_unregister_client(vtg, vtg_vblank_nb))
 		DRM_DEBUG_DRIVER("Warning: cannot unregister VTG notifier\n");
-
-	/* free the resources of the pending requests */
-	if (compo->mixer[pipe]->pending_event) {
-		drm_crtc_vblank_put(crtc);
-		compo->mixer[pipe]->pending_event = NULL;
-	}
 }
 
 static int sti_crtc_late_register(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/sti/sti_drv.c b/drivers/gpu/drm/sti/sti_drv.c
index acc056644cd0..20fc0fbfa849 100644
--- a/drivers/gpu/drm/sti/sti_drv.c
+++ b/drivers/gpu/drm/sti/sti_drv.c
@@ -58,7 +58,9 @@ static int sti_drm_fps_set(void *data, u64 val)
 	list_for_each_entry(p, &drm_dev->mode_config.plane_list, head) {
 		struct sti_plane *plane = to_sti_plane(p);
 
+		memset(&plane->fps_info, 0, sizeof(plane->fps_info));
 		plane->fps_info.output = (val >> i) & 1;
+
 		i++;
 	}
 
@@ -115,52 +117,6 @@ err:
 	return ret;
 }
 
-static void sti_atomic_schedule(struct sti_private *private,
-				struct drm_atomic_state *state)
-{
-	private->commit.state = state;
-	schedule_work(&private->commit.work);
-}
-
-static void sti_atomic_complete(struct sti_private *private,
-				struct drm_atomic_state *state)
-{
-	struct drm_device *drm = private->drm_dev;
-
-	/*
-	 * Everything below can be run asynchronously without the need to grab
-	 * any modeset locks at all under one condition: It must be guaranteed
-	 * that the asynchronous work has either been cancelled (if the driver
-	 * supports it, which at least requires that the framebuffers get
-	 * cleaned up with drm_atomic_helper_cleanup_planes()) or completed
-	 * before the new state gets committed on the software side with
-	 * drm_atomic_helper_swap_state().
-	 *
-	 * This scheme allows new atomic state updates to be prepared and
-	 * checked in parallel to the asynchronous completion of the previous
-	 * update. Which is important since compositors need to figure out the
-	 * composition of the next frame right after having submitted the
-	 * current layout.
-	 */
-
-	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_planes(drm, state, 0);
-	drm_atomic_helper_commit_modeset_enables(drm, state);
-
-	drm_atomic_helper_wait_for_vblanks(drm, state);
-
-	drm_atomic_helper_cleanup_planes(drm, state);
-	drm_atomic_state_put(state);
-}
-
-static void sti_atomic_work(struct work_struct *work)
-{
-	struct sti_private *private = container_of(work,
-			struct sti_private, commit.work);
-
-	sti_atomic_complete(private, private->commit.state);
-}
-
 static int sti_atomic_check(struct drm_device *dev,
 			    struct drm_atomic_state *state)
 {
@@ -181,38 +137,6 @@ static int sti_atomic_check(struct drm_device *dev,
 	return ret;
 }
 
-static int sti_atomic_commit(struct drm_device *drm,
-			     struct drm_atomic_state *state, bool nonblock)
-{
-	struct sti_private *private = drm->dev_private;
-	int err;
-
-	err = drm_atomic_helper_prepare_planes(drm, state);
-	if (err)
-		return err;
-
-	/* serialize outstanding nonblocking commits */
-	mutex_lock(&private->commit.lock);
-	flush_work(&private->commit.work);
-
-	/*
-	 * This is the point of no return - everything below never fails except
-	 * when the hw goes bonghits. Which means we can commit the new state on
-	 * the software side now.
-	 */
-
-	drm_atomic_helper_swap_state(state, true);
-
-	drm_atomic_state_get(state);
-	if (nonblock)
-		sti_atomic_schedule(private, state);
-	else
-		sti_atomic_complete(private, state);
-
-	mutex_unlock(&private->commit.lock);
-	return 0;
-}
-
 static void sti_output_poll_changed(struct drm_device *ddev)
 {
 	struct sti_private *private = ddev->dev_private;
@@ -224,7 +148,7 @@ static const struct drm_mode_config_funcs sti_mode_config_funcs = {
 	.fb_create = drm_fb_cma_create,
 	.output_poll_changed = sti_output_poll_changed,
 	.atomic_check = sti_atomic_check,
-	.atomic_commit = sti_atomic_commit,
+	.atomic_commit = drm_atomic_helper_commit,
 };
 
 static void sti_mode_config_init(struct drm_device *dev)
@@ -304,9 +228,6 @@ static int sti_init(struct drm_device *ddev)
 	dev_set_drvdata(ddev->dev, ddev);
 	private->drm_dev = ddev;
 
-	mutex_init(&private->commit.lock);
-	INIT_WORK(&private->commit.work, sti_atomic_work);
-
 	drm_mode_config_init(ddev);
 
 	sti_mode_config_init(ddev);
@@ -327,6 +248,7 @@ static void sti_cleanup(struct drm_device *ddev)
 
 	drm_kms_helper_poll_fini(ddev);
 	drm_vblank_cleanup(ddev);
+	component_unbind_all(ddev->dev, ddev);
 	kfree(private);
 	ddev->dev_private = NULL;
 }
@@ -360,7 +282,7 @@ static int sti_bind(struct device *dev)
 
 	private = ddev->dev_private;
 	if (ddev->mode_config.num_connector) {
-		fbdev = drm_fbdev_cma_init(ddev, 32, ddev->mode_config.num_crtc,
+		fbdev = drm_fbdev_cma_init(ddev, 32,
 					   ddev->mode_config.num_connector);
 		if (IS_ERR(fbdev)) {
 			DRM_DEBUG_DRIVER("Warning: fails to create fbdev\n");
diff --git a/drivers/gpu/drm/sti/sti_drv.h b/drivers/gpu/drm/sti/sti_drv.h
index 4c75845cc9ab..6502ed2d3351 100644
--- a/drivers/gpu/drm/sti/sti_drv.h
+++ b/drivers/gpu/drm/sti/sti_drv.h
@@ -25,12 +25,6 @@ struct sti_private {
 	struct drm_property *plane_zorder_property;
 	struct drm_device *drm_dev;
 	struct drm_fbdev_cma *fbdev;
-
-	struct {
-		struct drm_atomic_state *state;
-		struct work_struct work;
-		struct mutex lock;
-	} commit;
 };
 
 extern struct platform_driver sti_tvout_driver;
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index 877d053d86f4..86279f5022c2 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -610,7 +610,6 @@ static int sti_gdp_atomic_check(struct drm_plane *drm_plane,
 	struct sti_plane *plane = to_sti_plane(drm_plane);
 	struct sti_gdp *gdp = to_sti_gdp(plane);
 	struct drm_crtc *crtc = state->crtc;
-	struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
 	struct drm_framebuffer *fb =  state->fb;
 	struct drm_crtc_state *crtc_state;
 	struct sti_mixer *mixer;
@@ -648,45 +647,30 @@ static int sti_gdp_atomic_check(struct drm_plane *drm_plane,
 		return -EINVAL;
 	}
 
-	if (!gdp->vtg) {
-		/* Register gdp callback */
-		gdp->vtg = compo->vtg[mixer->id];
-		if (sti_vtg_register_client(gdp->vtg,
-					    &gdp->vtg_field_nb, crtc)) {
-			DRM_ERROR("Cannot register VTG notifier\n");
+	/* Set gdp clock */
+	if (mode->clock && gdp->clk_pix) {
+		struct clk *clkp;
+		int rate = mode->clock * 1000;
+		int res;
+
+		/*
+		 * According to the mixer used, the gdp pixel clock
+		 * should have a different parent clock.
+		 */
+		if (mixer->id == STI_MIXER_MAIN)
+			clkp = gdp->clk_main_parent;
+		else
+			clkp = gdp->clk_aux_parent;
+
+		if (clkp)
+			clk_set_parent(gdp->clk_pix, clkp);
+
+		res = clk_set_rate(gdp->clk_pix, rate);
+		if (res < 0) {
+			DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
+				  rate);
 			return -EINVAL;
 		}
-
-		/* Set and enable gdp clock */
-		if (gdp->clk_pix) {
-			struct clk *clkp;
-			int rate = mode->clock * 1000;
-			int res;
-
-			/*
-			 * According to the mixer used, the gdp pixel clock
-			 * should have a different parent clock.
-			 */
-			if (mixer->id == STI_MIXER_MAIN)
-				clkp = gdp->clk_main_parent;
-			else
-				clkp = gdp->clk_aux_parent;
-
-			if (clkp)
-				clk_set_parent(gdp->clk_pix, clkp);
-
-			res = clk_set_rate(gdp->clk_pix, rate);
-			if (res < 0) {
-				DRM_ERROR("Cannot set rate (%dHz) for gdp\n",
-					  rate);
-				return -EINVAL;
-			}
-
-			if (clk_prepare_enable(gdp->clk_pix)) {
-				DRM_ERROR("Failed to prepare/enable gdp\n");
-				return -EINVAL;
-			}
-		}
 	}
 
 	DRM_DEBUG_KMS("CRTC:%d (%s) drm plane:%d (%s)\n",
@@ -724,6 +708,31 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	if (!crtc || !fb)
 		return;
 
+	if ((oldstate->fb == state->fb) &&
+	    (oldstate->crtc_x == state->crtc_x) &&
+	    (oldstate->crtc_y == state->crtc_y) &&
+	    (oldstate->crtc_w == state->crtc_w) &&
+	    (oldstate->crtc_h == state->crtc_h) &&
+	    (oldstate->src_x == state->src_x) &&
+	    (oldstate->src_y == state->src_y) &&
+	    (oldstate->src_w == state->src_w) &&
+	    (oldstate->src_h == state->src_h)) {
+		/* No change since last update, do not post cmd */
+		DRM_DEBUG_DRIVER("No change, not posting cmd\n");
+		plane->status = STI_PLANE_UPDATED;
+		return;
+	}
+
+	if (!gdp->vtg) {
+		struct sti_compositor *compo = dev_get_drvdata(gdp->dev);
+		struct sti_mixer *mixer = to_sti_mixer(crtc);
+
+		/* Register gdp callback */
+		gdp->vtg = compo->vtg[mixer->id];
+		sti_vtg_register_client(gdp->vtg, &gdp->vtg_field_nb, crtc);
+		clk_prepare_enable(gdp->clk_pix);
+	}
+
 	mode = &crtc->mode;
 	dst_x = state->crtc_x;
 	dst_y = state->crtc_y;
diff --git a/drivers/gpu/drm/sti/sti_hdmi.c b/drivers/gpu/drm/sti/sti_hdmi.c
index c9151849d604..ce2dcba679d5 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.c
+++ b/drivers/gpu/drm/sti/sti_hdmi.c
@@ -95,7 +95,6 @@
 #define HDMI_CFG_HDCP_EN                BIT(2)
 #define HDMI_CFG_ESS_NOT_OESS           BIT(3)
 #define HDMI_CFG_H_SYNC_POL_NEG         BIT(4)
-#define HDMI_CFG_SINK_TERM_DET_EN       BIT(5)
 #define HDMI_CFG_V_SYNC_POL_NEG         BIT(6)
 #define HDMI_CFG_422_EN                 BIT(8)
 #define HDMI_CFG_FIFO_OVERRUN_CLR       BIT(12)
@@ -159,7 +158,6 @@ struct sti_hdmi_connector {
 	struct drm_encoder *encoder;
 	struct sti_hdmi *hdmi;
 	struct drm_property *colorspace_property;
-	struct drm_property *hdmi_mode_property;
 };
 
 #define to_sti_hdmi_connector(x) \
@@ -266,12 +264,9 @@ static void hdmi_config(struct sti_hdmi *hdmi)
 
 	/* Select encryption type and the framing mode */
 	conf |= HDMI_CFG_ESS_NOT_OESS;
-	if (hdmi->hdmi_mode == HDMI_MODE_HDMI)
+	if (hdmi->hdmi_monitor)
 		conf |= HDMI_CFG_HDMI_NOT_DVI;
 
-	/* Enable sink term detection */
-	conf |= HDMI_CFG_SINK_TERM_DET_EN;
-
 	/* Set Hsync polarity */
 	if (hdmi->mode.flags & DRM_MODE_FLAG_NHSYNC) {
 		DRM_DEBUG_DRIVER("H Sync Negative\n");
@@ -607,9 +602,6 @@ static void hdmi_dbg_cfg(struct seq_file *s, int val)
 	tmp = val & HDMI_CFG_ESS_NOT_OESS;
 	DBGFS_PRINT_STR("HDCP mode:", tmp ? "ESS enable" : "OESS enable");
 	seq_puts(s, "\t\t\t\t\t");
-	tmp = val & HDMI_CFG_SINK_TERM_DET_EN;
-	DBGFS_PRINT_STR("Sink term detection:", tmp ? "enable" : "disable");
-	seq_puts(s, "\t\t\t\t\t");
 	tmp = val & HDMI_CFG_H_SYNC_POL_NEG;
 	DBGFS_PRINT_STR("Hsync polarity:", tmp ? "inverted" : "normal");
 	seq_puts(s, "\t\t\t\t\t");
@@ -977,6 +969,11 @@ static int sti_hdmi_connector_get_modes(struct drm_connector *connector)
 	if (!edid)
 		goto fail;
 
+	hdmi->hdmi_monitor = drm_detect_hdmi_monitor(edid);
+	DRM_DEBUG_KMS("%s : %dx%d cm\n",
+		      (hdmi->hdmi_monitor ? "hdmi monitor" : "dvi monitor"),
+		      edid->width_cm, edid->height_cm);
+
 	count = drm_add_edid_modes(connector, edid);
 	drm_mode_connector_update_edid_property(connector, edid);
 	drm_edid_to_eld(connector, edid);
@@ -1060,19 +1057,6 @@ static void sti_hdmi_connector_init_property(struct drm_device *drm_dev,
 	}
 	hdmi_connector->colorspace_property = prop;
 	drm_object_attach_property(&connector->base, prop, hdmi->colorspace);
-
-	/* hdmi_mode property */
-	hdmi->hdmi_mode = DEFAULT_HDMI_MODE;
-	prop = drm_property_create_enum(drm_dev, 0, "hdmi_mode",
-					hdmi_mode_names,
-					ARRAY_SIZE(hdmi_mode_names));
-	if (!prop) {
-		DRM_ERROR("fails to create colorspace property\n");
-		return;
-	}
-	hdmi_connector->hdmi_mode_property = prop;
-	drm_object_attach_property(&connector->base, prop, hdmi->hdmi_mode);
-
 }
 
 static int
@@ -1090,11 +1074,6 @@ sti_hdmi_connector_set_property(struct drm_connector *connector,
 		return 0;
 	}
 
-	if (property == hdmi_connector->hdmi_mode_property) {
-		hdmi->hdmi_mode = val;
-		return 0;
-	}
-
 	DRM_ERROR("failed to set hdmi connector property\n");
 	return -EINVAL;
 }
@@ -1114,11 +1093,6 @@ sti_hdmi_connector_get_property(struct drm_connector *connector,
 		return 0;
 	}
 
-	if (property == hdmi_connector->hdmi_mode_property) {
-		*val = hdmi->hdmi_mode;
-		return 0;
-	}
-
 	DRM_ERROR("failed to get hdmi connector property\n");
 	return -EINVAL;
 }
diff --git a/drivers/gpu/drm/sti/sti_hdmi.h b/drivers/gpu/drm/sti/sti_hdmi.h
index 119bc3582ac7..407012350f1a 100644
--- a/drivers/gpu/drm/sti/sti_hdmi.h
+++ b/drivers/gpu/drm/sti/sti_hdmi.h
@@ -30,19 +30,6 @@ struct hdmi_audio_params {
 	struct hdmi_audio_infoframe cea;
 };
 
-/* values for the framing mode property */
-enum sti_hdmi_modes {
-	HDMI_MODE_HDMI,
-	HDMI_MODE_DVI,
-};
-
-static const struct drm_prop_enum_list hdmi_mode_names[] = {
-	{ HDMI_MODE_HDMI, "hdmi" },
-	{ HDMI_MODE_DVI, "dvi" },
-};
-
-#define DEFAULT_HDMI_MODE HDMI_MODE_HDMI
-
 static const struct drm_prop_enum_list colorspace_mode_names[] = {
 	{ HDMI_COLORSPACE_RGB, "rgb" },
 	{ HDMI_COLORSPACE_YUV422, "yuv422" },
@@ -73,7 +60,7 @@ static const struct drm_prop_enum_list colorspace_mode_names[] = {
  * @reset: reset control of the hdmi phy
  * @ddc_adapt: i2c ddc adapter
  * @colorspace: current colorspace selected
- * @hdmi_mode: select framing for HDMI or DVI
+ * @hdmi_monitor: true if HDMI monitor detected else DVI monitor assumed
  * @audio_pdev: ASoC hdmi-codec platform device
  * @audio: hdmi audio parameters.
  * @drm_connector: hdmi connector
@@ -98,7 +85,7 @@ struct sti_hdmi {
 	struct reset_control *reset;
 	struct i2c_adapter *ddc_adapt;
 	enum hdmi_colorspace colorspace;
-	enum sti_hdmi_modes hdmi_mode;
+	bool hdmi_monitor;
 	struct platform_device *audio_pdev;
 	struct hdmi_audio_params audio;
 	struct drm_connector *drm_connector;
diff --git a/drivers/gpu/drm/sti/sti_hqvdp.c b/drivers/gpu/drm/sti/sti_hqvdp.c
index 4376fd8a8e52..66f843148ef7 100644
--- a/drivers/gpu/drm/sti/sti_hqvdp.c
+++ b/drivers/gpu/drm/sti/sti_hqvdp.c
@@ -1037,9 +1037,9 @@ static int sti_hqvdp_atomic_check(struct drm_plane *drm_plane,
 	src_w = state->src_w >> 16;
 	src_h = state->src_h >> 16;
 
-	if (!sti_hqvdp_check_hw_scaling(hqvdp, mode,
-					src_w, src_h,
-					dst_w, dst_h)) {
+	if (mode->clock && !sti_hqvdp_check_hw_scaling(hqvdp, mode,
+						       src_w, src_h,
+						       dst_w, dst_h)) {
 		DRM_ERROR("Scaling beyond HW capabilities\n");
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/sti/sti_mixer.h b/drivers/gpu/drm/sti/sti_mixer.h
index 830a3c42d886..e64a00e61049 100644
--- a/drivers/gpu/drm/sti/sti_mixer.h
+++ b/drivers/gpu/drm/sti/sti_mixer.h
@@ -28,7 +28,6 @@ enum sti_mixer_status {
  * @regs: mixer registers
  * @id: id of the mixer
  * @drm_crtc: crtc object link to the mixer
- * @pending_event: set if a flip event is pending on crtc
  * @status: to know the status of the mixer
  */
 struct sti_mixer {
@@ -36,7 +35,6 @@ struct sti_mixer {
 	void __iomem *regs;
 	int id;
 	struct drm_crtc drm_crtc;
-	struct drm_pending_vblank_event *pending_event;
 	enum sti_mixer_status status;
 };
 
diff --git a/drivers/gpu/drm/sti/sti_vtg.c b/drivers/gpu/drm/sti/sti_vtg.c
index c3d9c8ae14af..2dcba1d3a122 100644
--- a/drivers/gpu/drm/sti/sti_vtg.c
+++ b/drivers/gpu/drm/sti/sti_vtg.c
@@ -17,7 +17,6 @@
 #include "sti_vtg.h"
 
 #define VTG_MODE_MASTER         0
-#define VTG_MODE_SLAVE_BY_EXT0  1
 
 /* registers offset */
 #define VTG_MODE            0x0000
@@ -132,7 +131,6 @@ struct sti_vtg_sync_params {
  * @irq_status: store the IRQ status value
  * @notifier_list: notifier callback
  * @crtc: the CRTC for vblank event
- * @slave: slave vtg
  * @link: List node to link the structure in lookup list
  */
 struct sti_vtg {
@@ -144,7 +142,6 @@ struct sti_vtg {
 	u32 irq_status;
 	struct raw_notifier_head notifier_list;
 	struct drm_crtc *crtc;
-	struct sti_vtg *slave;
 	struct list_head link;
 };
 
@@ -166,10 +163,6 @@ struct sti_vtg *of_vtg_find(struct device_node *np)
 
 static void vtg_reset(struct sti_vtg *vtg)
 {
-	/* reset slave and then master */
-	if (vtg->slave)
-		vtg_reset(vtg->slave);
-
 	writel(1, vtg->regs + VTG_DRST_AUTOC);
 }
 
@@ -259,10 +252,6 @@ static void vtg_set_mode(struct sti_vtg *vtg,
 {
 	unsigned int i;
 
-	if (vtg->slave)
-		vtg_set_mode(vtg->slave, VTG_MODE_SLAVE_BY_EXT0,
-			     vtg->sync_params, mode);
-
 	/* Set the number of clock cycles per line */
 	writel(mode->htotal, vtg->regs + VTG_CLKLN);
 
@@ -318,11 +307,7 @@ void sti_vtg_set_config(struct sti_vtg *vtg,
 
 	vtg_reset(vtg);
 
-	/* enable irq for the vtg vblank synchro */
-	if (vtg->slave)
-		vtg_enable_irq(vtg->slave);
-	else
-		vtg_enable_irq(vtg);
+	vtg_enable_irq(vtg);
 }
 
 /**
@@ -365,18 +350,12 @@ u32 sti_vtg_get_pixel_number(struct drm_display_mode mode, int x)
 int sti_vtg_register_client(struct sti_vtg *vtg, struct notifier_block *nb,
 			    struct drm_crtc *crtc)
 {
-	if (vtg->slave)
-		return sti_vtg_register_client(vtg->slave, nb, crtc);
-
 	vtg->crtc = crtc;
 	return raw_notifier_chain_register(&vtg->notifier_list, nb);
 }
 
 int sti_vtg_unregister_client(struct sti_vtg *vtg, struct notifier_block *nb)
 {
-	if (vtg->slave)
-		return sti_vtg_unregister_client(vtg->slave, nb);
-
 	return raw_notifier_chain_unregister(&vtg->notifier_list, nb);
 }
 
@@ -410,7 +389,6 @@ static irqreturn_t vtg_irq(int irq, void *arg)
 static int vtg_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np;
 	struct sti_vtg *vtg;
 	struct resource *res;
 	int ret;
@@ -434,29 +412,20 @@ static int vtg_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	np = of_parse_phandle(pdev->dev.of_node, "st,slave", 0);
-	if (np) {
-		vtg->slave = of_vtg_find(np);
-		of_node_put(np);
+	vtg->irq = platform_get_irq(pdev, 0);
+	if (vtg->irq < 0) {
+		DRM_ERROR("Failed to get VTG interrupt\n");
+		return vtg->irq;
+	}
+
+	RAW_INIT_NOTIFIER_HEAD(&vtg->notifier_list);
 
-		if (!vtg->slave)
-			return -EPROBE_DEFER;
-	} else {
-		vtg->irq = platform_get_irq(pdev, 0);
-		if (vtg->irq < 0) {
-			DRM_ERROR("Failed to get VTG interrupt\n");
-			return vtg->irq;
-		}
-
-		RAW_INIT_NOTIFIER_HEAD(&vtg->notifier_list);
-
-		ret = devm_request_threaded_irq(dev, vtg->irq, vtg_irq,
-				vtg_irq_thread, IRQF_ONESHOT,
-				dev_name(dev), vtg);
-		if (ret < 0) {
-			DRM_ERROR("Failed to register VTG interrupt\n");
-			return ret;
-		}
+	ret = devm_request_threaded_irq(dev, vtg->irq, vtg_irq,
+					vtg_irq_thread, IRQF_ONESHOT,
+					dev_name(dev), vtg);
+	if (ret < 0) {
+		DRM_ERROR("Failed to register VTG interrupt\n");
+		return ret;
 	}
 
 	vtg_register(vtg);
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
index 8b6ce619ad81..2c3beff8b53e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
@@ -40,9 +40,7 @@ struct drm_fbdev_cma *sun4i_framebuffer_init(struct drm_device *drm)
 
 	drm->mode_config.funcs = &sun4i_de_mode_config_funcs;
 
-	return drm_fbdev_cma_init(drm, 32,
-				  drm->mode_config.num_crtc,
-				  drm->mode_config.num_connector);
+	return drm_fbdev_cma_init(drm, 32, drm->mode_config.num_connector);
 }
 
 void sun4i_framebuffer_free(struct drm_device *drm)
diff --git a/drivers/gpu/drm/tegra/fb.c b/drivers/gpu/drm/tegra/fb.c
index f896e2ff7d47..f142f6a4db25 100644
--- a/drivers/gpu/drm/tegra/fb.c
+++ b/drivers/gpu/drm/tegra/fb.c
@@ -309,7 +309,7 @@ static int tegra_fbdev_init(struct tegra_fbdev *fbdev,
 	struct drm_device *drm = fbdev->base.dev;
 	int err;
 
-	err = drm_fb_helper_init(drm, &fbdev->base, num_crtc, max_connectors);
+	err = drm_fb_helper_init(drm, &fbdev->base, max_connectors);
 	if (err < 0) {
 		dev_err(drm->dev, "failed to initialize DRM FB helper: %d\n",
 			err);
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 7d853e6b5ff0..17e62ecb5d4d 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -128,8 +128,8 @@ static int tegra_bo_iommu_map(struct tegra_drm *tegra, struct tegra_bo *bo)
 	if (!bo->mm)
 		return -ENOMEM;
 
-	err = drm_mm_insert_node_generic(&tegra->mm, bo->mm, bo->gem.size,
-					 PAGE_SIZE, 0, 0, 0);
+	err = drm_mm_insert_node_generic(&tegra->mm,
+					 bo->mm, bo->gem.size, PAGE_SIZE, 0, 0);
 	if (err < 0) {
 		dev_err(tegra->drm->dev, "out of I/O virtual memory: %zd\n",
 			err);
@@ -441,8 +441,9 @@ int tegra_bo_dumb_map_offset(struct drm_file *file, struct drm_device *drm,
 	return 0;
 }
 
-static int tegra_bo_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int tegra_bo_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_gem_object *gem = vma->vm_private_data;
 	struct tegra_bo *bo = to_tegra_bo(gem);
 	struct page *page;
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_drv.c b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
index 919294a735fe..372d86fbb093 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_drv.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_drv.c
@@ -403,8 +403,7 @@ static int tilcdc_init(struct drm_driver *ddrv, struct device *dev)
 	drm_mode_config_reset(ddev);
 
 	priv->fbdev = drm_fbdev_cma_init(ddev, bpp,
-			ddev->mode_config.num_crtc,
-			ddev->mode_config.num_connector);
+					 ddev->mode_config.num_connector);
 	if (IS_ERR(priv->fbdev)) {
 		ret = PTR_ERR(priv->fbdev);
 		goto init_failed;
diff --git a/drivers/gpu/drm/tinydrm/Kconfig b/drivers/gpu/drm/tinydrm/Kconfig
new file mode 100644
index 000000000000..3504c53846da
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/Kconfig
@@ -0,0 +1,21 @@
+menuconfig DRM_TINYDRM
+	tristate "Support for simple displays"
+	depends on DRM
+	select DRM_KMS_HELPER
+	select DRM_KMS_CMA_HELPER
+	select BACKLIGHT_LCD_SUPPORT
+	select BACKLIGHT_CLASS_DEVICE
+	help
+	  Choose this option if you have a tinydrm supported display.
+	  If M is selected the module will be called tinydrm.
+
+config TINYDRM_MIPI_DBI
+	tristate
+
+config TINYDRM_MI0283QT
+	tristate "DRM support for MI0283QT"
+	depends on DRM_TINYDRM && SPI
+	select TINYDRM_MIPI_DBI
+	help
+	  DRM driver for the Multi-Inno MI0283QT display panel
+	  If M is selected the module will be called mi0283qt.
diff --git a/drivers/gpu/drm/tinydrm/Makefile b/drivers/gpu/drm/tinydrm/Makefile
new file mode 100644
index 000000000000..7a3604cf4fc2
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_DRM_TINYDRM)		+= core/
+
+# Controllers
+obj-$(CONFIG_TINYDRM_MIPI_DBI)		+= mipi-dbi.o
+
+# Displays
+obj-$(CONFIG_TINYDRM_MI0283QT)		+= mi0283qt.o
diff --git a/drivers/gpu/drm/tinydrm/core/Makefile b/drivers/gpu/drm/tinydrm/core/Makefile
new file mode 100644
index 000000000000..fb221e6f8885
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/Makefile
@@ -0,0 +1,3 @@
+tinydrm-y := tinydrm-core.o tinydrm-pipe.o tinydrm-helpers.o
+
+obj-$(CONFIG_DRM_TINYDRM) += tinydrm.o
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-core.c b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
new file mode 100644
index 000000000000..6a257dd08ee0
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-core.c
@@ -0,0 +1,376 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/tinydrm/tinydrm.h>
+#include <linux/device.h>
+#include <linux/dma-buf.h>
+
+/**
+ * DOC: overview
+ *
+ * This library provides driver helpers for very simple display hardware.
+ *
+ * It is based on &drm_simple_display_pipe coupled with a &drm_connector which
+ * has only one fixed &drm_display_mode. The framebuffers are backed by the
+ * cma helper and have support for framebuffer flushing (dirty).
+ * fbdev support is also included.
+ *
+ */
+
+/**
+ * DOC: core
+ *
+ * The driver allocates &tinydrm_device, initializes it using
+ * devm_tinydrm_init(), sets up the pipeline using tinydrm_display_pipe_init()
+ * and registers the DRM device using devm_tinydrm_register().
+ */
+
+/**
+ * tinydrm_lastclose - DRM lastclose helper
+ * @drm: DRM device
+ *
+ * This function ensures that fbdev is restored when drm_lastclose() is called
+ * on the last drm_release(). Drivers can use this as their
+ * &drm_driver->lastclose callback.
+ */
+void tinydrm_lastclose(struct drm_device *drm)
+{
+	struct tinydrm_device *tdev = drm->dev_private;
+
+	DRM_DEBUG_KMS("\n");
+	drm_fbdev_cma_restore_mode(tdev->fbdev_cma);
+}
+EXPORT_SYMBOL(tinydrm_lastclose);
+
+/**
+ * tinydrm_gem_cma_prime_import_sg_table - Produce a CMA GEM object from
+ *     another driver's scatter/gather table of pinned pages
+ * @drm: DRM device to import into
+ * @attach: DMA-BUF attachment
+ * @sgt: Scatter/gather table of pinned pages
+ *
+ * This function imports a scatter/gather table exported via DMA-BUF by
+ * another driver using drm_gem_cma_prime_import_sg_table(). It sets the
+ * kernel virtual address on the CMA object. Drivers should use this as their
+ * &drm_driver->gem_prime_import_sg_table callback if they need the virtual
+ * address. tinydrm_gem_cma_free_object() should be used in combination with
+ * this function.
+ *
+ * Returns:
+ * A pointer to a newly created GEM object or an ERR_PTR-encoded negative
+ * error code on failure.
+ */
+struct drm_gem_object *
+tinydrm_gem_cma_prime_import_sg_table(struct drm_device *drm,
+				      struct dma_buf_attachment *attach,
+				      struct sg_table *sgt)
+{
+	struct drm_gem_cma_object *cma_obj;
+	struct drm_gem_object *obj;
+	void *vaddr;
+
+	vaddr = dma_buf_vmap(attach->dmabuf);
+	if (!vaddr) {
+		DRM_ERROR("Failed to vmap PRIME buffer\n");
+		return ERR_PTR(-ENOMEM);
+	}
+
+	obj = drm_gem_cma_prime_import_sg_table(drm, attach, sgt);
+	if (IS_ERR(obj)) {
+		dma_buf_vunmap(attach->dmabuf, vaddr);
+		return obj;
+	}
+
+	cma_obj = to_drm_gem_cma_obj(obj);
+	cma_obj->vaddr = vaddr;
+
+	return obj;
+}
+EXPORT_SYMBOL(tinydrm_gem_cma_prime_import_sg_table);
+
+/**
+ * tinydrm_gem_cma_free_object - Free resources associated with a CMA GEM
+ *                               object
+ * @gem_obj: GEM object to free
+ *
+ * This function frees the backing memory of the CMA GEM object, cleans up the
+ * GEM object state and frees the memory used to store the object itself using
+ * drm_gem_cma_free_object(). It also handles PRIME buffers which has the kernel
+ * virtual address set by tinydrm_gem_cma_prime_import_sg_table(). Drivers
+ * can use this as their &drm_driver->gem_free_object callback.
+ */
+void tinydrm_gem_cma_free_object(struct drm_gem_object *gem_obj)
+{
+	if (gem_obj->import_attach) {
+		struct drm_gem_cma_object *cma_obj;
+
+		cma_obj = to_drm_gem_cma_obj(gem_obj);
+		dma_buf_vunmap(gem_obj->import_attach->dmabuf, cma_obj->vaddr);
+		cma_obj->vaddr = NULL;
+	}
+
+	drm_gem_cma_free_object(gem_obj);
+}
+EXPORT_SYMBOL_GPL(tinydrm_gem_cma_free_object);
+
+const struct file_operations tinydrm_fops = {
+	.owner		= THIS_MODULE,
+	.open		= drm_open,
+	.release	= drm_release,
+	.unlocked_ioctl	= drm_ioctl,
+#ifdef CONFIG_COMPAT
+	.compat_ioctl	= drm_compat_ioctl,
+#endif
+	.poll		= drm_poll,
+	.read		= drm_read,
+	.llseek		= no_llseek,
+	.mmap		= drm_gem_cma_mmap,
+};
+EXPORT_SYMBOL(tinydrm_fops);
+
+static struct drm_framebuffer *
+tinydrm_fb_create(struct drm_device *drm, struct drm_file *file_priv,
+		  const struct drm_mode_fb_cmd2 *mode_cmd)
+{
+	struct tinydrm_device *tdev = drm->dev_private;
+
+	return drm_fb_cma_create_with_funcs(drm, file_priv, mode_cmd,
+					    tdev->fb_funcs);
+}
+
+static const struct drm_mode_config_funcs tinydrm_mode_config_funcs = {
+	.fb_create = tinydrm_fb_create,
+	.atomic_check = drm_atomic_helper_check,
+	.atomic_commit = drm_atomic_helper_commit,
+};
+
+static int tinydrm_init(struct device *parent, struct tinydrm_device *tdev,
+			const struct drm_framebuffer_funcs *fb_funcs,
+			struct drm_driver *driver)
+{
+	struct drm_device *drm;
+
+	mutex_init(&tdev->dirty_lock);
+	tdev->fb_funcs = fb_funcs;
+
+	/*
+	 * We don't embed drm_device, because that prevent us from using
+	 * devm_kzalloc() to allocate tinydrm_device in the driver since
+	 * drm_dev_unref() frees the structure. The devm_ functions provide
+	 * for easy error handling.
+	 */
+	drm = drm_dev_alloc(driver, parent);
+	if (IS_ERR(drm))
+		return PTR_ERR(drm);
+
+	tdev->drm = drm;
+	drm->dev_private = tdev;
+	drm_mode_config_init(drm);
+	drm->mode_config.funcs = &tinydrm_mode_config_funcs;
+
+	return 0;
+}
+
+static void tinydrm_fini(struct tinydrm_device *tdev)
+{
+	drm_mode_config_cleanup(tdev->drm);
+	mutex_destroy(&tdev->dirty_lock);
+	tdev->drm->dev_private = NULL;
+	drm_dev_unref(tdev->drm);
+}
+
+static void devm_tinydrm_release(void *data)
+{
+	tinydrm_fini(data);
+}
+
+/**
+ * devm_tinydrm_init - Initialize tinydrm device
+ * @parent: Parent device object
+ * @tdev: tinydrm device
+ * @fb_funcs: Framebuffer functions
+ * @driver: DRM driver
+ *
+ * This function initializes @tdev, the underlying DRM device and it's
+ * mode_config. Resources will be automatically freed on driver detach (devres)
+ * using drm_mode_config_cleanup() and drm_dev_unref().
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int devm_tinydrm_init(struct device *parent, struct tinydrm_device *tdev,
+		      const struct drm_framebuffer_funcs *fb_funcs,
+		      struct drm_driver *driver)
+{
+	int ret;
+
+	ret = tinydrm_init(parent, tdev, fb_funcs, driver);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action(parent, devm_tinydrm_release, tdev);
+	if (ret)
+		tinydrm_fini(tdev);
+
+	return ret;
+}
+EXPORT_SYMBOL(devm_tinydrm_init);
+
+static int tinydrm_register(struct tinydrm_device *tdev)
+{
+	struct drm_device *drm = tdev->drm;
+	int bpp = drm->mode_config.preferred_depth;
+	struct drm_fbdev_cma *fbdev;
+	int ret;
+
+	ret = drm_dev_register(tdev->drm, 0);
+	if (ret)
+		return ret;
+
+	fbdev = drm_fbdev_cma_init_with_funcs(drm, bpp ? bpp : 32,
+					      drm->mode_config.num_connector,
+					      tdev->fb_funcs);
+	if (IS_ERR(fbdev))
+		DRM_ERROR("Failed to initialize fbdev: %ld\n", PTR_ERR(fbdev));
+	else
+		tdev->fbdev_cma = fbdev;
+
+	return 0;
+}
+
+static void tinydrm_unregister(struct tinydrm_device *tdev)
+{
+	struct drm_fbdev_cma *fbdev_cma = tdev->fbdev_cma;
+
+	drm_crtc_force_disable_all(tdev->drm);
+	/* don't restore fbdev in lastclose, keep pipeline disabled */
+	tdev->fbdev_cma = NULL;
+	drm_dev_unregister(tdev->drm);
+	if (fbdev_cma)
+		drm_fbdev_cma_fini(fbdev_cma);
+}
+
+static void devm_tinydrm_register_release(void *data)
+{
+	tinydrm_unregister(data);
+}
+
+/**
+ * devm_tinydrm_register - Register tinydrm device
+ * @tdev: tinydrm device
+ *
+ * This function registers the underlying DRM device and fbdev.
+ * These resources will be automatically unregistered on driver detach (devres)
+ * and the display pipeline will be disabled.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int devm_tinydrm_register(struct tinydrm_device *tdev)
+{
+	struct device *dev = tdev->drm->dev;
+	int ret;
+
+	ret = tinydrm_register(tdev);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action(dev, devm_tinydrm_register_release, tdev);
+	if (ret)
+		tinydrm_unregister(tdev);
+
+	return ret;
+}
+EXPORT_SYMBOL(devm_tinydrm_register);
+
+/**
+ * tinydrm_shutdown - Shutdown tinydrm
+ * @tdev: tinydrm device
+ *
+ * This function makes sure that the display pipeline is disabled.
+ * Used by drivers in their shutdown callback to turn off the display
+ * on machine shutdown and reboot.
+ */
+void tinydrm_shutdown(struct tinydrm_device *tdev)
+{
+	drm_crtc_force_disable_all(tdev->drm);
+}
+EXPORT_SYMBOL(tinydrm_shutdown);
+
+/**
+ * tinydrm_suspend - Suspend tinydrm
+ * @tdev: tinydrm device
+ *
+ * Used in driver PM operations to suspend tinydrm.
+ * Suspends fbdev and DRM.
+ * Resume with tinydrm_resume().
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_suspend(struct tinydrm_device *tdev)
+{
+	struct drm_atomic_state *state;
+
+	if (tdev->suspend_state) {
+		DRM_ERROR("Failed to suspend: state already set\n");
+		return -EINVAL;
+	}
+
+	drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 1);
+	state = drm_atomic_helper_suspend(tdev->drm);
+	if (IS_ERR(state)) {
+		drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 0);
+		return PTR_ERR(state);
+	}
+
+	tdev->suspend_state = state;
+
+	return 0;
+}
+EXPORT_SYMBOL(tinydrm_suspend);
+
+/**
+ * tinydrm_resume - Resume tinydrm
+ * @tdev: tinydrm device
+ *
+ * Used in driver PM operations to resume tinydrm.
+ * Suspend with tinydrm_suspend().
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_resume(struct tinydrm_device *tdev)
+{
+	struct drm_atomic_state *state = tdev->suspend_state;
+	int ret;
+
+	if (!state) {
+		DRM_ERROR("Failed to resume: state is not set\n");
+		return -EINVAL;
+	}
+
+	tdev->suspend_state = NULL;
+
+	ret = drm_atomic_helper_resume(tdev->drm, state);
+	if (ret) {
+		DRM_ERROR("Error resuming state: %d\n", ret);
+		return ret;
+	}
+
+	drm_fbdev_cma_set_suspend_unlocked(tdev->fbdev_cma, 0);
+
+	return 0;
+}
+EXPORT_SYMBOL(tinydrm_resume);
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
new file mode 100644
index 000000000000..3ccda6c1e159
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-helpers.c
@@ -0,0 +1,460 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/tinydrm.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/backlight.h>
+#include <linux/pm.h>
+#include <linux/spi/spi.h>
+#include <linux/swab.h>
+
+static unsigned int spi_max;
+module_param(spi_max, uint, 0400);
+MODULE_PARM_DESC(spi_max, "Set a lower SPI max transfer size");
+
+/**
+ * tinydrm_merge_clips - Merge clip rectangles
+ * @dst: Destination clip rectangle
+ * @src: Source clip rectangle(s)
+ * @num_clips: Number of @src clip rectangles
+ * @flags: Dirty fb ioctl flags
+ * @max_width: Maximum width of @dst
+ * @max_height: Maximum height of @dst
+ *
+ * This function merges @src clip rectangle(s) into @dst. If @src is NULL,
+ * @max_width and @min_width is used to set a full @dst clip rectangle.
+ *
+ * Returns:
+ * true if it's a full clip, false otherwise
+ */
+bool tinydrm_merge_clips(struct drm_clip_rect *dst,
+			 struct drm_clip_rect *src, unsigned int num_clips,
+			 unsigned int flags, u32 max_width, u32 max_height)
+{
+	unsigned int i;
+
+	if (!src || !num_clips) {
+		dst->x1 = 0;
+		dst->x2 = max_width;
+		dst->y1 = 0;
+		dst->y2 = max_height;
+		return true;
+	}
+
+	dst->x1 = ~0;
+	dst->y1 = ~0;
+	dst->x2 = 0;
+	dst->y2 = 0;
+
+	for (i = 0; i < num_clips; i++) {
+		if (flags & DRM_MODE_FB_DIRTY_ANNOTATE_COPY)
+			i++;
+		dst->x1 = min(dst->x1, src[i].x1);
+		dst->x2 = max(dst->x2, src[i].x2);
+		dst->y1 = min(dst->y1, src[i].y1);
+		dst->y2 = max(dst->y2, src[i].y2);
+	}
+
+	if (dst->x2 > max_width || dst->y2 > max_height ||
+	    dst->x1 >= dst->x2 || dst->y1 >= dst->y2) {
+		DRM_DEBUG_KMS("Illegal clip: x1=%u, x2=%u, y1=%u, y2=%u\n",
+			      dst->x1, dst->x2, dst->y1, dst->y2);
+		dst->x1 = 0;
+		dst->y1 = 0;
+		dst->x2 = max_width;
+		dst->y2 = max_height;
+	}
+
+	return (dst->x2 - dst->x1) == max_width &&
+	       (dst->y2 - dst->y1) == max_height;
+}
+EXPORT_SYMBOL(tinydrm_merge_clips);
+
+/**
+ * tinydrm_memcpy - Copy clip buffer
+ * @dst: Destination buffer
+ * @vaddr: Source buffer
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ */
+void tinydrm_memcpy(void *dst, void *vaddr, struct drm_framebuffer *fb,
+		    struct drm_clip_rect *clip)
+{
+	unsigned int cpp = drm_format_plane_cpp(fb->format->format, 0);
+	unsigned int pitch = fb->pitches[0];
+	void *src = vaddr + (clip->y1 * pitch) + (clip->x1 * cpp);
+	size_t len = (clip->x2 - clip->x1) * cpp;
+	unsigned int y;
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		memcpy(dst, src, len);
+		src += pitch;
+		dst += len;
+	}
+}
+EXPORT_SYMBOL(tinydrm_memcpy);
+
+/**
+ * tinydrm_swab16 - Swap bytes into clip buffer
+ * @dst: RGB565 destination buffer
+ * @vaddr: RGB565 source buffer
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ */
+void tinydrm_swab16(u16 *dst, void *vaddr, struct drm_framebuffer *fb,
+		    struct drm_clip_rect *clip)
+{
+	size_t len = (clip->x2 - clip->x1) * sizeof(u16);
+	unsigned int x, y;
+	u16 *src, *buf;
+
+	/*
+	 * The cma memory is write-combined so reads are uncached.
+	 * Speed up by fetching one line at a time.
+	 */
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		src = vaddr + (y * fb->pitches[0]);
+		src += clip->x1;
+		memcpy(buf, src, len);
+		src = buf;
+		for (x = clip->x1; x < clip->x2; x++)
+			*dst++ = swab16(*src++);
+	}
+
+	kfree(buf);
+}
+EXPORT_SYMBOL(tinydrm_swab16);
+
+/**
+ * tinydrm_xrgb8888_to_rgb565 - Convert XRGB8888 to RGB565 clip buffer
+ * @dst: RGB565 destination buffer
+ * @vaddr: XRGB8888 source buffer
+ * @fb: DRM framebuffer
+ * @clip: Clip rectangle area to copy
+ * @swap: Swap bytes
+ *
+ * Drivers can use this function for RGB565 devices that don't natively
+ * support XRGB8888.
+ */
+void tinydrm_xrgb8888_to_rgb565(u16 *dst, void *vaddr,
+				struct drm_framebuffer *fb,
+				struct drm_clip_rect *clip, bool swap)
+{
+	size_t len = (clip->x2 - clip->x1) * sizeof(u32);
+	unsigned int x, y;
+	u32 *src, *buf;
+	u16 val16;
+
+	buf = kmalloc(len, GFP_KERNEL);
+	if (!buf)
+		return;
+
+	for (y = clip->y1; y < clip->y2; y++) {
+		src = vaddr + (y * fb->pitches[0]);
+		src += clip->x1;
+		memcpy(buf, src, len);
+		src = buf;
+		for (x = clip->x1; x < clip->x2; x++) {
+			val16 = ((*src & 0x00F80000) >> 8) |
+				((*src & 0x0000FC00) >> 5) |
+				((*src & 0x000000F8) >> 3);
+			src++;
+			if (swap)
+				*dst++ = swab16(val16);
+			else
+				*dst++ = val16;
+		}
+	}
+
+	kfree(buf);
+}
+EXPORT_SYMBOL(tinydrm_xrgb8888_to_rgb565);
+
+/**
+ * tinydrm_of_find_backlight - Find backlight device in device-tree
+ * @dev: Device
+ *
+ * This function looks for a DT node pointed to by a property named 'backlight'
+ * and uses of_find_backlight_by_node() to get the backlight device.
+ * Additionally if the brightness property is zero, it is set to
+ * max_brightness.
+ *
+ * Returns:
+ * NULL if there's no backlight property.
+ * Error pointer -EPROBE_DEFER if the DT node is found, but no backlight device
+ * is found.
+ * If the backlight device is found, a pointer to the structure is returned.
+ */
+struct backlight_device *tinydrm_of_find_backlight(struct device *dev)
+{
+	struct backlight_device *backlight;
+	struct device_node *np;
+
+	np = of_parse_phandle(dev->of_node, "backlight", 0);
+	if (!np)
+		return NULL;
+
+	backlight = of_find_backlight_by_node(np);
+	of_node_put(np);
+
+	if (!backlight)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	if (!backlight->props.brightness) {
+		backlight->props.brightness = backlight->props.max_brightness;
+		DRM_DEBUG_KMS("Backlight brightness set to %d\n",
+			      backlight->props.brightness);
+	}
+
+	return backlight;
+}
+EXPORT_SYMBOL(tinydrm_of_find_backlight);
+
+/**
+ * tinydrm_enable_backlight - Enable backlight helper
+ * @backlight: Backlight device
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_enable_backlight(struct backlight_device *backlight)
+{
+	unsigned int old_state;
+	int ret;
+
+	if (!backlight)
+		return 0;
+
+	old_state = backlight->props.state;
+	backlight->props.state &= ~BL_CORE_FBBLANK;
+	DRM_DEBUG_KMS("Backlight state: 0x%x -> 0x%x\n", old_state,
+		      backlight->props.state);
+
+	ret = backlight_update_status(backlight);
+	if (ret)
+		DRM_ERROR("Failed to enable backlight %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL(tinydrm_enable_backlight);
+
+/**
+ * tinydrm_disable_backlight - Disable backlight helper
+ * @backlight: Backlight device
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_disable_backlight(struct backlight_device *backlight)
+{
+	unsigned int old_state;
+	int ret;
+
+	if (!backlight)
+		return 0;
+
+	old_state = backlight->props.state;
+	backlight->props.state |= BL_CORE_FBBLANK;
+	DRM_DEBUG_KMS("Backlight state: 0x%x -> 0x%x\n", old_state,
+		      backlight->props.state);
+	ret = backlight_update_status(backlight);
+	if (ret)
+		DRM_ERROR("Failed to disable backlight %d\n", ret);
+
+	return ret;
+}
+EXPORT_SYMBOL(tinydrm_disable_backlight);
+
+#if IS_ENABLED(CONFIG_SPI)
+
+/**
+ * tinydrm_spi_max_transfer_size - Determine max SPI transfer size
+ * @spi: SPI device
+ * @max_len: Maximum buffer size needed (optional)
+ *
+ * This function returns the maximum size to use for SPI transfers. It checks
+ * the SPI master, the optional @max_len and the module parameter spi_max and
+ * returns the smallest.
+ *
+ * Returns:
+ * Maximum size for SPI transfers
+ */
+size_t tinydrm_spi_max_transfer_size(struct spi_device *spi, size_t max_len)
+{
+	size_t ret;
+
+	ret = min(spi_max_transfer_size(spi), spi->master->max_dma_len);
+	if (max_len)
+		ret = min(ret, max_len);
+	if (spi_max)
+		ret = min_t(size_t, ret, spi_max);
+	ret &= ~0x3;
+	if (ret < 4)
+		ret = 4;
+
+	return ret;
+}
+EXPORT_SYMBOL(tinydrm_spi_max_transfer_size);
+
+/**
+ * tinydrm_spi_bpw_supported - Check if bits per word is supported
+ * @spi: SPI device
+ * @bpw: Bits per word
+ *
+ * This function checks to see if the SPI master driver supports @bpw.
+ *
+ * Returns:
+ * True if @bpw is supported, false otherwise.
+ */
+bool tinydrm_spi_bpw_supported(struct spi_device *spi, u8 bpw)
+{
+	u32 bpw_mask = spi->master->bits_per_word_mask;
+
+	if (bpw == 8)
+		return true;
+
+	if (!bpw_mask) {
+		dev_warn_once(&spi->dev,
+			      "bits_per_word_mask not set, assume 8-bit only\n");
+		return false;
+	}
+
+	if (bpw_mask & SPI_BPW_MASK(bpw))
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL(tinydrm_spi_bpw_supported);
+
+static void
+tinydrm_dbg_spi_print(struct spi_device *spi, struct spi_transfer *tr,
+		      const void *buf, int idx, bool tx)
+{
+	u32 speed_hz = tr->speed_hz ? tr->speed_hz : spi->max_speed_hz;
+	char linebuf[3 * 32];
+
+	hex_dump_to_buffer(buf, tr->len, 16,
+			   DIV_ROUND_UP(tr->bits_per_word, 8),
+			   linebuf, sizeof(linebuf), false);
+
+	printk(KERN_DEBUG
+	       "    tr(%i): speed=%u%s, bpw=%i, len=%u, %s_buf=[%s%s]\n", idx,
+	       speed_hz > 1000000 ? speed_hz / 1000000 : speed_hz / 1000,
+	       speed_hz > 1000000 ? "MHz" : "kHz", tr->bits_per_word, tr->len,
+	       tx ? "tx" : "rx", linebuf, tr->len > 16 ? " ..." : "");
+}
+
+/* called through tinydrm_dbg_spi_message() */
+void _tinydrm_dbg_spi_message(struct spi_device *spi, struct spi_message *m)
+{
+	struct spi_transfer *tmp;
+	struct list_head *pos;
+	int i = 0;
+
+	list_for_each(pos, &m->transfers) {
+		tmp = list_entry(pos, struct spi_transfer, transfer_list);
+
+		if (tmp->tx_buf)
+			tinydrm_dbg_spi_print(spi, tmp, tmp->tx_buf, i, true);
+		if (tmp->rx_buf)
+			tinydrm_dbg_spi_print(spi, tmp, tmp->rx_buf, i, false);
+		i++;
+	}
+}
+EXPORT_SYMBOL(_tinydrm_dbg_spi_message);
+
+/**
+ * tinydrm_spi_transfer - SPI transfer helper
+ * @spi: SPI device
+ * @speed_hz: Override speed (optional)
+ * @header: Optional header transfer
+ * @bpw: Bits per word
+ * @buf: Buffer to transfer
+ * @len: Buffer length
+ *
+ * This SPI transfer helper breaks up the transfer of @buf into chunks which
+ * the SPI master driver can handle. If the machine is Little Endian and the
+ * SPI master driver doesn't support 16 bits per word, it swaps the bytes and
+ * does a 8-bit transfer.
+ * If @header is set, it is prepended to each SPI message.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int tinydrm_spi_transfer(struct spi_device *spi, u32 speed_hz,
+			 struct spi_transfer *header, u8 bpw, const void *buf,
+			 size_t len)
+{
+	struct spi_transfer tr = {
+		.bits_per_word = bpw,
+		.speed_hz = speed_hz,
+	};
+	struct spi_message m;
+	u16 *swap_buf = NULL;
+	size_t max_chunk;
+	size_t chunk;
+	int ret = 0;
+
+	if (WARN_ON_ONCE(bpw != 8 && bpw != 16))
+		return -EINVAL;
+
+	max_chunk = tinydrm_spi_max_transfer_size(spi, 0);
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] bpw=%u, max_chunk=%zu, transfers:\n",
+			 __func__, bpw, max_chunk);
+
+	if (bpw == 16 && !tinydrm_spi_bpw_supported(spi, 16)) {
+		tr.bits_per_word = 8;
+		if (tinydrm_machine_little_endian()) {
+			swap_buf = kmalloc(min(len, max_chunk), GFP_KERNEL);
+			if (!swap_buf)
+				return -ENOMEM;
+		}
+	}
+
+	spi_message_init(&m);
+	if (header)
+		spi_message_add_tail(header, &m);
+	spi_message_add_tail(&tr, &m);
+
+	while (len) {
+		chunk = min(len, max_chunk);
+
+		tr.tx_buf = buf;
+		tr.len = chunk;
+
+		if (swap_buf) {
+			const u16 *buf16 = buf;
+			unsigned int i;
+
+			for (i = 0; i < chunk / 2; i++)
+				swap_buf[i] = swab16(buf16[i]);
+
+			tr.tx_buf = swap_buf;
+		}
+
+		buf += chunk;
+		len -= chunk;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			return ret;
+	};
+
+	return 0;
+}
+EXPORT_SYMBOL(tinydrm_spi_transfer);
+
+#endif /* CONFIG_SPI */
diff --git a/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
new file mode 100644
index 000000000000..ec43fb7ad9e4
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/core/tinydrm-pipe.c
@@ -0,0 +1,234 @@
+/*
+ * Copyright (C) 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_modes.h>
+#include <drm/tinydrm/tinydrm.h>
+
+struct tinydrm_connector {
+	struct drm_connector base;
+	const struct drm_display_mode *mode;
+};
+
+static inline struct tinydrm_connector *
+to_tinydrm_connector(struct drm_connector *connector)
+{
+	return container_of(connector, struct tinydrm_connector, base);
+}
+
+static int tinydrm_connector_get_modes(struct drm_connector *connector)
+{
+	struct tinydrm_connector *tconn = to_tinydrm_connector(connector);
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(connector->dev, tconn->mode);
+	if (!mode) {
+		DRM_ERROR("Failed to duplicate mode\n");
+		return 0;
+	}
+
+	if (mode->name[0] == '\0')
+		drm_mode_set_name(mode);
+
+	mode->type |= DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	if (mode->width_mm) {
+		connector->display_info.width_mm = mode->width_mm;
+		connector->display_info.height_mm = mode->height_mm;
+	}
+
+	return 1;
+}
+
+static const struct drm_connector_helper_funcs tinydrm_connector_hfuncs = {
+	.get_modes = tinydrm_connector_get_modes,
+	.best_encoder = drm_atomic_helper_best_encoder,
+};
+
+static enum drm_connector_status
+tinydrm_connector_detect(struct drm_connector *connector, bool force)
+{
+	if (drm_device_is_unplugged(connector->dev))
+		return connector_status_disconnected;
+
+	return connector->status;
+}
+
+static void tinydrm_connector_destroy(struct drm_connector *connector)
+{
+	struct tinydrm_connector *tconn = to_tinydrm_connector(connector);
+
+	drm_connector_cleanup(connector);
+	kfree(tconn);
+}
+
+static const struct drm_connector_funcs tinydrm_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.reset = drm_atomic_helper_connector_reset,
+	.detect = tinydrm_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = tinydrm_connector_destroy,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+struct drm_connector *
+tinydrm_connector_create(struct drm_device *drm,
+			 const struct drm_display_mode *mode,
+			 int connector_type)
+{
+	struct tinydrm_connector *tconn;
+	struct drm_connector *connector;
+	int ret;
+
+	tconn = kzalloc(sizeof(*tconn), GFP_KERNEL);
+	if (!tconn)
+		return ERR_PTR(-ENOMEM);
+
+	tconn->mode = mode;
+	connector = &tconn->base;
+
+	drm_connector_helper_add(connector, &tinydrm_connector_hfuncs);
+	ret = drm_connector_init(drm, connector, &tinydrm_connector_funcs,
+				 connector_type);
+	if (ret) {
+		kfree(tconn);
+		return ERR_PTR(ret);
+	}
+
+	connector->status = connector_status_connected;
+
+	return connector;
+}
+
+/**
+ * tinydrm_display_pipe_update - Display pipe update helper
+ * @pipe: Simple display pipe
+ * @old_state: Old plane state
+ *
+ * This function does a full framebuffer flush if the plane framebuffer
+ * has changed. It also handles vblank events. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->update callback.
+ */
+void tinydrm_display_pipe_update(struct drm_simple_display_pipe *pipe,
+				 struct drm_plane_state *old_state)
+{
+	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
+	struct drm_framebuffer *fb = pipe->plane.state->fb;
+	struct drm_crtc *crtc = &tdev->pipe.crtc;
+
+	if (fb && (fb != old_state->fb)) {
+		pipe->plane.fb = fb;
+		if (fb->funcs->dirty)
+			fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0);
+	}
+
+	if (crtc->state->event) {
+		spin_lock_irq(&crtc->dev->event_lock);
+		drm_crtc_send_vblank_event(crtc, crtc->state->event);
+		spin_unlock_irq(&crtc->dev->event_lock);
+		crtc->state->event = NULL;
+	}
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_update);
+
+/**
+ * tinydrm_display_pipe_prepare_fb - Display pipe prepare_fb helper
+ * @pipe: Simple display pipe
+ * @plane_state: Plane state
+ *
+ * This function uses drm_fb_cma_prepare_fb() to check if the plane FB has an
+ * dma-buf attached, extracts the exclusive fence and attaches it to plane
+ * state for the atomic helper to wait on. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->prepare_fb callback.
+ */
+int tinydrm_display_pipe_prepare_fb(struct drm_simple_display_pipe *pipe,
+				    struct drm_plane_state *plane_state)
+{
+	return drm_fb_cma_prepare_fb(&pipe->plane, plane_state);
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_prepare_fb);
+
+static int tinydrm_rotate_mode(struct drm_display_mode *mode,
+			       unsigned int rotation)
+{
+	if (rotation == 0 || rotation == 180) {
+		return 0;
+	} else if (rotation == 90 || rotation == 270) {
+		swap(mode->hdisplay, mode->vdisplay);
+		swap(mode->hsync_start, mode->vsync_start);
+		swap(mode->hsync_end, mode->vsync_end);
+		swap(mode->htotal, mode->vtotal);
+		swap(mode->width_mm, mode->height_mm);
+		return 0;
+	} else {
+		return -EINVAL;
+	}
+}
+
+/**
+ * tinydrm_display_pipe_init - Initialize display pipe
+ * @tdev: tinydrm device
+ * @funcs: Display pipe functions
+ * @connector_type: Connector type
+ * @formats: Array of supported formats (DRM_FORMAT\_\*)
+ * @format_count: Number of elements in @formats
+ * @mode: Supported mode
+ * @rotation: Initial @mode rotation in degrees Counter Clock Wise
+ *
+ * This function sets up a &drm_simple_display_pipe with a &drm_connector that
+ * has one fixed &drm_display_mode which is rotated according to @rotation.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int
+tinydrm_display_pipe_init(struct tinydrm_device *tdev,
+			  const struct drm_simple_display_pipe_funcs *funcs,
+			  int connector_type,
+			  const uint32_t *formats,
+			  unsigned int format_count,
+			  const struct drm_display_mode *mode,
+			  unsigned int rotation)
+{
+	struct drm_device *drm = tdev->drm;
+	struct drm_display_mode *mode_copy;
+	struct drm_connector *connector;
+	int ret;
+
+	mode_copy = devm_kmalloc(drm->dev, sizeof(*mode_copy), GFP_KERNEL);
+	if (!mode_copy)
+		return -ENOMEM;
+
+	*mode_copy = *mode;
+	ret = tinydrm_rotate_mode(mode_copy, rotation);
+	if (ret) {
+		DRM_ERROR("Illegal rotation value %u\n", rotation);
+		return -EINVAL;
+	}
+
+	drm->mode_config.min_width = mode_copy->hdisplay;
+	drm->mode_config.max_width = mode_copy->hdisplay;
+	drm->mode_config.min_height = mode_copy->vdisplay;
+	drm->mode_config.max_height = mode_copy->vdisplay;
+
+	connector = tinydrm_connector_create(drm, mode_copy, connector_type);
+	if (IS_ERR(connector))
+		return PTR_ERR(connector);
+
+	ret = drm_simple_display_pipe_init(drm, &tdev->pipe, funcs, formats,
+					   format_count, connector);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+EXPORT_SYMBOL(tinydrm_display_pipe_init);
diff --git a/drivers/gpu/drm/tinydrm/mi0283qt.c b/drivers/gpu/drm/tinydrm/mi0283qt.c
new file mode 100644
index 000000000000..b29fe86158f7
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/mi0283qt.c
@@ -0,0 +1,279 @@
+/*
+ * DRM driver for Multi-Inno MI0283QT panels
+ *
+ * Copyright 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/ili9341.h>
+#include <drm/tinydrm/mipi-dbi.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/property.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <video/mipi_display.h>
+
+static int mi0283qt_init(struct mipi_dbi *mipi)
+{
+	struct tinydrm_device *tdev = &mipi->tinydrm;
+	struct device *dev = tdev->drm->dev;
+	u8 addr_mode;
+	int ret;
+
+	DRM_DEBUG_KMS("\n");
+
+	ret = regulator_enable(mipi->regulator);
+	if (ret) {
+		dev_err(dev, "Failed to enable regulator %d\n", ret);
+		return ret;
+	}
+
+	/* Avoid flicker by skipping setup if the bootloader has done it */
+	if (mipi_dbi_display_is_on(mipi))
+		return 0;
+
+	mipi_dbi_hw_reset(mipi);
+	ret = mipi_dbi_command(mipi, MIPI_DCS_SOFT_RESET);
+	if (ret) {
+		dev_err(dev, "Error sending command %d\n", ret);
+		regulator_disable(mipi->regulator);
+		return ret;
+	}
+
+	msleep(20);
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_OFF);
+
+	mipi_dbi_command(mipi, ILI9341_PWCTRLB, 0x00, 0x83, 0x30);
+	mipi_dbi_command(mipi, ILI9341_PWRSEQ, 0x64, 0x03, 0x12, 0x81);
+	mipi_dbi_command(mipi, ILI9341_DTCTRLA, 0x85, 0x01, 0x79);
+	mipi_dbi_command(mipi, ILI9341_PWCTRLA, 0x39, 0x2c, 0x00, 0x34, 0x02);
+	mipi_dbi_command(mipi, ILI9341_PUMPCTRL, 0x20);
+	mipi_dbi_command(mipi, ILI9341_DTCTRLB, 0x00, 0x00);
+
+	/* Power Control */
+	mipi_dbi_command(mipi, ILI9341_PWCTRL1, 0x26);
+	mipi_dbi_command(mipi, ILI9341_PWCTRL2, 0x11);
+	/* VCOM */
+	mipi_dbi_command(mipi, ILI9341_VMCTRL1, 0x35, 0x3e);
+	mipi_dbi_command(mipi, ILI9341_VMCTRL2, 0xbe);
+
+	/* Memory Access Control */
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PIXEL_FORMAT, 0x55);
+
+	switch (mipi->rotation) {
+	default:
+		addr_mode = ILI9341_MADCTL_MV | ILI9341_MADCTL_MY |
+			    ILI9341_MADCTL_MX;
+		break;
+	case 90:
+		addr_mode = ILI9341_MADCTL_MY;
+		break;
+	case 180:
+		addr_mode = ILI9341_MADCTL_MV;
+		break;
+	case 270:
+		addr_mode = ILI9341_MADCTL_MX;
+		break;
+	}
+	addr_mode |= ILI9341_MADCTL_BGR;
+	mipi_dbi_command(mipi, MIPI_DCS_SET_ADDRESS_MODE, addr_mode);
+
+	/* Frame Rate */
+	mipi_dbi_command(mipi, ILI9341_FRMCTR1, 0x00, 0x1b);
+
+	/* Gamma */
+	mipi_dbi_command(mipi, ILI9341_EN3GAM, 0x08);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_GAMMA_CURVE, 0x01);
+	mipi_dbi_command(mipi, ILI9341_PGAMCTRL,
+		       0x1f, 0x1a, 0x18, 0x0a, 0x0f, 0x06, 0x45, 0x87,
+		       0x32, 0x0a, 0x07, 0x02, 0x07, 0x05, 0x00);
+	mipi_dbi_command(mipi, ILI9341_NGAMCTRL,
+		       0x00, 0x25, 0x27, 0x05, 0x10, 0x09, 0x3a, 0x78,
+		       0x4d, 0x05, 0x18, 0x0d, 0x38, 0x3a, 0x1f);
+
+	/* DDRAM */
+	mipi_dbi_command(mipi, ILI9341_ETMOD, 0x07);
+
+	/* Display */
+	mipi_dbi_command(mipi, ILI9341_DISCTRL, 0x0a, 0x82, 0x27, 0x00);
+	mipi_dbi_command(mipi, MIPI_DCS_EXIT_SLEEP_MODE);
+	msleep(100);
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_DISPLAY_ON);
+	msleep(100);
+
+	return 0;
+}
+
+static void mi0283qt_fini(void *data)
+{
+	struct mipi_dbi *mipi = data;
+
+	DRM_DEBUG_KMS("\n");
+	regulator_disable(mipi->regulator);
+}
+
+static const struct drm_simple_display_pipe_funcs mi0283qt_pipe_funcs = {
+	.enable = mipi_dbi_pipe_enable,
+	.disable = mipi_dbi_pipe_disable,
+	.update = tinydrm_display_pipe_update,
+	.prepare_fb = tinydrm_display_pipe_prepare_fb,
+};
+
+static const struct drm_display_mode mi0283qt_mode = {
+	TINYDRM_MODE(320, 240, 58, 43),
+};
+
+static struct drm_driver mi0283qt_driver = {
+	.driver_features	= DRIVER_GEM | DRIVER_MODESET | DRIVER_PRIME |
+				  DRIVER_ATOMIC,
+	TINYDRM_GEM_DRIVER_OPS,
+	.lastclose		= tinydrm_lastclose,
+	.debugfs_init		= mipi_dbi_debugfs_init,
+	.name			= "mi0283qt",
+	.desc			= "Multi-Inno MI0283QT",
+	.date			= "20160614",
+	.major			= 1,
+	.minor			= 0,
+};
+
+static const struct of_device_id mi0283qt_of_match[] = {
+	{ .compatible = "multi-inno,mi0283qt" },
+	{},
+};
+MODULE_DEVICE_TABLE(of, mi0283qt_of_match);
+
+static const struct spi_device_id mi0283qt_id[] = {
+	{ "mi0283qt", 0 },
+	{ },
+};
+MODULE_DEVICE_TABLE(spi, mi0283qt_id);
+
+static int mi0283qt_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct tinydrm_device *tdev;
+	struct mipi_dbi *mipi;
+	struct gpio_desc *dc;
+	u32 rotation = 0;
+	int ret;
+
+	mipi = devm_kzalloc(dev, sizeof(*mipi), GFP_KERNEL);
+	if (!mipi)
+		return -ENOMEM;
+
+	mipi->reset = devm_gpiod_get_optional(dev, "reset", GPIOD_OUT_HIGH);
+	if (IS_ERR(mipi->reset)) {
+		dev_err(dev, "Failed to get gpio 'reset'\n");
+		return PTR_ERR(mipi->reset);
+	}
+
+	dc = devm_gpiod_get_optional(dev, "dc", GPIOD_OUT_LOW);
+	if (IS_ERR(dc)) {
+		dev_err(dev, "Failed to get gpio 'dc'\n");
+		return PTR_ERR(dc);
+	}
+
+	mipi->regulator = devm_regulator_get(dev, "power");
+	if (IS_ERR(mipi->regulator))
+		return PTR_ERR(mipi->regulator);
+
+	mipi->backlight = tinydrm_of_find_backlight(dev);
+	if (IS_ERR(mipi->backlight))
+		return PTR_ERR(mipi->backlight);
+
+	device_property_read_u32(dev, "rotation", &rotation);
+
+	ret = mipi_dbi_spi_init(spi, mipi, dc, &mi0283qt_pipe_funcs,
+				&mi0283qt_driver, &mi0283qt_mode, rotation);
+	if (ret)
+		return ret;
+
+	ret = mi0283qt_init(mipi);
+	if (ret)
+		return ret;
+
+	/* use devres to fini after drm unregister (drv->remove is before) */
+	ret = devm_add_action(dev, mi0283qt_fini, mipi);
+	if (ret) {
+		mi0283qt_fini(mipi);
+		return ret;
+	}
+
+	tdev = &mipi->tinydrm;
+
+	ret = devm_tinydrm_register(tdev);
+	if (ret)
+		return ret;
+
+	spi_set_drvdata(spi, mipi);
+
+	DRM_DEBUG_DRIVER("Initialized %s:%s @%uMHz on minor %d\n",
+			 tdev->drm->driver->name, dev_name(dev),
+			 spi->max_speed_hz / 1000000,
+			 tdev->drm->primary->index);
+
+	return 0;
+}
+
+static void mi0283qt_shutdown(struct spi_device *spi)
+{
+	struct mipi_dbi *mipi = spi_get_drvdata(spi);
+
+	tinydrm_shutdown(&mipi->tinydrm);
+}
+
+static int __maybe_unused mi0283qt_pm_suspend(struct device *dev)
+{
+	struct mipi_dbi *mipi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = tinydrm_suspend(&mipi->tinydrm);
+	if (ret)
+		return ret;
+
+	mi0283qt_fini(mipi);
+
+	return 0;
+}
+
+static int __maybe_unused mi0283qt_pm_resume(struct device *dev)
+{
+	struct mipi_dbi *mipi = dev_get_drvdata(dev);
+	int ret;
+
+	ret = mi0283qt_init(mipi);
+	if (ret)
+		return ret;
+
+	return tinydrm_resume(&mipi->tinydrm);
+}
+
+static const struct dev_pm_ops mi0283qt_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(mi0283qt_pm_suspend, mi0283qt_pm_resume)
+};
+
+static struct spi_driver mi0283qt_spi_driver = {
+	.driver = {
+		.name = "mi0283qt",
+		.owner = THIS_MODULE,
+		.of_match_table = mi0283qt_of_match,
+		.pm = &mi0283qt_pm_ops,
+	},
+	.id_table = mi0283qt_id,
+	.probe = mi0283qt_probe,
+	.shutdown = mi0283qt_shutdown,
+};
+module_spi_driver(mi0283qt_spi_driver);
+
+MODULE_DESCRIPTION("Multi-Inno MI0283QT DRM driver");
+MODULE_AUTHOR("Noralf Trønnes");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/tinydrm/mipi-dbi.c b/drivers/gpu/drm/tinydrm/mipi-dbi.c
new file mode 100644
index 000000000000..29c0939f5247
--- /dev/null
+++ b/drivers/gpu/drm/tinydrm/mipi-dbi.c
@@ -0,0 +1,1005 @@
+/*
+ * MIPI Display Bus Interface (DBI) LCD controller support
+ *
+ * Copyright 2016 Noralf Trønnes
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#include <drm/tinydrm/mipi-dbi.h>
+#include <drm/tinydrm/tinydrm-helpers.h>
+#include <linux/debugfs.h>
+#include <linux/dma-buf.h>
+#include <linux/gpio/consumer.h>
+#include <linux/module.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+#include <video/mipi_display.h>
+
+#define MIPI_DBI_MAX_SPI_READ_SPEED 2000000 /* 2MHz */
+
+#define DCS_POWER_MODE_DISPLAY			BIT(2)
+#define DCS_POWER_MODE_DISPLAY_NORMAL_MODE	BIT(3)
+#define DCS_POWER_MODE_SLEEP_MODE		BIT(4)
+#define DCS_POWER_MODE_PARTIAL_MODE		BIT(5)
+#define DCS_POWER_MODE_IDLE_MODE		BIT(6)
+#define DCS_POWER_MODE_RESERVED_MASK		(BIT(0) | BIT(1) | BIT(7))
+
+/**
+ * DOC: overview
+ *
+ * This library provides helpers for MIPI Display Bus Interface (DBI)
+ * compatible display controllers.
+ *
+ * Many controllers for tiny lcd displays are MIPI compliant and can use this
+ * library. If a controller uses registers 0x2A and 0x2B to set the area to
+ * update and uses register 0x2C to write to frame memory, it is most likely
+ * MIPI compliant.
+ *
+ * Only MIPI Type 1 displays are supported since a full frame memory is needed.
+ *
+ * There are 3 MIPI DBI implementation types:
+ *
+ * A. Motorola 6800 type parallel bus
+ *
+ * B. Intel 8080 type parallel bus
+ *
+ * C. SPI type with 3 options:
+ *
+ *    1. 9-bit with the Data/Command signal as the ninth bit
+ *    2. Same as above except it's sent as 16 bits
+ *    3. 8-bit with the Data/Command signal as a separate D/CX pin
+ *
+ * Currently mipi_dbi only supports Type C options 1 and 3 with
+ * mipi_dbi_spi_init().
+ */
+
+#define MIPI_DBI_DEBUG_COMMAND(cmd, data, len) \
+({ \
+	if (!len) \
+		DRM_DEBUG_DRIVER("cmd=%02x\n", cmd); \
+	else if (len <= 32) \
+		DRM_DEBUG_DRIVER("cmd=%02x, par=%*ph\n", cmd, (int)len, data);\
+	else \
+		DRM_DEBUG_DRIVER("cmd=%02x, len=%zu\n", cmd, len); \
+})
+
+static const u8 mipi_dbi_dcs_read_commands[] = {
+	MIPI_DCS_GET_DISPLAY_ID,
+	MIPI_DCS_GET_RED_CHANNEL,
+	MIPI_DCS_GET_GREEN_CHANNEL,
+	MIPI_DCS_GET_BLUE_CHANNEL,
+	MIPI_DCS_GET_DISPLAY_STATUS,
+	MIPI_DCS_GET_POWER_MODE,
+	MIPI_DCS_GET_ADDRESS_MODE,
+	MIPI_DCS_GET_PIXEL_FORMAT,
+	MIPI_DCS_GET_DISPLAY_MODE,
+	MIPI_DCS_GET_SIGNAL_MODE,
+	MIPI_DCS_GET_DIAGNOSTIC_RESULT,
+	MIPI_DCS_READ_MEMORY_START,
+	MIPI_DCS_READ_MEMORY_CONTINUE,
+	MIPI_DCS_GET_SCANLINE,
+	MIPI_DCS_GET_DISPLAY_BRIGHTNESS,
+	MIPI_DCS_GET_CONTROL_DISPLAY,
+	MIPI_DCS_GET_POWER_SAVE,
+	MIPI_DCS_GET_CABC_MIN_BRIGHTNESS,
+	MIPI_DCS_READ_DDB_START,
+	MIPI_DCS_READ_DDB_CONTINUE,
+	0, /* sentinel */
+};
+
+static bool mipi_dbi_command_is_read(struct mipi_dbi *mipi, u8 cmd)
+{
+	unsigned int i;
+
+	if (!mipi->read_commands)
+		return false;
+
+	for (i = 0; i < 0xff; i++) {
+		if (!mipi->read_commands[i])
+			return false;
+		if (cmd == mipi->read_commands[i])
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * mipi_dbi_command_read - MIPI DCS read command
+ * @mipi: MIPI structure
+ * @cmd: Command
+ * @val: Value read
+ *
+ * Send MIPI DCS read command to the controller.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_command_read(struct mipi_dbi *mipi, u8 cmd, u8 *val)
+{
+	if (!mipi->read_commands)
+		return -EACCES;
+
+	if (!mipi_dbi_command_is_read(mipi, cmd))
+		return -EINVAL;
+
+	return mipi_dbi_command_buf(mipi, cmd, val, 1);
+}
+EXPORT_SYMBOL(mipi_dbi_command_read);
+
+/**
+ * mipi_dbi_command_buf - MIPI DCS command with parameter(s) in an array
+ * @mipi: MIPI structure
+ * @cmd: Command
+ * @data: Parameter buffer
+ * @len: Buffer length
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_command_buf(struct mipi_dbi *mipi, u8 cmd, u8 *data, size_t len)
+{
+	int ret;
+
+	mutex_lock(&mipi->cmdlock);
+	ret = mipi->command(mipi, cmd, data, len);
+	mutex_unlock(&mipi->cmdlock);
+
+	return ret;
+}
+EXPORT_SYMBOL(mipi_dbi_command_buf);
+
+static int mipi_dbi_buf_copy(void *dst, struct drm_framebuffer *fb,
+				struct drm_clip_rect *clip, bool swap)
+{
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	struct dma_buf_attachment *import_attach = cma_obj->base.import_attach;
+	struct drm_format_name_buf format_name;
+	void *src = cma_obj->vaddr;
+	int ret = 0;
+
+	if (import_attach) {
+		ret = dma_buf_begin_cpu_access(import_attach->dmabuf,
+					       DMA_FROM_DEVICE);
+		if (ret)
+			return ret;
+	}
+
+	switch (fb->format->format) {
+	case DRM_FORMAT_RGB565:
+		if (swap)
+			tinydrm_swab16(dst, src, fb, clip);
+		else
+			tinydrm_memcpy(dst, src, fb, clip);
+		break;
+	case DRM_FORMAT_XRGB8888:
+		tinydrm_xrgb8888_to_rgb565(dst, src, fb, clip, swap);
+		break;
+	default:
+		dev_err_once(fb->dev->dev, "Format is not supported: %s\n",
+			     drm_get_format_name(fb->format->format,
+						 &format_name));
+		return -EINVAL;
+	}
+
+	if (import_attach)
+		ret = dma_buf_end_cpu_access(import_attach->dmabuf,
+					     DMA_FROM_DEVICE);
+	return ret;
+}
+
+static int mipi_dbi_fb_dirty(struct drm_framebuffer *fb,
+			     struct drm_file *file_priv,
+			     unsigned int flags, unsigned int color,
+			     struct drm_clip_rect *clips,
+			     unsigned int num_clips)
+{
+	struct drm_gem_cma_object *cma_obj = drm_fb_cma_get_gem_obj(fb, 0);
+	struct tinydrm_device *tdev = fb->dev->dev_private;
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	bool swap = mipi->swap_bytes;
+	struct drm_clip_rect clip;
+	int ret = 0;
+	bool full;
+	void *tr;
+
+	mutex_lock(&tdev->dirty_lock);
+
+	if (!mipi->enabled)
+		goto out_unlock;
+
+	/* fbdev can flush even when we're not interested */
+	if (tdev->pipe.plane.fb != fb)
+		goto out_unlock;
+
+	full = tinydrm_merge_clips(&clip, clips, num_clips, flags,
+				   fb->width, fb->height);
+
+	DRM_DEBUG("Flushing [FB:%d] x1=%u, x2=%u, y1=%u, y2=%u\n", fb->base.id,
+		  clip.x1, clip.x2, clip.y1, clip.y2);
+
+	if (!mipi->dc || !full || swap ||
+	    fb->format->format == DRM_FORMAT_XRGB8888) {
+		tr = mipi->tx_buf;
+		ret = mipi_dbi_buf_copy(mipi->tx_buf, fb, &clip, swap);
+		if (ret)
+			goto out_unlock;
+	} else {
+		tr = cma_obj->vaddr;
+	}
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_COLUMN_ADDRESS,
+			 (clip.x1 >> 8) & 0xFF, clip.x1 & 0xFF,
+			 (clip.x2 >> 8) & 0xFF, (clip.x2 - 1) & 0xFF);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PAGE_ADDRESS,
+			 (clip.y1 >> 8) & 0xFF, clip.y1 & 0xFF,
+			 (clip.y2 >> 8) & 0xFF, (clip.y2 - 1) & 0xFF);
+
+	ret = mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START, tr,
+				(clip.x2 - clip.x1) * (clip.y2 - clip.y1) * 2);
+
+out_unlock:
+	mutex_unlock(&tdev->dirty_lock);
+
+	if (ret)
+		dev_err_once(fb->dev->dev, "Failed to update display %d\n",
+			     ret);
+
+	return ret;
+}
+
+static const struct drm_framebuffer_funcs mipi_dbi_fb_funcs = {
+	.destroy	= drm_fb_cma_destroy,
+	.create_handle	= drm_fb_cma_create_handle,
+	.dirty		= mipi_dbi_fb_dirty,
+};
+
+/**
+ * mipi_dbi_pipe_enable - MIPI DBI pipe enable helper
+ * @pipe: Display pipe
+ * @crtc_state: CRTC state
+ *
+ * This function enables backlight. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->enable callback.
+ */
+void mipi_dbi_pipe_enable(struct drm_simple_display_pipe *pipe,
+			  struct drm_crtc_state *crtc_state)
+{
+	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	struct drm_framebuffer *fb = pipe->plane.fb;
+
+	DRM_DEBUG_KMS("\n");
+
+	mipi->enabled = true;
+	if (fb)
+		fb->funcs->dirty(fb, NULL, 0, 0, NULL, 0);
+
+	tinydrm_enable_backlight(mipi->backlight);
+}
+EXPORT_SYMBOL(mipi_dbi_pipe_enable);
+
+static void mipi_dbi_blank(struct mipi_dbi *mipi)
+{
+	struct drm_device *drm = mipi->tinydrm.drm;
+	u16 height = drm->mode_config.min_height;
+	u16 width = drm->mode_config.min_width;
+	size_t len = width * height * 2;
+
+	memset(mipi->tx_buf, 0, len);
+
+	mipi_dbi_command(mipi, MIPI_DCS_SET_COLUMN_ADDRESS, 0, 0,
+			 (width >> 8) & 0xFF, (width - 1) & 0xFF);
+	mipi_dbi_command(mipi, MIPI_DCS_SET_PAGE_ADDRESS, 0, 0,
+			 (height >> 8) & 0xFF, (height - 1) & 0xFF);
+	mipi_dbi_command_buf(mipi, MIPI_DCS_WRITE_MEMORY_START,
+			     (u8 *)mipi->tx_buf, len);
+}
+
+/**
+ * mipi_dbi_pipe_disable - MIPI DBI pipe disable helper
+ * @pipe: Display pipe
+ *
+ * This function disables backlight if present or if not the
+ * display memory is blanked. Drivers can use this as their
+ * &drm_simple_display_pipe_funcs->disable callback.
+ */
+void mipi_dbi_pipe_disable(struct drm_simple_display_pipe *pipe)
+{
+	struct tinydrm_device *tdev = pipe_to_tinydrm(pipe);
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+
+	DRM_DEBUG_KMS("\n");
+
+	mipi->enabled = false;
+
+	if (mipi->backlight)
+		tinydrm_disable_backlight(mipi->backlight);
+	else
+		mipi_dbi_blank(mipi);
+}
+EXPORT_SYMBOL(mipi_dbi_pipe_disable);
+
+static const uint32_t mipi_dbi_formats[] = {
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB8888,
+};
+
+/**
+ * mipi_dbi_init - MIPI DBI initialization
+ * @dev: Parent device
+ * @mipi: &mipi_dbi structure to initialize
+ * @pipe_funcs: Display pipe functions
+ * @driver: DRM driver
+ * @mode: Display mode
+ * @rotation: Initial rotation in degrees Counter Clock Wise
+ *
+ * This function initializes a &mipi_dbi structure and it's underlying
+ * @tinydrm_device. It also sets up the display pipeline.
+ *
+ * Supported formats: Native RGB565 and emulated XRGB8888.
+ *
+ * Objects created by this function will be automatically freed on driver
+ * detach (devres).
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_init(struct device *dev, struct mipi_dbi *mipi,
+		  const struct drm_simple_display_pipe_funcs *pipe_funcs,
+		  struct drm_driver *driver,
+		  const struct drm_display_mode *mode, unsigned int rotation)
+{
+	size_t bufsize = mode->vdisplay * mode->hdisplay * sizeof(u16);
+	struct tinydrm_device *tdev = &mipi->tinydrm;
+	int ret;
+
+	if (!mipi->command)
+		return -EINVAL;
+
+	mutex_init(&mipi->cmdlock);
+
+	mipi->tx_buf = devm_kmalloc(dev, bufsize, GFP_KERNEL);
+	if (!mipi->tx_buf)
+		return -ENOMEM;
+
+	ret = devm_tinydrm_init(dev, tdev, &mipi_dbi_fb_funcs, driver);
+	if (ret)
+		return ret;
+
+	/* TODO: Maybe add DRM_MODE_CONNECTOR_SPI */
+	ret = tinydrm_display_pipe_init(tdev, pipe_funcs,
+					DRM_MODE_CONNECTOR_VIRTUAL,
+					mipi_dbi_formats,
+					ARRAY_SIZE(mipi_dbi_formats), mode,
+					rotation);
+	if (ret)
+		return ret;
+
+	tdev->drm->mode_config.preferred_depth = 16;
+	mipi->rotation = rotation;
+
+	drm_mode_config_reset(tdev->drm);
+
+	DRM_DEBUG_KMS("preferred_depth=%u, rotation = %u\n",
+		      tdev->drm->mode_config.preferred_depth, rotation);
+
+	return 0;
+}
+EXPORT_SYMBOL(mipi_dbi_init);
+
+/**
+ * mipi_dbi_hw_reset - Hardware reset of controller
+ * @mipi: MIPI DBI structure
+ *
+ * Reset controller if the &mipi_dbi->reset gpio is set.
+ */
+void mipi_dbi_hw_reset(struct mipi_dbi *mipi)
+{
+	if (!mipi->reset)
+		return;
+
+	gpiod_set_value_cansleep(mipi->reset, 0);
+	msleep(20);
+	gpiod_set_value_cansleep(mipi->reset, 1);
+	msleep(120);
+}
+EXPORT_SYMBOL(mipi_dbi_hw_reset);
+
+/**
+ * mipi_dbi_display_is_on - Check if display is on
+ * @mipi: MIPI DBI structure
+ *
+ * This function checks the Power Mode register (if readable) to see if
+ * display output is turned on. This can be used to see if the bootloader
+ * has already turned on the display avoiding flicker when the pipeline is
+ * enabled.
+ *
+ * Returns:
+ * true if the display can be verified to be on, false otherwise.
+ */
+bool mipi_dbi_display_is_on(struct mipi_dbi *mipi)
+{
+	u8 val;
+
+	if (mipi_dbi_command_read(mipi, MIPI_DCS_GET_POWER_MODE, &val))
+		return false;
+
+	val &= ~DCS_POWER_MODE_RESERVED_MASK;
+
+	if (val != (DCS_POWER_MODE_DISPLAY |
+	    DCS_POWER_MODE_DISPLAY_NORMAL_MODE | DCS_POWER_MODE_SLEEP_MODE))
+		return false;
+
+	DRM_DEBUG_DRIVER("Display is ON\n");
+
+	return true;
+}
+EXPORT_SYMBOL(mipi_dbi_display_is_on);
+
+#if IS_ENABLED(CONFIG_SPI)
+
+/*
+ * Many controllers have a max speed of 10MHz, but can be pushed way beyond
+ * that. Increase reliability by running pixel data at max speed and the rest
+ * at 10MHz, preventing transfer glitches from messing up the init settings.
+ */
+static u32 mipi_dbi_spi_cmd_max_speed(struct spi_device *spi, size_t len)
+{
+	if (len > 64)
+		return 0; /* use default */
+
+	return min_t(u32, 10000000, spi->max_speed_hz);
+}
+
+/*
+ * MIPI DBI Type C Option 1
+ *
+ * If the SPI controller doesn't have 9 bits per word support,
+ * use blocks of 9 bytes to send 8x 9-bit words using a 8-bit SPI transfer.
+ * Pad partial blocks with MIPI_DCS_NOP (zero).
+ * This is how the D/C bit (x) is added:
+ *     x7654321
+ *     0x765432
+ *     10x76543
+ *     210x7654
+ *     3210x765
+ *     43210x76
+ *     543210x7
+ *     6543210x
+ *     76543210
+ */
+
+static int mipi_dbi_spi1e_transfer(struct mipi_dbi *mipi, int dc,
+				   const void *buf, size_t len,
+				   unsigned int bpw)
+{
+	bool swap_bytes = (bpw == 16 && tinydrm_machine_little_endian());
+	size_t chunk, max_chunk = mipi->tx_buf9_len;
+	struct spi_device *spi = mipi->spi;
+	struct spi_transfer tr = {
+		.tx_buf = mipi->tx_buf9,
+		.bits_per_word = 8,
+	};
+	struct spi_message m;
+	const u8 *src = buf;
+	int i, ret;
+	u8 *dst;
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
+			 __func__, dc, max_chunk);
+
+	tr.speed_hz = mipi_dbi_spi_cmd_max_speed(spi, len);
+	spi_message_init_with_transfers(&m, &tr, 1);
+
+	if (!dc) {
+		if (WARN_ON_ONCE(len != 1))
+			return -EINVAL;
+
+		/* Command: pad no-op's (zeroes) at beginning of block */
+		dst = mipi->tx_buf9;
+		memset(dst, 0, 9);
+		dst[8] = *src;
+		tr.len = 9;
+
+		tinydrm_dbg_spi_message(spi, &m);
+
+		return spi_sync(spi, &m);
+	}
+
+	/* max with room for adding one bit per byte */
+	max_chunk = max_chunk / 9 * 8;
+	/* but no bigger than len */
+	max_chunk = min(max_chunk, len);
+	/* 8 byte blocks */
+	max_chunk = max_t(size_t, 8, max_chunk & ~0x7);
+
+	while (len) {
+		size_t added = 0;
+
+		chunk = min(len, max_chunk);
+		len -= chunk;
+		dst = mipi->tx_buf9;
+
+		if (chunk < 8) {
+			u8 val, carry = 0;
+
+			/* Data: pad no-op's (zeroes) at end of block */
+			memset(dst, 0, 9);
+
+			if (swap_bytes) {
+				for (i = 1; i < (chunk + 1); i++) {
+					val = src[1];
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+					i++;
+					val = src[0];
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+					src += 2;
+				}
+				*dst++ = carry;
+			} else {
+				for (i = 1; i < (chunk + 1); i++) {
+					val = *src++;
+					*dst++ = carry | BIT(8 - i) | (val >> i);
+					carry = val << (8 - i);
+				}
+				*dst++ = carry;
+			}
+
+			chunk = 8;
+			added = 1;
+		} else {
+			for (i = 0; i < chunk; i += 8) {
+				if (swap_bytes) {
+					*dst++ =                 BIT(7) | (src[1] >> 1);
+					*dst++ = (src[1] << 7) | BIT(6) | (src[0] >> 2);
+					*dst++ = (src[0] << 6) | BIT(5) | (src[3] >> 3);
+					*dst++ = (src[3] << 5) | BIT(4) | (src[2] >> 4);
+					*dst++ = (src[2] << 4) | BIT(3) | (src[5] >> 5);
+					*dst++ = (src[5] << 3) | BIT(2) | (src[4] >> 6);
+					*dst++ = (src[4] << 2) | BIT(1) | (src[7] >> 7);
+					*dst++ = (src[7] << 1) | BIT(0);
+					*dst++ = src[6];
+				} else {
+					*dst++ =                 BIT(7) | (src[0] >> 1);
+					*dst++ = (src[0] << 7) | BIT(6) | (src[1] >> 2);
+					*dst++ = (src[1] << 6) | BIT(5) | (src[2] >> 3);
+					*dst++ = (src[2] << 5) | BIT(4) | (src[3] >> 4);
+					*dst++ = (src[3] << 4) | BIT(3) | (src[4] >> 5);
+					*dst++ = (src[4] << 3) | BIT(2) | (src[5] >> 6);
+					*dst++ = (src[5] << 2) | BIT(1) | (src[6] >> 7);
+					*dst++ = (src[6] << 1) | BIT(0);
+					*dst++ = src[7];
+				}
+
+				src += 8;
+				added++;
+			}
+		}
+
+		tr.len = chunk + added;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			return ret;
+	};
+
+	return 0;
+}
+
+static int mipi_dbi_spi1_transfer(struct mipi_dbi *mipi, int dc,
+				  const void *buf, size_t len,
+				  unsigned int bpw)
+{
+	struct spi_device *spi = mipi->spi;
+	struct spi_transfer tr = {
+		.bits_per_word = 9,
+	};
+	const u16 *src16 = buf;
+	const u8 *src8 = buf;
+	struct spi_message m;
+	size_t max_chunk;
+	u16 *dst16;
+	int ret;
+
+	if (!tinydrm_spi_bpw_supported(spi, 9))
+		return mipi_dbi_spi1e_transfer(mipi, dc, buf, len, bpw);
+
+	tr.speed_hz = mipi_dbi_spi_cmd_max_speed(spi, len);
+	max_chunk = mipi->tx_buf9_len;
+	dst16 = mipi->tx_buf9;
+
+	if (drm_debug & DRM_UT_DRIVER)
+		pr_debug("[drm:%s] dc=%d, max_chunk=%zu, transfers:\n",
+			 __func__, dc, max_chunk);
+
+	max_chunk = min(max_chunk / 2, len);
+
+	spi_message_init_with_transfers(&m, &tr, 1);
+	tr.tx_buf = dst16;
+
+	while (len) {
+		size_t chunk = min(len, max_chunk);
+		unsigned int i;
+
+		if (bpw == 16 && tinydrm_machine_little_endian()) {
+			for (i = 0; i < (chunk * 2); i += 2) {
+				dst16[i]     = *src16 >> 8;
+				dst16[i + 1] = *src16++ & 0xFF;
+				if (dc) {
+					dst16[i]     |= 0x0100;
+					dst16[i + 1] |= 0x0100;
+				}
+			}
+		} else {
+			for (i = 0; i < chunk; i++) {
+				dst16[i] = *src8++;
+				if (dc)
+					dst16[i] |= 0x0100;
+			}
+		}
+
+		tr.len = chunk;
+		len -= chunk;
+
+		tinydrm_dbg_spi_message(spi, &m);
+		ret = spi_sync(spi, &m);
+		if (ret)
+			return ret;
+	};
+
+	return 0;
+}
+
+static int mipi_dbi_typec1_command(struct mipi_dbi *mipi, u8 cmd,
+				   u8 *parameters, size_t num)
+{
+	unsigned int bpw = (cmd == MIPI_DCS_WRITE_MEMORY_START) ? 16 : 8;
+	int ret;
+
+	if (mipi_dbi_command_is_read(mipi, cmd))
+		return -ENOTSUPP;
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, parameters, num);
+
+	ret = mipi_dbi_spi1_transfer(mipi, 0, &cmd, 1, 8);
+	if (ret || !num)
+		return ret;
+
+	return mipi_dbi_spi1_transfer(mipi, 1, parameters, num, bpw);
+}
+
+/* MIPI DBI Type C Option 3 */
+
+static int mipi_dbi_typec3_command_read(struct mipi_dbi *mipi, u8 cmd,
+					u8 *data, size_t len)
+{
+	struct spi_device *spi = mipi->spi;
+	u32 speed_hz = min_t(u32, MIPI_DBI_MAX_SPI_READ_SPEED,
+			     spi->max_speed_hz / 2);
+	struct spi_transfer tr[2] = {
+		{
+			.speed_hz = speed_hz,
+			.tx_buf = &cmd,
+			.len = 1,
+		}, {
+			.speed_hz = speed_hz,
+			.len = len,
+		},
+	};
+	struct spi_message m;
+	u8 *buf;
+	int ret;
+
+	if (!len)
+		return -EINVAL;
+
+	/*
+	 * Support non-standard 24-bit and 32-bit Nokia read commands which
+	 * start with a dummy clock, so we need to read an extra byte.
+	 */
+	if (cmd == MIPI_DCS_GET_DISPLAY_ID ||
+	    cmd == MIPI_DCS_GET_DISPLAY_STATUS) {
+		if (!(len == 3 || len == 4))
+			return -EINVAL;
+
+		tr[1].len = len + 1;
+	}
+
+	buf = kmalloc(tr[1].len, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+
+	tr[1].rx_buf = buf;
+	gpiod_set_value_cansleep(mipi->dc, 0);
+
+	spi_message_init_with_transfers(&m, tr, ARRAY_SIZE(tr));
+	ret = spi_sync(spi, &m);
+	if (ret)
+		goto err_free;
+
+	tinydrm_dbg_spi_message(spi, &m);
+
+	if (tr[1].len == len) {
+		memcpy(data, buf, len);
+	} else {
+		unsigned int i;
+
+		for (i = 0; i < len; i++)
+			data[i] = (buf[i] << 1) | !!(buf[i + 1] & BIT(7));
+	}
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, data, len);
+
+err_free:
+	kfree(buf);
+
+	return ret;
+}
+
+static int mipi_dbi_typec3_command(struct mipi_dbi *mipi, u8 cmd,
+				   u8 *par, size_t num)
+{
+	struct spi_device *spi = mipi->spi;
+	unsigned int bpw = 8;
+	u32 speed_hz;
+	int ret;
+
+	if (mipi_dbi_command_is_read(mipi, cmd))
+		return mipi_dbi_typec3_command_read(mipi, cmd, par, num);
+
+	MIPI_DBI_DEBUG_COMMAND(cmd, par, num);
+
+	gpiod_set_value_cansleep(mipi->dc, 0);
+	speed_hz = mipi_dbi_spi_cmd_max_speed(spi, 1);
+	ret = tinydrm_spi_transfer(spi, speed_hz, NULL, 8, &cmd, 1);
+	if (ret || !num)
+		return ret;
+
+	if (cmd == MIPI_DCS_WRITE_MEMORY_START && !mipi->swap_bytes)
+		bpw = 16;
+
+	gpiod_set_value_cansleep(mipi->dc, 1);
+	speed_hz = mipi_dbi_spi_cmd_max_speed(spi, num);
+
+	return tinydrm_spi_transfer(spi, speed_hz, NULL, bpw, par, num);
+}
+
+/**
+ * mipi_dbi_spi_init - Initialize MIPI DBI SPI interfaced controller
+ * @spi: SPI device
+ * @dc: D/C gpio (optional)
+ * @mipi: &mipi_dbi structure to initialize
+ * @pipe_funcs: Display pipe functions
+ * @driver: DRM driver
+ * @mode: Display mode
+ * @rotation: Initial rotation in degrees Counter Clock Wise
+ *
+ * This function sets &mipi_dbi->command, enables &mipi->read_commands for the
+ * usual read commands and initializes @mipi using mipi_dbi_init().
+ *
+ * If @dc is set, a Type C Option 3 interface is assumed, if not
+ * Type C Option 1.
+ *
+ * If the SPI master driver doesn't support the necessary bits per word,
+ * the following transformation is used:
+ *
+ * - 9-bit: reorder buffer as 9x 8-bit words, padded with no-op command.
+ * - 16-bit: if big endian send as 8-bit, if little endian swap bytes
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_spi_init(struct spi_device *spi, struct mipi_dbi *mipi,
+		      struct gpio_desc *dc,
+		      const struct drm_simple_display_pipe_funcs *pipe_funcs,
+		      struct drm_driver *driver,
+		      const struct drm_display_mode *mode,
+		      unsigned int rotation)
+{
+	size_t tx_size = tinydrm_spi_max_transfer_size(spi, 0);
+	struct device *dev = &spi->dev;
+	int ret;
+
+	if (tx_size < 16) {
+		DRM_ERROR("SPI transmit buffer too small: %zu\n", tx_size);
+		return -EINVAL;
+	}
+
+	/*
+	 * Even though it's not the SPI device that does DMA (the master does),
+	 * the dma mask is necessary for the dma_alloc_wc() in
+	 * drm_gem_cma_create(). The dma_addr returned will be a physical
+	 * adddress which might be different from the bus address, but this is
+	 * not a problem since the address will not be used.
+	 * The virtual address is used in the transfer and the SPI core
+	 * re-maps it on the SPI master device using the DMA streaming API
+	 * (spi_map_buf()).
+	 */
+	if (!dev->coherent_dma_mask) {
+		ret = dma_coerce_mask_and_coherent(dev, DMA_BIT_MASK(32));
+		if (ret) {
+			dev_warn(dev, "Failed to set dma mask %d\n", ret);
+			return ret;
+		}
+	}
+
+	mipi->spi = spi;
+	mipi->read_commands = mipi_dbi_dcs_read_commands;
+
+	if (dc) {
+		mipi->command = mipi_dbi_typec3_command;
+		mipi->dc = dc;
+		if (tinydrm_machine_little_endian() &&
+		    !tinydrm_spi_bpw_supported(spi, 16))
+			mipi->swap_bytes = true;
+	} else {
+		mipi->command = mipi_dbi_typec1_command;
+		mipi->tx_buf9_len = tx_size;
+		mipi->tx_buf9 = devm_kmalloc(dev, tx_size, GFP_KERNEL);
+		if (!mipi->tx_buf9)
+			return -ENOMEM;
+	}
+
+	return mipi_dbi_init(dev, mipi, pipe_funcs, driver, mode, rotation);
+}
+EXPORT_SYMBOL(mipi_dbi_spi_init);
+
+#endif /* CONFIG_SPI */
+
+#ifdef CONFIG_DEBUG_FS
+
+static ssize_t mipi_dbi_debugfs_command_write(struct file *file,
+					      const char __user *ubuf,
+					      size_t count, loff_t *ppos)
+{
+	struct seq_file *m = file->private_data;
+	struct mipi_dbi *mipi = m->private;
+	u8 val, cmd = 0, parameters[64];
+	char *buf, *pos, *token;
+	unsigned int i;
+	int ret;
+
+	buf = memdup_user_nul(ubuf, count);
+	if (IS_ERR(buf))
+		return PTR_ERR(buf);
+
+	/* strip trailing whitespace */
+	for (i = count - 1; i > 0; i--)
+		if (isspace(buf[i]))
+			buf[i] = '\0';
+		else
+			break;
+	i = 0;
+	pos = buf;
+	while (pos) {
+		token = strsep(&pos, " ");
+		if (!token) {
+			ret = -EINVAL;
+			goto err_free;
+		}
+
+		ret = kstrtou8(token, 16, &val);
+		if (ret < 0)
+			goto err_free;
+
+		if (token == buf)
+			cmd = val;
+		else
+			parameters[i++] = val;
+
+		if (i == 64) {
+			ret = -E2BIG;
+			goto err_free;
+		}
+	}
+
+	ret = mipi_dbi_command_buf(mipi, cmd, parameters, i);
+
+err_free:
+	kfree(buf);
+
+	return ret < 0 ? ret : count;
+}
+
+static int mipi_dbi_debugfs_command_show(struct seq_file *m, void *unused)
+{
+	struct mipi_dbi *mipi = m->private;
+	u8 cmd, val[4];
+	size_t len, i;
+	int ret;
+
+	for (cmd = 0; cmd < 255; cmd++) {
+		if (!mipi_dbi_command_is_read(mipi, cmd))
+			continue;
+
+		switch (cmd) {
+		case MIPI_DCS_READ_MEMORY_START:
+		case MIPI_DCS_READ_MEMORY_CONTINUE:
+			len = 2;
+			break;
+		case MIPI_DCS_GET_DISPLAY_ID:
+			len = 3;
+			break;
+		case MIPI_DCS_GET_DISPLAY_STATUS:
+			len = 4;
+			break;
+		default:
+			len = 1;
+			break;
+		}
+
+		seq_printf(m, "%02x: ", cmd);
+		ret = mipi_dbi_command_buf(mipi, cmd, val, len);
+		if (ret) {
+			seq_puts(m, "XX\n");
+			continue;
+		}
+
+		for (i = 0; i < len; i++)
+			seq_printf(m, "%02x", val[i]);
+		seq_puts(m, "\n");
+	}
+
+	return 0;
+}
+
+static int mipi_dbi_debugfs_command_open(struct inode *inode,
+					 struct file *file)
+{
+	return single_open(file, mipi_dbi_debugfs_command_show,
+			   inode->i_private);
+}
+
+static const struct file_operations mipi_dbi_debugfs_command_fops = {
+	.owner = THIS_MODULE,
+	.open = mipi_dbi_debugfs_command_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+	.write = mipi_dbi_debugfs_command_write,
+};
+
+static const struct drm_info_list mipi_dbi_debugfs_list[] = {
+	{ "fb",   drm_fb_cma_debugfs_show, 0 },
+};
+
+/**
+ * mipi_dbi_debugfs_init - Create debugfs entries
+ * @minor: DRM minor
+ *
+ * This function creates a 'command' debugfs file for sending commands to the
+ * controller or getting the read command values.
+ * Drivers can use this as their &drm_driver->debugfs_init callback.
+ *
+ * Returns:
+ * Zero on success, negative error code on failure.
+ */
+int mipi_dbi_debugfs_init(struct drm_minor *minor)
+{
+	struct tinydrm_device *tdev = minor->dev->dev_private;
+	struct mipi_dbi *mipi = mipi_dbi_from_tinydrm(tdev);
+	umode_t mode = S_IFREG | S_IWUSR;
+
+	if (mipi->read_commands)
+		mode |= S_IRUGO;
+	debugfs_create_file("command", mode, minor->debugfs_root, mipi,
+			    &mipi_dbi_debugfs_command_fops);
+
+	return drm_debugfs_create_files(mipi_dbi_debugfs_list,
+					ARRAY_SIZE(mipi_dbi_debugfs_list),
+					minor->debugfs_root, minor);
+}
+EXPORT_SYMBOL(mipi_dbi_debugfs_init);
+
+#endif
+
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 4562e53c8244..17478f38dea3 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -140,8 +140,8 @@ static void ttm_bo_release_list(struct kref *list_kref)
 	struct ttm_bo_device *bdev = bo->bdev;
 	size_t acc_size = bo->acc_size;
 
-	BUG_ON(atomic_read(&bo->list_kref.refcount));
-	BUG_ON(atomic_read(&bo->kref.refcount));
+	BUG_ON(kref_read(&bo->list_kref));
+	BUG_ON(kref_read(&bo->kref));
 	BUG_ON(atomic_read(&bo->cpu_writers));
 	BUG_ON(bo->mem.mm_node != NULL);
 	BUG_ON(!list_empty(&bo->lru));
@@ -184,58 +184,41 @@ void ttm_bo_add_to_lru(struct ttm_buffer_object *bo)
 }
 EXPORT_SYMBOL(ttm_bo_add_to_lru);
 
-int ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+static void ttm_bo_ref_bug(struct kref *list_kref)
 {
-	int put_count = 0;
+	BUG();
+}
 
+void ttm_bo_del_from_lru(struct ttm_buffer_object *bo)
+{
 	if (!list_empty(&bo->swap)) {
 		list_del_init(&bo->swap);
-		++put_count;
+		kref_put(&bo->list_kref, ttm_bo_ref_bug);
 	}
 	if (!list_empty(&bo->lru)) {
 		list_del_init(&bo->lru);
-		++put_count;
+		kref_put(&bo->list_kref, ttm_bo_ref_bug);
 	}
 
 	/*
 	 * TODO: Add a driver hook to delete from
 	 * driver-specific LRU's here.
 	 */
-
-	return put_count;
-}
-
-static void ttm_bo_ref_bug(struct kref *list_kref)
-{
-	BUG();
-}
-
-void ttm_bo_list_ref_sub(struct ttm_buffer_object *bo, int count,
-			 bool never_free)
-{
-	kref_sub(&bo->list_kref, count,
-		 (never_free) ? ttm_bo_ref_bug : ttm_bo_release_list);
 }
 
 void ttm_bo_del_sub_from_lru(struct ttm_buffer_object *bo)
 {
-	int put_count;
-
 	spin_lock(&bo->glob->lru_lock);
-	put_count = ttm_bo_del_from_lru(bo);
+	ttm_bo_del_from_lru(bo);
 	spin_unlock(&bo->glob->lru_lock);
-	ttm_bo_list_ref_sub(bo, put_count, true);
 }
 EXPORT_SYMBOL(ttm_bo_del_sub_from_lru);
 
 void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo)
 {
-	int put_count = 0;
-
 	lockdep_assert_held(&bo->resv->lock.base);
 
-	put_count = ttm_bo_del_from_lru(bo);
-	ttm_bo_list_ref_sub(bo, put_count, true);
+	ttm_bo_del_from_lru(bo);
 	ttm_bo_add_to_lru(bo);
 }
 EXPORT_SYMBOL(ttm_bo_move_to_lru_tail);
@@ -435,7 +418,6 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 {
 	struct ttm_bo_device *bdev = bo->bdev;
 	struct ttm_bo_global *glob = bo->glob;
-	int put_count;
 	int ret;
 
 	spin_lock(&glob->lru_lock);
@@ -443,13 +425,10 @@ static void ttm_bo_cleanup_refs_or_queue(struct ttm_buffer_object *bo)
 
 	if (!ret) {
 		if (!ttm_bo_wait(bo, false, true)) {
-			put_count = ttm_bo_del_from_lru(bo);
-
+			ttm_bo_del_from_lru(bo);
 			spin_unlock(&glob->lru_lock);
 			ttm_bo_cleanup_memtype_use(bo);
 
-			ttm_bo_list_ref_sub(bo, put_count, true);
-
 			return;
 		} else
 			ttm_bo_flush_all_fences(bo);
@@ -492,7 +471,6 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 					  bool no_wait_gpu)
 {
 	struct ttm_bo_global *glob = bo->glob;
-	int put_count;
 	int ret;
 
 	ret = ttm_bo_wait(bo, false, true);
@@ -542,15 +520,13 @@ static int ttm_bo_cleanup_refs_and_unlock(struct ttm_buffer_object *bo,
 		return ret;
 	}
 
-	put_count = ttm_bo_del_from_lru(bo);
+	ttm_bo_del_from_lru(bo);
 	list_del_init(&bo->ddestroy);
-	++put_count;
+	kref_put(&bo->list_kref, ttm_bo_ref_bug);
 
 	spin_unlock(&glob->lru_lock);
 	ttm_bo_cleanup_memtype_use(bo);
 
-	ttm_bo_list_ref_sub(bo, put_count, true);
-
 	return 0;
 }
 
@@ -728,7 +704,7 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 	struct ttm_bo_global *glob = bdev->glob;
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
 	struct ttm_buffer_object *bo;
-	int ret = -EBUSY, put_count;
+	int ret = -EBUSY;
 	unsigned i;
 
 	spin_lock(&glob->lru_lock);
@@ -766,13 +742,11 @@ static int ttm_mem_evict_first(struct ttm_bo_device *bdev,
 		return ret;
 	}
 
-	put_count = ttm_bo_del_from_lru(bo);
+	ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
 	BUG_ON(ret != 0);
 
-	ttm_bo_list_ref_sub(bo, put_count, true);
-
 	ret = ttm_bo_evict(bo, interruptible, no_wait_gpu);
 	ttm_bo_unreserve(bo);
 
@@ -1667,7 +1641,6 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 	    container_of(shrink, struct ttm_bo_global, shrink);
 	struct ttm_buffer_object *bo;
 	int ret = -EBUSY;
-	int put_count;
 	unsigned i;
 
 	spin_lock(&glob->lru_lock);
@@ -1694,11 +1667,9 @@ static int ttm_bo_swapout(struct ttm_mem_shrink *shrink)
 		return ret;
 	}
 
-	put_count = ttm_bo_del_from_lru(bo);
+	ttm_bo_del_from_lru(bo);
 	spin_unlock(&glob->lru_lock);
 
-	ttm_bo_list_ref_sub(bo, put_count, true);
-
 	/**
 	 * Move to system cached
 	 */
diff --git a/drivers/gpu/drm/ttm/ttm_bo_manager.c b/drivers/gpu/drm/ttm/ttm_bo_manager.c
index 988c48d1cf3e..90a6c0b03afc 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_manager.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_manager.c
@@ -54,9 +54,8 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 {
 	struct ttm_range_manager *rman = (struct ttm_range_manager *) man->priv;
 	struct drm_mm *mm = &rman->mm;
-	struct drm_mm_node *node = NULL;
-	enum drm_mm_search_flags sflags = DRM_MM_SEARCH_BEST;
-	enum drm_mm_allocator_flags aflags = DRM_MM_CREATE_DEFAULT;
+	struct drm_mm_node *node;
+	enum drm_mm_insert_mode mode;
 	unsigned long lpfn;
 	int ret;
 
@@ -68,16 +67,15 @@ static int ttm_bo_man_get_node(struct ttm_mem_type_manager *man,
 	if (!node)
 		return -ENOMEM;
 
-	if (place->flags & TTM_PL_FLAG_TOPDOWN) {
-		sflags = DRM_MM_SEARCH_BELOW;
-		aflags = DRM_MM_CREATE_TOP;
-	}
+	mode = DRM_MM_INSERT_BEST;
+	if (place->flags & TTM_PL_FLAG_TOPDOWN)
+		mode = DRM_MM_INSERT_HIGH;
 
 	spin_lock(&rman->lock);
-	ret = drm_mm_insert_node_in_range_generic(mm, node, mem->num_pages,
+	ret = drm_mm_insert_node_in_range(mm, node,
+					  mem->num_pages,
 					  mem->page_alignment, 0,
-					  place->fpfn, lpfn,
-					  sflags, aflags);
+					  place->fpfn, lpfn, mode);
 	spin_unlock(&rman->lock);
 
 	if (unlikely(ret)) {
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 68ef993ab431..35ffb3754feb 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -43,7 +43,6 @@
 #define TTM_BO_VM_NUM_PREFAULT 16
 
 static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
-				struct vm_area_struct *vma,
 				struct vm_fault *vmf)
 {
 	int ret = 0;
@@ -66,8 +65,11 @@ static int ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
 		if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
 			goto out_unlock;
 
-		up_read(&vma->vm_mm->mmap_sem);
+		ttm_bo_reference(bo);
+		up_read(&vmf->vma->vm_mm->mmap_sem);
 		(void) dma_fence_wait(bo->moving, true);
+		ttm_bo_unreserve(bo);
+		ttm_bo_unref(&bo);
 		goto out_unlock;
 	}
 
@@ -89,8 +91,9 @@ out_unlock:
 	return ret;
 }
 
-static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int ttm_bo_vm_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
 	    vma->vm_private_data;
 	struct ttm_bo_device *bdev = bo->bdev;
@@ -120,8 +123,10 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 
 		if (vmf->flags & FAULT_FLAG_ALLOW_RETRY) {
 			if (!(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
-				up_read(&vma->vm_mm->mmap_sem);
+				ttm_bo_reference(bo);
+				up_read(&vmf->vma->vm_mm->mmap_sem);
 				(void) ttm_bo_wait_unreserved(bo);
+				ttm_bo_unref(&bo);
 			}
 
 			return VM_FAULT_RETRY;
@@ -163,9 +168,16 @@ static int ttm_bo_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
 	 * Wait for buffer data in transit, due to a pipelined
 	 * move.
 	 */
-	ret = ttm_bo_vm_fault_idle(bo, vma, vmf);
+	ret = ttm_bo_vm_fault_idle(bo, vmf);
 	if (unlikely(ret != 0)) {
 		retval = ret;
+
+		if (retval == VM_FAULT_RETRY &&
+		    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
+			/* The BO has already been unreserved. */
+			return retval;
+		}
+
 		goto out_unlock;
 	}
 
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index d35bc491e8de..5e1bcabffef5 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -48,9 +48,7 @@ static void ttm_eu_del_from_lru_locked(struct list_head *list)
 
 	list_for_each_entry(entry, list, head) {
 		struct ttm_buffer_object *bo = entry->bo;
-		unsigned put_count = ttm_bo_del_from_lru(bo);
-
-		ttm_bo_list_ref_sub(bo, put_count, true);
+		ttm_bo_del_from_lru(bo);
 	}
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c
index f154fb1929bd..913f4318cdc0 100644
--- a/drivers/gpu/drm/ttm/ttm_lock.c
+++ b/drivers/gpu/drm/ttm/ttm_lock.c
@@ -33,7 +33,7 @@
 #include <linux/atomic.h>
 #include <linux/errno.h>
 #include <linux/wait.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/module.h>
 
 #define TTM_WRITE_LOCK_PENDING    (1 << 0)
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 4f5fa8d65fe9..fdb451e3ec01 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -304,7 +304,7 @@ bool ttm_ref_object_exists(struct ttm_object_file *tfile,
 	 * Verify that the ref->obj pointer was actually valid!
 	 */
 	rmb();
-	if (unlikely(atomic_read(&ref->kref.refcount) == 0))
+	if (unlikely(kref_read(&ref->kref) == 0))
 		goto out_false;
 
 	rcu_read_unlock();
diff --git a/drivers/gpu/drm/udl/udl_drv.h b/drivers/gpu/drm/udl/udl_drv.h
index 6c4286e57362..2a75ab80527a 100644
--- a/drivers/gpu/drm/udl/udl_drv.h
+++ b/drivers/gpu/drm/udl/udl_drv.h
@@ -134,7 +134,7 @@ void udl_gem_put_pages(struct udl_gem_object *obj);
 int udl_gem_vmap(struct udl_gem_object *obj);
 void udl_gem_vunmap(struct udl_gem_object *obj);
 int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma);
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf);
+int udl_gem_fault(struct vm_fault *vmf);
 
 int udl_handle_damage(struct udl_framebuffer *fb, int x, int y,
 		      int width, int height);
diff --git a/drivers/gpu/drm/udl/udl_fb.c b/drivers/gpu/drm/udl/udl_fb.c
index b8dc06d68777..8e8d60e9a1a2 100644
--- a/drivers/gpu/drm/udl/udl_fb.c
+++ b/drivers/gpu/drm/udl/udl_fb.c
@@ -441,8 +441,7 @@ int udl_fbdev_init(struct drm_device *dev)
 
 	drm_fb_helper_prepare(dev, &ufbdev->helper, &udl_fb_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, &ufbdev->helper,
-				 1, 1);
+	ret = drm_fb_helper_init(dev, &ufbdev->helper, 1);
 	if (ret)
 		goto free;
 
diff --git a/drivers/gpu/drm/udl/udl_gem.c b/drivers/gpu/drm/udl/udl_gem.c
index 3c0c4bd3f750..775c50e4f02c 100644
--- a/drivers/gpu/drm/udl/udl_gem.c
+++ b/drivers/gpu/drm/udl/udl_gem.c
@@ -100,8 +100,9 @@ int udl_drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	return ret;
 }
 
-int udl_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+int udl_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct udl_gem_object *obj = to_udl_bo(vma->vm_private_data);
 	struct page *page;
 	unsigned int page_offset;
diff --git a/drivers/gpu/drm/vc4/Kconfig b/drivers/gpu/drm/vc4/Kconfig
index e53df59cb139..e1517d07cb7d 100644
--- a/drivers/gpu/drm/vc4/Kconfig
+++ b/drivers/gpu/drm/vc4/Kconfig
@@ -2,10 +2,12 @@ config DRM_VC4
 	tristate "Broadcom VC4 Graphics"
 	depends on ARCH_BCM2835 || COMPILE_TEST
 	depends on DRM
+	depends on COMMON_CLK
 	select DRM_KMS_HELPER
 	select DRM_KMS_CMA_HELPER
 	select DRM_GEM_CMA_HELPER
 	select DRM_PANEL
+	select DRM_MIPI_DSI
 	help
 	  Choose this option if you have a system that has a Broadcom
 	  VC4 GPU, such as the Raspberry Pi or other BCM2708/BCM2835.
diff --git a/drivers/gpu/drm/vc4/Makefile b/drivers/gpu/drm/vc4/Makefile
index 7757f69a8a77..61f45d122bd0 100644
--- a/drivers/gpu/drm/vc4/Makefile
+++ b/drivers/gpu/drm/vc4/Makefile
@@ -8,6 +8,7 @@ vc4-y := \
 	vc4_crtc.o \
 	vc4_drv.o \
 	vc4_dpi.o \
+	vc4_dsi.o \
 	vc4_kms.o \
 	vc4_gem.o \
 	vc4_hdmi.o \
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 75b708b36890..0c06844af445 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -348,38 +348,40 @@ static u32 vc4_get_fifo_full_level(u32 format)
 }
 
 /*
- * Returns the clock select bit for the connector attached to the
- * CRTC.
+ * Returns the encoder attached to the CRTC.
+ *
+ * VC4 can only scan out to one encoder at a time, while the DRM core
+ * allows drivers to push pixels to more than one encoder from the
+ * same CRTC.
  */
-static int vc4_get_clock_select(struct drm_crtc *crtc)
+static struct drm_encoder *vc4_get_crtc_encoder(struct drm_crtc *crtc)
 {
 	struct drm_connector *connector;
 
 	drm_for_each_connector(connector, crtc->dev) {
 		if (connector->state->crtc == crtc) {
-			struct drm_encoder *encoder = connector->encoder;
-			struct vc4_encoder *vc4_encoder =
-				to_vc4_encoder(encoder);
-
-			return vc4_encoder->clock_select;
+			return connector->encoder;
 		}
 	}
 
-	return -1;
+	return NULL;
 }
 
 static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct vc4_dev *vc4 = to_vc4_dev(dev);
+	struct drm_encoder *encoder = vc4_get_crtc_encoder(crtc);
+	struct vc4_encoder *vc4_encoder = to_vc4_encoder(encoder);
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	struct drm_crtc_state *state = crtc->state;
 	struct drm_display_mode *mode = &state->adjusted_mode;
 	bool interlace = mode->flags & DRM_MODE_FLAG_INTERLACE;
 	u32 pixel_rep = (mode->flags & DRM_MODE_FLAG_DBLCLK) ? 2 : 1;
-	u32 format = PV_CONTROL_FORMAT_24;
+	bool is_dsi = (vc4_encoder->type == VC4_ENCODER_TYPE_DSI0 ||
+		       vc4_encoder->type == VC4_ENCODER_TYPE_DSI1);
+	u32 format = is_dsi ? PV_CONTROL_FORMAT_DSIV_24 : PV_CONTROL_FORMAT_24;
 	bool debug_dump_regs = false;
-	int clock_select = vc4_get_clock_select(crtc);
 
 	if (debug_dump_regs) {
 		DRM_INFO("CRTC %d regs before:\n", drm_crtc_index(crtc));
@@ -435,17 +437,19 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 		 */
 		CRTC_WRITE(PV_V_CONTROL,
 			   PV_VCONTROL_CONTINUOUS |
+			   (is_dsi ? PV_VCONTROL_DSI : 0) |
 			   PV_VCONTROL_INTERLACE |
 			   VC4_SET_FIELD(mode->htotal * pixel_rep / 2,
 					 PV_VCONTROL_ODD_DELAY));
 		CRTC_WRITE(PV_VSYNCD_EVEN, 0);
 	} else {
-		CRTC_WRITE(PV_V_CONTROL, PV_VCONTROL_CONTINUOUS);
+		CRTC_WRITE(PV_V_CONTROL,
+			   PV_VCONTROL_CONTINUOUS |
+			   (is_dsi ? PV_VCONTROL_DSI : 0));
 	}
 
 	CRTC_WRITE(PV_HACT_ACT, mode->hdisplay * pixel_rep);
 
-
 	CRTC_WRITE(PV_CONTROL,
 		   VC4_SET_FIELD(format, PV_CONTROL_FORMAT) |
 		   VC4_SET_FIELD(vc4_get_fifo_full_level(format),
@@ -454,7 +458,8 @@ static void vc4_crtc_mode_set_nofb(struct drm_crtc *crtc)
 		   PV_CONTROL_CLR_AT_START |
 		   PV_CONTROL_TRIGGER_UNDERFLOW |
 		   PV_CONTROL_WAIT_HSTART |
-		   VC4_SET_FIELD(clock_select, PV_CONTROL_CLK_SELECT) |
+		   VC4_SET_FIELD(vc4_encoder->clock_select,
+				 PV_CONTROL_CLK_SELECT) |
 		   PV_CONTROL_FIFO_CLR |
 		   PV_CONTROL_EN);
 
@@ -588,7 +593,7 @@ static int vc4_crtc_atomic_check(struct drm_crtc *crtc,
 
 	spin_lock_irqsave(&vc4->hvs->mm_lock, flags);
 	ret = drm_mm_insert_node(&vc4->hvs->dlist_mm, &vc4_state->mm,
-				 dlist_count, 1, 0);
+				 dlist_count);
 	spin_unlock_irqrestore(&vc4->hvs->mm_lock, flags);
 	if (ret)
 		return ret;
@@ -838,7 +843,7 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
 
 	}
 
-	__drm_atomic_helper_crtc_destroy_state(state);
+	drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
 
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
diff --git a/drivers/gpu/drm/vc4/vc4_debugfs.c b/drivers/gpu/drm/vc4/vc4_debugfs.c
index c4d5e6a8d6f2..5db06bdb5f27 100644
--- a/drivers/gpu/drm/vc4/vc4_debugfs.c
+++ b/drivers/gpu/drm/vc4/vc4_debugfs.c
@@ -18,6 +18,7 @@
 static const struct drm_info_list vc4_debugfs_list[] = {
 	{"bo_stats", vc4_bo_stats_debugfs, 0},
 	{"dpi_regs", vc4_dpi_debugfs_regs, 0},
+	{"dsi1_regs", vc4_dsi_debugfs_regs, 0, (void *)(uintptr_t)1},
 	{"hdmi_regs", vc4_hdmi_debugfs_regs, 0},
 	{"vec_regs", vc4_vec_debugfs_regs, 0},
 	{"hvs_regs", vc4_hvs_debugfs_regs, 0},
diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c
index e4f42ebee517..a459745e96f7 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.c
+++ b/drivers/gpu/drm/vc4/vc4_drv.c
@@ -295,6 +295,7 @@ static struct platform_driver *const component_drivers[] = {
 	&vc4_hdmi_driver,
 	&vc4_vec_driver,
 	&vc4_dpi_driver,
+	&vc4_dsi_driver,
 	&vc4_hvs_driver,
 	&vc4_crtc_driver,
 	&vc4_v3d_driver,
diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h
index 78e3e5a43fb0..0e59f3ee1b83 100644
--- a/drivers/gpu/drm/vc4/vc4_drv.h
+++ b/drivers/gpu/drm/vc4/vc4_drv.h
@@ -18,6 +18,7 @@ struct vc4_dev {
 	struct vc4_hvs *hvs;
 	struct vc4_v3d *v3d;
 	struct vc4_dpi *dpi;
+	struct vc4_dsi *dsi1;
 	struct vc4_vec *vec;
 
 	struct drm_fbdev_cma *fbdev;
@@ -465,6 +466,10 @@ void __iomem *vc4_ioremap_regs(struct platform_device *dev, int index);
 extern struct platform_driver vc4_dpi_driver;
 int vc4_dpi_debugfs_regs(struct seq_file *m, void *unused);
 
+/* vc4_dsi.c */
+extern struct platform_driver vc4_dsi_driver;
+int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused);
+
 /* vc4_gem.c */
 void vc4_gem_init(struct drm_device *dev);
 void vc4_gem_destroy(struct drm_device *dev);
diff --git a/drivers/gpu/drm/vc4/vc4_dsi.c b/drivers/gpu/drm/vc4/vc4_dsi.c
new file mode 100644
index 000000000000..2736b0331beb
--- /dev/null
+++ b/drivers/gpu/drm/vc4/vc4_dsi.c
@@ -0,0 +1,1725 @@
+/*
+ * Copyright (C) 2016 Broadcom
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/**
+ * DOC: VC4 DSI0/DSI1 module
+ *
+ * BCM2835 contains two DSI modules, DSI0 and DSI1.  DSI0 is a
+ * single-lane DSI controller, while DSI1 is a more modern 4-lane DSI
+ * controller.
+ *
+ * Most Raspberry Pi boards expose DSI1 as their "DISPLAY" connector,
+ * while the compute module brings both DSI0 and DSI1 out.
+ *
+ * This driver has been tested for DSI1 video-mode display only
+ * currently, with most of the information necessary for DSI0
+ * hopefully present.
+ */
+
+#include "drm_atomic_helper.h"
+#include "drm_crtc_helper.h"
+#include "drm_edid.h"
+#include "drm_mipi_dsi.h"
+#include "drm_panel.h"
+#include "linux/clk.h"
+#include "linux/clk-provider.h"
+#include "linux/completion.h"
+#include "linux/component.h"
+#include "linux/dmaengine.h"
+#include "linux/i2c.h"
+#include "linux/of_address.h"
+#include "linux/of_platform.h"
+#include "linux/pm_runtime.h"
+#include "vc4_drv.h"
+#include "vc4_regs.h"
+
+#define DSI_CMD_FIFO_DEPTH  16
+#define DSI_PIX_FIFO_DEPTH 256
+#define DSI_PIX_FIFO_WIDTH   4
+
+#define DSI0_CTRL		0x00
+
+/* Command packet control. */
+#define DSI0_TXPKT1C		0x04 /* AKA PKTC */
+#define DSI1_TXPKT1C		0x04
+# define DSI_TXPKT1C_TRIG_CMD_MASK	VC4_MASK(31, 24)
+# define DSI_TXPKT1C_TRIG_CMD_SHIFT	24
+# define DSI_TXPKT1C_CMD_REPEAT_MASK	VC4_MASK(23, 10)
+# define DSI_TXPKT1C_CMD_REPEAT_SHIFT	10
+
+# define DSI_TXPKT1C_DISPLAY_NO_MASK	VC4_MASK(9, 8)
+# define DSI_TXPKT1C_DISPLAY_NO_SHIFT	8
+/* Short, trigger, BTA, or a long packet that fits all in CMDFIFO. */
+# define DSI_TXPKT1C_DISPLAY_NO_SHORT		0
+/* Primary display where cmdfifo provides part of the payload and
+ * pixelvalve the rest.
+ */
+# define DSI_TXPKT1C_DISPLAY_NO_PRIMARY		1
+/* Secondary display where cmdfifo provides part of the payload and
+ * pixfifo the rest.
+ */
+# define DSI_TXPKT1C_DISPLAY_NO_SECONDARY	2
+
+# define DSI_TXPKT1C_CMD_TX_TIME_MASK	VC4_MASK(7, 6)
+# define DSI_TXPKT1C_CMD_TX_TIME_SHIFT	6
+
+# define DSI_TXPKT1C_CMD_CTRL_MASK	VC4_MASK(5, 4)
+# define DSI_TXPKT1C_CMD_CTRL_SHIFT	4
+/* Command only.  Uses TXPKT1H and DISPLAY_NO */
+# define DSI_TXPKT1C_CMD_CTRL_TX	0
+/* Command with BTA for either ack or read data. */
+# define DSI_TXPKT1C_CMD_CTRL_RX	1
+/* Trigger according to TRIG_CMD */
+# define DSI_TXPKT1C_CMD_CTRL_TRIG	2
+/* BTA alone for getting error status after a command, or a TE trigger
+ * without a previous command.
+ */
+# define DSI_TXPKT1C_CMD_CTRL_BTA	3
+
+# define DSI_TXPKT1C_CMD_MODE_LP	BIT(3)
+# define DSI_TXPKT1C_CMD_TYPE_LONG	BIT(2)
+# define DSI_TXPKT1C_CMD_TE_EN		BIT(1)
+# define DSI_TXPKT1C_CMD_EN		BIT(0)
+
+/* Command packet header. */
+#define DSI0_TXPKT1H		0x08 /* AKA PKTH */
+#define DSI1_TXPKT1H		0x08
+# define DSI_TXPKT1H_BC_CMDFIFO_MASK	VC4_MASK(31, 24)
+# define DSI_TXPKT1H_BC_CMDFIFO_SHIFT	24
+# define DSI_TXPKT1H_BC_PARAM_MASK	VC4_MASK(23, 8)
+# define DSI_TXPKT1H_BC_PARAM_SHIFT	8
+# define DSI_TXPKT1H_BC_DT_MASK		VC4_MASK(7, 0)
+# define DSI_TXPKT1H_BC_DT_SHIFT	0
+
+#define DSI0_RXPKT1H		0x0c /* AKA RX1_PKTH */
+#define DSI1_RXPKT1H		0x14
+# define DSI_RXPKT1H_CRC_ERR		BIT(31)
+# define DSI_RXPKT1H_DET_ERR		BIT(30)
+# define DSI_RXPKT1H_ECC_ERR		BIT(29)
+# define DSI_RXPKT1H_COR_ERR		BIT(28)
+# define DSI_RXPKT1H_INCOMP_PKT		BIT(25)
+# define DSI_RXPKT1H_PKT_TYPE_LONG	BIT(24)
+/* Byte count if DSI_RXPKT1H_PKT_TYPE_LONG */
+# define DSI_RXPKT1H_BC_PARAM_MASK	VC4_MASK(23, 8)
+# define DSI_RXPKT1H_BC_PARAM_SHIFT	8
+/* Short return bytes if !DSI_RXPKT1H_PKT_TYPE_LONG */
+# define DSI_RXPKT1H_SHORT_1_MASK	VC4_MASK(23, 16)
+# define DSI_RXPKT1H_SHORT_1_SHIFT	16
+# define DSI_RXPKT1H_SHORT_0_MASK	VC4_MASK(15, 8)
+# define DSI_RXPKT1H_SHORT_0_SHIFT	8
+# define DSI_RXPKT1H_DT_LP_CMD_MASK	VC4_MASK(7, 0)
+# define DSI_RXPKT1H_DT_LP_CMD_SHIFT	0
+
+#define DSI0_RXPKT2H		0x10 /* AKA RX2_PKTH */
+#define DSI1_RXPKT2H		0x18
+# define DSI_RXPKT1H_DET_ERR		BIT(30)
+# define DSI_RXPKT1H_ECC_ERR		BIT(29)
+# define DSI_RXPKT1H_COR_ERR		BIT(28)
+# define DSI_RXPKT1H_INCOMP_PKT		BIT(25)
+# define DSI_RXPKT1H_BC_PARAM_MASK	VC4_MASK(23, 8)
+# define DSI_RXPKT1H_BC_PARAM_SHIFT	8
+# define DSI_RXPKT1H_DT_MASK		VC4_MASK(7, 0)
+# define DSI_RXPKT1H_DT_SHIFT		0
+
+#define DSI0_TXPKT_CMD_FIFO	0x14 /* AKA CMD_DATAF */
+#define DSI1_TXPKT_CMD_FIFO	0x1c
+
+#define DSI0_DISP0_CTRL		0x18
+# define DSI_DISP0_PIX_CLK_DIV_MASK	VC4_MASK(21, 13)
+# define DSI_DISP0_PIX_CLK_DIV_SHIFT	13
+# define DSI_DISP0_LP_STOP_CTRL_MASK	VC4_MASK(12, 11)
+# define DSI_DISP0_LP_STOP_CTRL_SHIFT	11
+# define DSI_DISP0_LP_STOP_DISABLE	0
+# define DSI_DISP0_LP_STOP_PERLINE	1
+# define DSI_DISP0_LP_STOP_PERFRAME	2
+
+/* Transmit RGB pixels and null packets only during HACTIVE, instead
+ * of going to LP-STOP.
+ */
+# define DSI_DISP_HACTIVE_NULL		BIT(10)
+/* Transmit blanking packet only during vblank, instead of allowing LP-STOP. */
+# define DSI_DISP_VBLP_CTRL		BIT(9)
+/* Transmit blanking packet only during HFP, instead of allowing LP-STOP. */
+# define DSI_DISP_HFP_CTRL		BIT(8)
+/* Transmit blanking packet only during HBP, instead of allowing LP-STOP. */
+# define DSI_DISP_HBP_CTRL		BIT(7)
+# define DSI_DISP0_CHANNEL_MASK		VC4_MASK(6, 5)
+# define DSI_DISP0_CHANNEL_SHIFT	5
+/* Enables end events for HSYNC/VSYNC, not just start events. */
+# define DSI_DISP0_ST_END		BIT(4)
+# define DSI_DISP0_PFORMAT_MASK		VC4_MASK(3, 2)
+# define DSI_DISP0_PFORMAT_SHIFT	2
+# define DSI_PFORMAT_RGB565		0
+# define DSI_PFORMAT_RGB666_PACKED	1
+# define DSI_PFORMAT_RGB666		2
+# define DSI_PFORMAT_RGB888		3
+/* Default is VIDEO mode. */
+# define DSI_DISP0_COMMAND_MODE		BIT(1)
+# define DSI_DISP0_ENABLE		BIT(0)
+
+#define DSI0_DISP1_CTRL		0x1c
+#define DSI1_DISP1_CTRL		0x2c
+/* Format of the data written to TXPKT_PIX_FIFO. */
+# define DSI_DISP1_PFORMAT_MASK		VC4_MASK(2, 1)
+# define DSI_DISP1_PFORMAT_SHIFT	1
+# define DSI_DISP1_PFORMAT_16BIT	0
+# define DSI_DISP1_PFORMAT_24BIT	1
+# define DSI_DISP1_PFORMAT_32BIT_LE	2
+# define DSI_DISP1_PFORMAT_32BIT_BE	3
+
+/* DISP1 is always command mode. */
+# define DSI_DISP1_ENABLE		BIT(0)
+
+#define DSI0_TXPKT_PIX_FIFO		0x20 /* AKA PIX_FIFO */
+
+#define DSI0_INT_STAT		0x24
+#define DSI0_INT_EN		0x28
+# define DSI1_INT_PHY_D3_ULPS		BIT(30)
+# define DSI1_INT_PHY_D3_STOP		BIT(29)
+# define DSI1_INT_PHY_D2_ULPS		BIT(28)
+# define DSI1_INT_PHY_D2_STOP		BIT(27)
+# define DSI1_INT_PHY_D1_ULPS		BIT(26)
+# define DSI1_INT_PHY_D1_STOP		BIT(25)
+# define DSI1_INT_PHY_D0_ULPS		BIT(24)
+# define DSI1_INT_PHY_D0_STOP		BIT(23)
+# define DSI1_INT_FIFO_ERR		BIT(22)
+# define DSI1_INT_PHY_DIR_RTF		BIT(21)
+# define DSI1_INT_PHY_RXLPDT		BIT(20)
+# define DSI1_INT_PHY_RXTRIG		BIT(19)
+# define DSI1_INT_PHY_D0_LPDT		BIT(18)
+# define DSI1_INT_PHY_DIR_FTR		BIT(17)
+
+/* Signaled when the clock lane enters the given state. */
+# define DSI1_INT_PHY_CLOCK_ULPS	BIT(16)
+# define DSI1_INT_PHY_CLOCK_HS		BIT(15)
+# define DSI1_INT_PHY_CLOCK_STOP	BIT(14)
+
+/* Signaled on timeouts */
+# define DSI1_INT_PR_TO			BIT(13)
+# define DSI1_INT_TA_TO			BIT(12)
+# define DSI1_INT_LPRX_TO		BIT(11)
+# define DSI1_INT_HSTX_TO		BIT(10)
+
+/* Contention on a line when trying to drive the line low */
+# define DSI1_INT_ERR_CONT_LP1		BIT(9)
+# define DSI1_INT_ERR_CONT_LP0		BIT(8)
+
+/* Control error: incorrect line state sequence on data lane 0. */
+# define DSI1_INT_ERR_CONTROL		BIT(7)
+/* LPDT synchronization error (bits received not a multiple of 8. */
+
+# define DSI1_INT_ERR_SYNC_ESC		BIT(6)
+/* Signaled after receiving an error packet from the display in
+ * response to a read.
+ */
+# define DSI1_INT_RXPKT2		BIT(5)
+/* Signaled after receiving a packet.  The header and optional short
+ * response will be in RXPKT1H, and a long response will be in the
+ * RXPKT_FIFO.
+ */
+# define DSI1_INT_RXPKT1		BIT(4)
+# define DSI1_INT_TXPKT2_DONE		BIT(3)
+# define DSI1_INT_TXPKT2_END		BIT(2)
+/* Signaled after all repeats of TXPKT1 are transferred. */
+# define DSI1_INT_TXPKT1_DONE		BIT(1)
+/* Signaled after each TXPKT1 repeat is scheduled. */
+# define DSI1_INT_TXPKT1_END		BIT(0)
+
+#define DSI1_INTERRUPTS_ALWAYS_ENABLED	(DSI1_INT_ERR_SYNC_ESC | \
+					 DSI1_INT_ERR_CONTROL |	 \
+					 DSI1_INT_ERR_CONT_LP0 | \
+					 DSI1_INT_ERR_CONT_LP1 | \
+					 DSI1_INT_HSTX_TO |	 \
+					 DSI1_INT_LPRX_TO |	 \
+					 DSI1_INT_TA_TO |	 \
+					 DSI1_INT_PR_TO)
+
+#define DSI0_STAT		0x2c
+#define DSI0_HSTX_TO_CNT	0x30
+#define DSI0_LPRX_TO_CNT	0x34
+#define DSI0_TA_TO_CNT		0x38
+#define DSI0_PR_TO_CNT		0x3c
+#define DSI0_PHYC		0x40
+# define DSI1_PHYC_ESC_CLK_LPDT_MASK	VC4_MASK(25, 20)
+# define DSI1_PHYC_ESC_CLK_LPDT_SHIFT	20
+# define DSI1_PHYC_HS_CLK_CONTINUOUS	BIT(18)
+# define DSI0_PHYC_ESC_CLK_LPDT_MASK	VC4_MASK(17, 12)
+# define DSI0_PHYC_ESC_CLK_LPDT_SHIFT	12
+# define DSI1_PHYC_CLANE_ULPS		BIT(17)
+# define DSI1_PHYC_CLANE_ENABLE		BIT(16)
+# define DSI_PHYC_DLANE3_ULPS		BIT(13)
+# define DSI_PHYC_DLANE3_ENABLE		BIT(12)
+# define DSI0_PHYC_HS_CLK_CONTINUOUS	BIT(10)
+# define DSI0_PHYC_CLANE_ULPS		BIT(9)
+# define DSI_PHYC_DLANE2_ULPS		BIT(9)
+# define DSI0_PHYC_CLANE_ENABLE		BIT(8)
+# define DSI_PHYC_DLANE2_ENABLE		BIT(8)
+# define DSI_PHYC_DLANE1_ULPS		BIT(5)
+# define DSI_PHYC_DLANE1_ENABLE		BIT(4)
+# define DSI_PHYC_DLANE0_FORCE_STOP	BIT(2)
+# define DSI_PHYC_DLANE0_ULPS		BIT(1)
+# define DSI_PHYC_DLANE0_ENABLE		BIT(0)
+
+#define DSI0_HS_CLT0		0x44
+#define DSI0_HS_CLT1		0x48
+#define DSI0_HS_CLT2		0x4c
+#define DSI0_HS_DLT3		0x50
+#define DSI0_HS_DLT4		0x54
+#define DSI0_HS_DLT5		0x58
+#define DSI0_HS_DLT6		0x5c
+#define DSI0_HS_DLT7		0x60
+
+#define DSI0_PHY_AFEC0		0x64
+# define DSI0_PHY_AFEC0_DDR2CLK_EN		BIT(26)
+# define DSI0_PHY_AFEC0_DDRCLK_EN		BIT(25)
+# define DSI0_PHY_AFEC0_LATCH_ULPS		BIT(24)
+# define DSI1_PHY_AFEC0_IDR_DLANE3_MASK		VC4_MASK(31, 29)
+# define DSI1_PHY_AFEC0_IDR_DLANE3_SHIFT	29
+# define DSI1_PHY_AFEC0_IDR_DLANE2_MASK		VC4_MASK(28, 26)
+# define DSI1_PHY_AFEC0_IDR_DLANE2_SHIFT	26
+# define DSI1_PHY_AFEC0_IDR_DLANE1_MASK		VC4_MASK(27, 23)
+# define DSI1_PHY_AFEC0_IDR_DLANE1_SHIFT	23
+# define DSI1_PHY_AFEC0_IDR_DLANE0_MASK		VC4_MASK(22, 20)
+# define DSI1_PHY_AFEC0_IDR_DLANE0_SHIFT	20
+# define DSI1_PHY_AFEC0_IDR_CLANE_MASK		VC4_MASK(19, 17)
+# define DSI1_PHY_AFEC0_IDR_CLANE_SHIFT		17
+# define DSI0_PHY_AFEC0_ACTRL_DLANE1_MASK	VC4_MASK(23, 20)
+# define DSI0_PHY_AFEC0_ACTRL_DLANE1_SHIFT	20
+# define DSI0_PHY_AFEC0_ACTRL_DLANE0_MASK	VC4_MASK(19, 16)
+# define DSI0_PHY_AFEC0_ACTRL_DLANE0_SHIFT	16
+# define DSI0_PHY_AFEC0_ACTRL_CLANE_MASK	VC4_MASK(15, 12)
+# define DSI0_PHY_AFEC0_ACTRL_CLANE_SHIFT	12
+# define DSI1_PHY_AFEC0_DDR2CLK_EN		BIT(16)
+# define DSI1_PHY_AFEC0_DDRCLK_EN		BIT(15)
+# define DSI1_PHY_AFEC0_LATCH_ULPS		BIT(14)
+# define DSI1_PHY_AFEC0_RESET			BIT(13)
+# define DSI1_PHY_AFEC0_PD			BIT(12)
+# define DSI0_PHY_AFEC0_RESET			BIT(11)
+# define DSI1_PHY_AFEC0_PD_BG			BIT(11)
+# define DSI0_PHY_AFEC0_PD			BIT(10)
+# define DSI1_PHY_AFEC0_PD_DLANE3		BIT(10)
+# define DSI0_PHY_AFEC0_PD_BG			BIT(9)
+# define DSI1_PHY_AFEC0_PD_DLANE2		BIT(9)
+# define DSI0_PHY_AFEC0_PD_DLANE1		BIT(8)
+# define DSI1_PHY_AFEC0_PD_DLANE1		BIT(8)
+# define DSI_PHY_AFEC0_PTATADJ_MASK		VC4_MASK(7, 4)
+# define DSI_PHY_AFEC0_PTATADJ_SHIFT		4
+# define DSI_PHY_AFEC0_CTATADJ_MASK		VC4_MASK(3, 0)
+# define DSI_PHY_AFEC0_CTATADJ_SHIFT		0
+
+#define DSI0_PHY_AFEC1		0x68
+# define DSI0_PHY_AFEC1_IDR_DLANE1_MASK		VC4_MASK(10, 8)
+# define DSI0_PHY_AFEC1_IDR_DLANE1_SHIFT	8
+# define DSI0_PHY_AFEC1_IDR_DLANE0_MASK		VC4_MASK(6, 4)
+# define DSI0_PHY_AFEC1_IDR_DLANE0_SHIFT	4
+# define DSI0_PHY_AFEC1_IDR_CLANE_MASK		VC4_MASK(2, 0)
+# define DSI0_PHY_AFEC1_IDR_CLANE_SHIFT		0
+
+#define DSI0_TST_SEL		0x6c
+#define DSI0_TST_MON		0x70
+#define DSI0_ID			0x74
+# define DSI_ID_VALUE		0x00647369
+
+#define DSI1_CTRL		0x00
+# define DSI_CTRL_HS_CLKC_MASK		VC4_MASK(15, 14)
+# define DSI_CTRL_HS_CLKC_SHIFT		14
+# define DSI_CTRL_HS_CLKC_BYTE		0
+# define DSI_CTRL_HS_CLKC_DDR2		1
+# define DSI_CTRL_HS_CLKC_DDR		2
+
+# define DSI_CTRL_RX_LPDT_EOT_DISABLE	BIT(13)
+# define DSI_CTRL_LPDT_EOT_DISABLE	BIT(12)
+# define DSI_CTRL_HSDT_EOT_DISABLE	BIT(11)
+# define DSI_CTRL_SOFT_RESET_CFG	BIT(10)
+# define DSI_CTRL_CAL_BYTE		BIT(9)
+# define DSI_CTRL_INV_BYTE		BIT(8)
+# define DSI_CTRL_CLR_LDF		BIT(7)
+# define DSI0_CTRL_CLR_PBCF		BIT(6)
+# define DSI1_CTRL_CLR_RXF		BIT(6)
+# define DSI0_CTRL_CLR_CPBCF		BIT(5)
+# define DSI1_CTRL_CLR_PDF		BIT(5)
+# define DSI0_CTRL_CLR_PDF		BIT(4)
+# define DSI1_CTRL_CLR_CDF		BIT(4)
+# define DSI0_CTRL_CLR_CDF		BIT(3)
+# define DSI0_CTRL_CTRL2		BIT(2)
+# define DSI1_CTRL_DISABLE_DISP_CRCC	BIT(2)
+# define DSI0_CTRL_CTRL1		BIT(1)
+# define DSI1_CTRL_DISABLE_DISP_ECCC	BIT(1)
+# define DSI0_CTRL_CTRL0		BIT(0)
+# define DSI1_CTRL_EN			BIT(0)
+# define DSI0_CTRL_RESET_FIFOS		(DSI_CTRL_CLR_LDF | \
+					 DSI0_CTRL_CLR_PBCF | \
+					 DSI0_CTRL_CLR_CPBCF |	\
+					 DSI0_CTRL_CLR_PDF | \
+					 DSI0_CTRL_CLR_CDF)
+# define DSI1_CTRL_RESET_FIFOS		(DSI_CTRL_CLR_LDF | \
+					 DSI1_CTRL_CLR_RXF | \
+					 DSI1_CTRL_CLR_PDF | \
+					 DSI1_CTRL_CLR_CDF)
+
+#define DSI1_TXPKT2C		0x0c
+#define DSI1_TXPKT2H		0x10
+#define DSI1_TXPKT_PIX_FIFO	0x20
+#define DSI1_RXPKT_FIFO		0x24
+#define DSI1_DISP0_CTRL		0x28
+#define DSI1_INT_STAT		0x30
+#define DSI1_INT_EN		0x34
+/* State reporting bits.  These mostly behave like INT_STAT, where
+ * writing a 1 clears the bit.
+ */
+#define DSI1_STAT		0x38
+# define DSI1_STAT_PHY_D3_ULPS		BIT(31)
+# define DSI1_STAT_PHY_D3_STOP		BIT(30)
+# define DSI1_STAT_PHY_D2_ULPS		BIT(29)
+# define DSI1_STAT_PHY_D2_STOP		BIT(28)
+# define DSI1_STAT_PHY_D1_ULPS		BIT(27)
+# define DSI1_STAT_PHY_D1_STOP		BIT(26)
+# define DSI1_STAT_PHY_D0_ULPS		BIT(25)
+# define DSI1_STAT_PHY_D0_STOP		BIT(24)
+# define DSI1_STAT_FIFO_ERR		BIT(23)
+# define DSI1_STAT_PHY_RXLPDT		BIT(22)
+# define DSI1_STAT_PHY_RXTRIG		BIT(21)
+# define DSI1_STAT_PHY_D0_LPDT		BIT(20)
+/* Set when in forward direction */
+# define DSI1_STAT_PHY_DIR		BIT(19)
+# define DSI1_STAT_PHY_CLOCK_ULPS	BIT(18)
+# define DSI1_STAT_PHY_CLOCK_HS		BIT(17)
+# define DSI1_STAT_PHY_CLOCK_STOP	BIT(16)
+# define DSI1_STAT_PR_TO		BIT(15)
+# define DSI1_STAT_TA_TO		BIT(14)
+# define DSI1_STAT_LPRX_TO		BIT(13)
+# define DSI1_STAT_HSTX_TO		BIT(12)
+# define DSI1_STAT_ERR_CONT_LP1		BIT(11)
+# define DSI1_STAT_ERR_CONT_LP0		BIT(10)
+# define DSI1_STAT_ERR_CONTROL		BIT(9)
+# define DSI1_STAT_ERR_SYNC_ESC		BIT(8)
+# define DSI1_STAT_RXPKT2		BIT(7)
+# define DSI1_STAT_RXPKT1		BIT(6)
+# define DSI1_STAT_TXPKT2_BUSY		BIT(5)
+# define DSI1_STAT_TXPKT2_DONE		BIT(4)
+# define DSI1_STAT_TXPKT2_END		BIT(3)
+# define DSI1_STAT_TXPKT1_BUSY		BIT(2)
+# define DSI1_STAT_TXPKT1_DONE		BIT(1)
+# define DSI1_STAT_TXPKT1_END		BIT(0)
+
+#define DSI1_HSTX_TO_CNT	0x3c
+#define DSI1_LPRX_TO_CNT	0x40
+#define DSI1_TA_TO_CNT		0x44
+#define DSI1_PR_TO_CNT		0x48
+#define DSI1_PHYC		0x4c
+
+#define DSI1_HS_CLT0		0x50
+# define DSI_HS_CLT0_CZERO_MASK		VC4_MASK(26, 18)
+# define DSI_HS_CLT0_CZERO_SHIFT	18
+# define DSI_HS_CLT0_CPRE_MASK		VC4_MASK(17, 9)
+# define DSI_HS_CLT0_CPRE_SHIFT		9
+# define DSI_HS_CLT0_CPREP_MASK		VC4_MASK(8, 0)
+# define DSI_HS_CLT0_CPREP_SHIFT	0
+
+#define DSI1_HS_CLT1		0x54
+# define DSI_HS_CLT1_CTRAIL_MASK	VC4_MASK(17, 9)
+# define DSI_HS_CLT1_CTRAIL_SHIFT	9
+# define DSI_HS_CLT1_CPOST_MASK		VC4_MASK(8, 0)
+# define DSI_HS_CLT1_CPOST_SHIFT	0
+
+#define DSI1_HS_CLT2		0x58
+# define DSI_HS_CLT2_WUP_MASK		VC4_MASK(23, 0)
+# define DSI_HS_CLT2_WUP_SHIFT		0
+
+#define DSI1_HS_DLT3		0x5c
+# define DSI_HS_DLT3_EXIT_MASK		VC4_MASK(26, 18)
+# define DSI_HS_DLT3_EXIT_SHIFT		18
+# define DSI_HS_DLT3_ZERO_MASK		VC4_MASK(17, 9)
+# define DSI_HS_DLT3_ZERO_SHIFT		9
+# define DSI_HS_DLT3_PRE_MASK		VC4_MASK(8, 0)
+# define DSI_HS_DLT3_PRE_SHIFT		0
+
+#define DSI1_HS_DLT4		0x60
+# define DSI_HS_DLT4_ANLAT_MASK		VC4_MASK(22, 18)
+# define DSI_HS_DLT4_ANLAT_SHIFT	18
+# define DSI_HS_DLT4_TRAIL_MASK		VC4_MASK(17, 9)
+# define DSI_HS_DLT4_TRAIL_SHIFT	9
+# define DSI_HS_DLT4_LPX_MASK		VC4_MASK(8, 0)
+# define DSI_HS_DLT4_LPX_SHIFT		0
+
+#define DSI1_HS_DLT5		0x64
+# define DSI_HS_DLT5_INIT_MASK		VC4_MASK(23, 0)
+# define DSI_HS_DLT5_INIT_SHIFT		0
+
+#define DSI1_HS_DLT6		0x68
+# define DSI_HS_DLT6_TA_GET_MASK	VC4_MASK(31, 24)
+# define DSI_HS_DLT6_TA_GET_SHIFT	24
+# define DSI_HS_DLT6_TA_SURE_MASK	VC4_MASK(23, 16)
+# define DSI_HS_DLT6_TA_SURE_SHIFT	16
+# define DSI_HS_DLT6_TA_GO_MASK		VC4_MASK(15, 8)
+# define DSI_HS_DLT6_TA_GO_SHIFT	8
+# define DSI_HS_DLT6_LP_LPX_MASK	VC4_MASK(7, 0)
+# define DSI_HS_DLT6_LP_LPX_SHIFT	0
+
+#define DSI1_HS_DLT7		0x6c
+# define DSI_HS_DLT7_LP_WUP_MASK	VC4_MASK(23, 0)
+# define DSI_HS_DLT7_LP_WUP_SHIFT	0
+
+#define DSI1_PHY_AFEC0		0x70
+
+#define DSI1_PHY_AFEC1		0x74
+# define DSI1_PHY_AFEC1_ACTRL_DLANE3_MASK	VC4_MASK(19, 16)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE3_SHIFT	16
+# define DSI1_PHY_AFEC1_ACTRL_DLANE2_MASK	VC4_MASK(15, 12)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE2_SHIFT	12
+# define DSI1_PHY_AFEC1_ACTRL_DLANE1_MASK	VC4_MASK(11, 8)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE1_SHIFT	8
+# define DSI1_PHY_AFEC1_ACTRL_DLANE0_MASK	VC4_MASK(7, 4)
+# define DSI1_PHY_AFEC1_ACTRL_DLANE0_SHIFT	4
+# define DSI1_PHY_AFEC1_ACTRL_CLANE_MASK	VC4_MASK(3, 0)
+# define DSI1_PHY_AFEC1_ACTRL_CLANE_SHIFT	0
+
+#define DSI1_TST_SEL		0x78
+#define DSI1_TST_MON		0x7c
+#define DSI1_PHY_TST1		0x80
+#define DSI1_PHY_TST2		0x84
+#define DSI1_PHY_FIFO_STAT	0x88
+/* Actually, all registers in the range that aren't otherwise claimed
+ * will return the ID.
+ */
+#define DSI1_ID			0x8c
+
+/* General DSI hardware state. */
+struct vc4_dsi {
+	struct platform_device *pdev;
+
+	struct mipi_dsi_host dsi_host;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	struct drm_panel *panel;
+
+	void __iomem *regs;
+
+	struct dma_chan *reg_dma_chan;
+	dma_addr_t reg_dma_paddr;
+	u32 *reg_dma_mem;
+	dma_addr_t reg_paddr;
+
+	/* Whether we're on bcm2835's DSI0 or DSI1. */
+	int port;
+
+	/* DSI channel for the panel we're connected to. */
+	u32 channel;
+	u32 lanes;
+	enum mipi_dsi_pixel_format format;
+	u32 mode_flags;
+
+	/* Input clock from CPRMAN to the digital PHY, for the DSI
+	 * escape clock.
+	 */
+	struct clk *escape_clock;
+
+	/* Input clock to the analog PHY, used to generate the DSI bit
+	 * clock.
+	 */
+	struct clk *pll_phy_clock;
+
+	/* HS Clocks generated within the DSI analog PHY. */
+	struct clk_fixed_factor phy_clocks[3];
+
+	struct clk_hw_onecell_data *clk_onecell;
+
+	/* Pixel clock output to the pixelvalve, generated from the HS
+	 * clock.
+	 */
+	struct clk *pixel_clock;
+
+	struct completion xfer_completion;
+	int xfer_result;
+};
+
+#define host_to_dsi(host) container_of(host, struct vc4_dsi, dsi_host)
+
+static inline void
+dsi_dma_workaround_write(struct vc4_dsi *dsi, u32 offset, u32 val)
+{
+	struct dma_chan *chan = dsi->reg_dma_chan;
+	struct dma_async_tx_descriptor *tx;
+	dma_cookie_t cookie;
+	int ret;
+
+	/* DSI0 should be able to write normally. */
+	if (!chan) {
+		writel(val, dsi->regs + offset);
+		return;
+	}
+
+	*dsi->reg_dma_mem = val;
+
+	tx = chan->device->device_prep_dma_memcpy(chan,
+						  dsi->reg_paddr + offset,
+						  dsi->reg_dma_paddr,
+						  4, 0);
+	if (!tx) {
+		DRM_ERROR("Failed to set up DMA register write\n");
+		return;
+	}
+
+	cookie = tx->tx_submit(tx);
+	ret = dma_submit_error(cookie);
+	if (ret) {
+		DRM_ERROR("Failed to submit DMA: %d\n", ret);
+		return;
+	}
+	ret = dma_sync_wait(chan, cookie);
+	if (ret)
+		DRM_ERROR("Failed to wait for DMA: %d\n", ret);
+}
+
+#define DSI_READ(offset) readl(dsi->regs + (offset))
+#define DSI_WRITE(offset, val) dsi_dma_workaround_write(dsi, offset, val)
+#define DSI_PORT_READ(offset) \
+	DSI_READ(dsi->port ? DSI1_##offset : DSI0_##offset)
+#define DSI_PORT_WRITE(offset, val) \
+	DSI_WRITE(dsi->port ? DSI1_##offset : DSI0_##offset, val)
+#define DSI_PORT_BIT(bit) (dsi->port ? DSI1_##bit : DSI0_##bit)
+
+/* VC4 DSI encoder KMS struct */
+struct vc4_dsi_encoder {
+	struct vc4_encoder base;
+	struct vc4_dsi *dsi;
+};
+
+static inline struct vc4_dsi_encoder *
+to_vc4_dsi_encoder(struct drm_encoder *encoder)
+{
+	return container_of(encoder, struct vc4_dsi_encoder, base.base);
+}
+
+/* VC4 DSI connector KMS struct */
+struct vc4_dsi_connector {
+	struct drm_connector base;
+	struct vc4_dsi *dsi;
+};
+
+static inline struct vc4_dsi_connector *
+to_vc4_dsi_connector(struct drm_connector *connector)
+{
+	return container_of(connector, struct vc4_dsi_connector, base);
+}
+
+#define DSI_REG(reg) { reg, #reg }
+static const struct {
+	u32 reg;
+	const char *name;
+} dsi0_regs[] = {
+	DSI_REG(DSI0_CTRL),
+	DSI_REG(DSI0_STAT),
+	DSI_REG(DSI0_HSTX_TO_CNT),
+	DSI_REG(DSI0_LPRX_TO_CNT),
+	DSI_REG(DSI0_TA_TO_CNT),
+	DSI_REG(DSI0_PR_TO_CNT),
+	DSI_REG(DSI0_DISP0_CTRL),
+	DSI_REG(DSI0_DISP1_CTRL),
+	DSI_REG(DSI0_INT_STAT),
+	DSI_REG(DSI0_INT_EN),
+	DSI_REG(DSI0_PHYC),
+	DSI_REG(DSI0_HS_CLT0),
+	DSI_REG(DSI0_HS_CLT1),
+	DSI_REG(DSI0_HS_CLT2),
+	DSI_REG(DSI0_HS_DLT3),
+	DSI_REG(DSI0_HS_DLT4),
+	DSI_REG(DSI0_HS_DLT5),
+	DSI_REG(DSI0_HS_DLT6),
+	DSI_REG(DSI0_HS_DLT7),
+	DSI_REG(DSI0_PHY_AFEC0),
+	DSI_REG(DSI0_PHY_AFEC1),
+	DSI_REG(DSI0_ID),
+};
+
+static const struct {
+	u32 reg;
+	const char *name;
+} dsi1_regs[] = {
+	DSI_REG(DSI1_CTRL),
+	DSI_REG(DSI1_STAT),
+	DSI_REG(DSI1_HSTX_TO_CNT),
+	DSI_REG(DSI1_LPRX_TO_CNT),
+	DSI_REG(DSI1_TA_TO_CNT),
+	DSI_REG(DSI1_PR_TO_CNT),
+	DSI_REG(DSI1_DISP0_CTRL),
+	DSI_REG(DSI1_DISP1_CTRL),
+	DSI_REG(DSI1_INT_STAT),
+	DSI_REG(DSI1_INT_EN),
+	DSI_REG(DSI1_PHYC),
+	DSI_REG(DSI1_HS_CLT0),
+	DSI_REG(DSI1_HS_CLT1),
+	DSI_REG(DSI1_HS_CLT2),
+	DSI_REG(DSI1_HS_DLT3),
+	DSI_REG(DSI1_HS_DLT4),
+	DSI_REG(DSI1_HS_DLT5),
+	DSI_REG(DSI1_HS_DLT6),
+	DSI_REG(DSI1_HS_DLT7),
+	DSI_REG(DSI1_PHY_AFEC0),
+	DSI_REG(DSI1_PHY_AFEC1),
+	DSI_REG(DSI1_ID),
+};
+
+static void vc4_dsi_dump_regs(struct vc4_dsi *dsi)
+{
+	int i;
+
+	if (dsi->port == 0) {
+		for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
+			DRM_INFO("0x%04x (%s): 0x%08x\n",
+				 dsi0_regs[i].reg, dsi0_regs[i].name,
+				 DSI_READ(dsi0_regs[i].reg));
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
+			DRM_INFO("0x%04x (%s): 0x%08x\n",
+				 dsi1_regs[i].reg, dsi1_regs[i].name,
+				 DSI_READ(dsi1_regs[i].reg));
+		}
+	}
+}
+
+#ifdef CONFIG_DEBUG_FS
+int vc4_dsi_debugfs_regs(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *)m->private;
+	struct drm_device *drm = node->minor->dev;
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	int dsi_index = (uintptr_t)node->info_ent->data;
+	struct vc4_dsi *dsi = (dsi_index == 1 ? vc4->dsi1 : NULL);
+	int i;
+
+	if (!dsi)
+		return 0;
+
+	if (dsi->port == 0) {
+		for (i = 0; i < ARRAY_SIZE(dsi0_regs); i++) {
+			seq_printf(m, "0x%04x (%s): 0x%08x\n",
+				   dsi0_regs[i].reg, dsi0_regs[i].name,
+				   DSI_READ(dsi0_regs[i].reg));
+		}
+	} else {
+		for (i = 0; i < ARRAY_SIZE(dsi1_regs); i++) {
+			seq_printf(m, "0x%04x (%s): 0x%08x\n",
+				   dsi1_regs[i].reg, dsi1_regs[i].name,
+				   DSI_READ(dsi1_regs[i].reg));
+		}
+	}
+
+	return 0;
+}
+#endif
+
+static enum drm_connector_status
+vc4_dsi_connector_detect(struct drm_connector *connector, bool force)
+{
+	struct vc4_dsi_connector *vc4_connector =
+		to_vc4_dsi_connector(connector);
+	struct vc4_dsi *dsi = vc4_connector->dsi;
+
+	if (dsi->panel)
+		return connector_status_connected;
+	else
+		return connector_status_disconnected;
+}
+
+static void vc4_dsi_connector_destroy(struct drm_connector *connector)
+{
+	drm_connector_unregister(connector);
+	drm_connector_cleanup(connector);
+}
+
+static int vc4_dsi_connector_get_modes(struct drm_connector *connector)
+{
+	struct vc4_dsi_connector *vc4_connector =
+		to_vc4_dsi_connector(connector);
+	struct vc4_dsi *dsi = vc4_connector->dsi;
+
+	if (dsi->panel)
+		return drm_panel_get_modes(dsi->panel);
+
+	return 0;
+}
+
+static const struct drm_connector_funcs vc4_dsi_connector_funcs = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.detect = vc4_dsi_connector_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.destroy = vc4_dsi_connector_destroy,
+	.reset = drm_atomic_helper_connector_reset,
+	.atomic_duplicate_state = drm_atomic_helper_connector_duplicate_state,
+	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
+};
+
+static const struct drm_connector_helper_funcs vc4_dsi_connector_helper_funcs = {
+	.get_modes = vc4_dsi_connector_get_modes,
+};
+
+static struct drm_connector *vc4_dsi_connector_init(struct drm_device *dev,
+						    struct vc4_dsi *dsi)
+{
+	struct drm_connector *connector = NULL;
+	struct vc4_dsi_connector *dsi_connector;
+	int ret = 0;
+
+	dsi_connector = devm_kzalloc(dev->dev, sizeof(*dsi_connector),
+				     GFP_KERNEL);
+	if (!dsi_connector) {
+		ret = -ENOMEM;
+		goto fail;
+	}
+	connector = &dsi_connector->base;
+
+	dsi_connector->dsi = dsi;
+
+	drm_connector_init(dev, connector, &vc4_dsi_connector_funcs,
+			   DRM_MODE_CONNECTOR_DSI);
+	drm_connector_helper_add(connector, &vc4_dsi_connector_helper_funcs);
+
+	connector->polled = 0;
+	connector->interlace_allowed = 0;
+	connector->doublescan_allowed = 0;
+
+	drm_mode_connector_attach_encoder(connector, dsi->encoder);
+
+	return connector;
+
+fail:
+	if (connector)
+		vc4_dsi_connector_destroy(connector);
+
+	return ERR_PTR(ret);
+}
+
+static void vc4_dsi_encoder_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs vc4_dsi_encoder_funcs = {
+	.destroy = vc4_dsi_encoder_destroy,
+};
+
+static void vc4_dsi_latch_ulps(struct vc4_dsi *dsi, bool latch)
+{
+	u32 afec0 = DSI_PORT_READ(PHY_AFEC0);
+
+	if (latch)
+		afec0 |= DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS);
+	else
+		afec0 &= ~DSI_PORT_BIT(PHY_AFEC0_LATCH_ULPS);
+
+	DSI_PORT_WRITE(PHY_AFEC0, afec0);
+}
+
+/* Enters or exits Ultra Low Power State. */
+static void vc4_dsi_ulps(struct vc4_dsi *dsi, bool ulps)
+{
+	bool continuous = dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS;
+	u32 phyc_ulps = ((continuous ? DSI_PORT_BIT(PHYC_CLANE_ULPS) : 0) |
+			 DSI_PHYC_DLANE0_ULPS |
+			 (dsi->lanes > 1 ? DSI_PHYC_DLANE1_ULPS : 0) |
+			 (dsi->lanes > 2 ? DSI_PHYC_DLANE2_ULPS : 0) |
+			 (dsi->lanes > 3 ? DSI_PHYC_DLANE3_ULPS : 0));
+	u32 stat_ulps = ((continuous ? DSI1_STAT_PHY_CLOCK_ULPS : 0) |
+			 DSI1_STAT_PHY_D0_ULPS |
+			 (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_ULPS : 0) |
+			 (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_ULPS : 0) |
+			 (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_ULPS : 0));
+	u32 stat_stop = ((continuous ? DSI1_STAT_PHY_CLOCK_STOP : 0) |
+			 DSI1_STAT_PHY_D0_STOP |
+			 (dsi->lanes > 1 ? DSI1_STAT_PHY_D1_STOP : 0) |
+			 (dsi->lanes > 2 ? DSI1_STAT_PHY_D2_STOP : 0) |
+			 (dsi->lanes > 3 ? DSI1_STAT_PHY_D3_STOP : 0));
+	int ret;
+
+	DSI_PORT_WRITE(STAT, stat_ulps);
+	DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) | phyc_ulps);
+	ret = wait_for((DSI_PORT_READ(STAT) & stat_ulps) == stat_ulps, 200);
+	if (ret) {
+		dev_warn(&dsi->pdev->dev,
+			 "Timeout waiting for DSI ULPS entry: STAT 0x%08x",
+			 DSI_PORT_READ(STAT));
+		DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+		vc4_dsi_latch_ulps(dsi, false);
+		return;
+	}
+
+	/* The DSI module can't be disabled while the module is
+	 * generating ULPS state.  So, to be able to disable the
+	 * module, we have the AFE latch the ULPS state and continue
+	 * on to having the module enter STOP.
+	 */
+	vc4_dsi_latch_ulps(dsi, ulps);
+
+	DSI_PORT_WRITE(STAT, stat_stop);
+	DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+	ret = wait_for((DSI_PORT_READ(STAT) & stat_stop) == stat_stop, 200);
+	if (ret) {
+		dev_warn(&dsi->pdev->dev,
+			 "Timeout waiting for DSI STOP entry: STAT 0x%08x",
+			 DSI_PORT_READ(STAT));
+		DSI_PORT_WRITE(PHYC, DSI_PORT_READ(PHYC) & ~phyc_ulps);
+		return;
+	}
+}
+
+static u32
+dsi_hs_timing(u32 ui_ns, u32 ns, u32 ui)
+{
+	/* The HS timings have to be rounded up to a multiple of 8
+	 * because we're using the byte clock.
+	 */
+	return roundup(ui + DIV_ROUND_UP(ns, ui_ns), 8);
+}
+
+/* ESC always runs at 100Mhz. */
+#define ESC_TIME_NS 10
+
+static u32
+dsi_esc_timing(u32 ns)
+{
+	return DIV_ROUND_UP(ns, ESC_TIME_NS);
+}
+
+static void vc4_dsi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder);
+	struct vc4_dsi *dsi = vc4_encoder->dsi;
+	struct device *dev = &dsi->pdev->dev;
+
+	drm_panel_disable(dsi->panel);
+
+	vc4_dsi_ulps(dsi, true);
+
+	drm_panel_unprepare(dsi->panel);
+
+	clk_disable_unprepare(dsi->pll_phy_clock);
+	clk_disable_unprepare(dsi->escape_clock);
+	clk_disable_unprepare(dsi->pixel_clock);
+
+	pm_runtime_put(dev);
+}
+
+static void vc4_dsi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct drm_display_mode *mode = &encoder->crtc->mode;
+	struct vc4_dsi_encoder *vc4_encoder = to_vc4_dsi_encoder(encoder);
+	struct vc4_dsi *dsi = vc4_encoder->dsi;
+	struct device *dev = &dsi->pdev->dev;
+	u32 format = 0, divider = 0;
+	bool debug_dump_regs = false;
+	unsigned long hs_clock;
+	u32 ui_ns;
+	/* Minimum LP state duration in escape clock cycles. */
+	u32 lpx = dsi_esc_timing(60);
+	unsigned long pixel_clock_hz = mode->clock * 1000;
+	unsigned long dsip_clock;
+	unsigned long phy_clock;
+	int ret;
+
+	ret = pm_runtime_get_sync(dev);
+	if (ret) {
+		DRM_ERROR("Failed to runtime PM enable on DSI%d\n", dsi->port);
+		return;
+	}
+
+	ret = drm_panel_prepare(dsi->panel);
+	if (ret) {
+		DRM_ERROR("Panel failed to prepare\n");
+		return;
+	}
+
+	if (debug_dump_regs) {
+		DRM_INFO("DSI regs before:\n");
+		vc4_dsi_dump_regs(dsi);
+	}
+
+	switch (dsi->format) {
+	case MIPI_DSI_FMT_RGB888:
+		format = DSI_PFORMAT_RGB888;
+		divider = 24 / dsi->lanes;
+		break;
+	case MIPI_DSI_FMT_RGB666:
+		format = DSI_PFORMAT_RGB666;
+		divider = 24 / dsi->lanes;
+		break;
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		format = DSI_PFORMAT_RGB666_PACKED;
+		divider = 18 / dsi->lanes;
+		break;
+	case MIPI_DSI_FMT_RGB565:
+		format = DSI_PFORMAT_RGB565;
+		divider = 16 / dsi->lanes;
+		break;
+	}
+
+	phy_clock = pixel_clock_hz * divider;
+	ret = clk_set_rate(dsi->pll_phy_clock, phy_clock);
+	if (ret) {
+		dev_err(&dsi->pdev->dev,
+			"Failed to set phy clock to %ld: %d\n", phy_clock, ret);
+	}
+
+	/* Reset the DSI and all its fifos. */
+	DSI_PORT_WRITE(CTRL,
+		       DSI_CTRL_SOFT_RESET_CFG |
+		       DSI_PORT_BIT(CTRL_RESET_FIFOS));
+
+	DSI_PORT_WRITE(CTRL,
+		       DSI_CTRL_HSDT_EOT_DISABLE |
+		       DSI_CTRL_RX_LPDT_EOT_DISABLE);
+
+	/* Clear all stat bits so we see what has happened during enable. */
+	DSI_PORT_WRITE(STAT, DSI_PORT_READ(STAT));
+
+	/* Set AFE CTR00/CTR1 to release powerdown of analog. */
+	if (dsi->port == 0) {
+		u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) |
+			     VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ));
+
+		if (dsi->lanes < 2)
+			afec0 |= DSI0_PHY_AFEC0_PD_DLANE1;
+
+		if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO))
+			afec0 |= DSI0_PHY_AFEC0_RESET;
+
+		DSI_PORT_WRITE(PHY_AFEC0, afec0);
+
+		DSI_PORT_WRITE(PHY_AFEC1,
+			       VC4_SET_FIELD(6,  DSI0_PHY_AFEC1_IDR_DLANE1) |
+			       VC4_SET_FIELD(6,  DSI0_PHY_AFEC1_IDR_DLANE0) |
+			       VC4_SET_FIELD(6,  DSI0_PHY_AFEC1_IDR_CLANE));
+	} else {
+		u32 afec0 = (VC4_SET_FIELD(7, DSI_PHY_AFEC0_PTATADJ) |
+			     VC4_SET_FIELD(7, DSI_PHY_AFEC0_CTATADJ) |
+			     VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_CLANE) |
+			     VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE0) |
+			     VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE1) |
+			     VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE2) |
+			     VC4_SET_FIELD(6, DSI1_PHY_AFEC0_IDR_DLANE3));
+
+		if (dsi->lanes < 4)
+			afec0 |= DSI1_PHY_AFEC0_PD_DLANE3;
+		if (dsi->lanes < 3)
+			afec0 |= DSI1_PHY_AFEC0_PD_DLANE2;
+		if (dsi->lanes < 2)
+			afec0 |= DSI1_PHY_AFEC0_PD_DLANE1;
+
+		afec0 |= DSI1_PHY_AFEC0_RESET;
+
+		DSI_PORT_WRITE(PHY_AFEC0, afec0);
+
+		DSI_PORT_WRITE(PHY_AFEC1, 0);
+
+		/* AFEC reset hold time */
+		mdelay(1);
+	}
+
+	ret = clk_prepare_enable(dsi->escape_clock);
+	if (ret) {
+		DRM_ERROR("Failed to turn on DSI escape clock: %d\n", ret);
+		return;
+	}
+
+	ret = clk_prepare_enable(dsi->pll_phy_clock);
+	if (ret) {
+		DRM_ERROR("Failed to turn on DSI PLL: %d\n", ret);
+		return;
+	}
+
+	hs_clock = clk_get_rate(dsi->pll_phy_clock);
+
+	/* Yes, we set the DSI0P/DSI1P pixel clock to the byte rate,
+	 * not the pixel clock rate.  DSIxP take from the APHY's byte,
+	 * DDR2, or DDR4 clock (we use byte) and feed into the PV at
+	 * that rate.  Separately, a value derived from PIX_CLK_DIV
+	 * and HS_CLKC is fed into the PV to divide down to the actual
+	 * pixel clock for pushing pixels into DSI.
+	 */
+	dsip_clock = phy_clock / 8;
+	ret = clk_set_rate(dsi->pixel_clock, dsip_clock);
+	if (ret) {
+		dev_err(dev, "Failed to set pixel clock to %ldHz: %d\n",
+			dsip_clock, ret);
+	}
+
+	ret = clk_prepare_enable(dsi->pixel_clock);
+	if (ret) {
+		DRM_ERROR("Failed to turn on DSI pixel clock: %d\n", ret);
+		return;
+	}
+
+	/* How many ns one DSI unit interval is.  Note that the clock
+	 * is DDR, so there's an extra divide by 2.
+	 */
+	ui_ns = DIV_ROUND_UP(500000000, hs_clock);
+
+	DSI_PORT_WRITE(HS_CLT0,
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 262, 0),
+				     DSI_HS_CLT0_CZERO) |
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 0, 8),
+				     DSI_HS_CLT0_CPRE) |
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 38, 0),
+				     DSI_HS_CLT0_CPREP));
+
+	DSI_PORT_WRITE(HS_CLT1,
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 0),
+				     DSI_HS_CLT1_CTRAIL) |
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 60, 52),
+				     DSI_HS_CLT1_CPOST));
+
+	DSI_PORT_WRITE(HS_CLT2,
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000000, 0),
+				     DSI_HS_CLT2_WUP));
+
+	DSI_PORT_WRITE(HS_DLT3,
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 100, 0),
+				     DSI_HS_DLT3_EXIT) |
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 105, 6),
+				     DSI_HS_DLT3_ZERO) |
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, 40, 4),
+				     DSI_HS_DLT3_PRE));
+
+	DSI_PORT_WRITE(HS_DLT4,
+		       VC4_SET_FIELD(dsi_hs_timing(ui_ns, lpx * ESC_TIME_NS, 0),
+				     DSI_HS_DLT4_LPX) |
+		       VC4_SET_FIELD(max(dsi_hs_timing(ui_ns, 0, 8),
+					 dsi_hs_timing(ui_ns, 60, 4)),
+				     DSI_HS_DLT4_TRAIL) |
+		       VC4_SET_FIELD(0, DSI_HS_DLT4_ANLAT));
+
+	DSI_PORT_WRITE(HS_DLT5, VC4_SET_FIELD(dsi_hs_timing(ui_ns, 1000, 5000),
+					      DSI_HS_DLT5_INIT));
+
+	DSI_PORT_WRITE(HS_DLT6,
+		       VC4_SET_FIELD(lpx * 5, DSI_HS_DLT6_TA_GET) |
+		       VC4_SET_FIELD(lpx, DSI_HS_DLT6_TA_SURE) |
+		       VC4_SET_FIELD(lpx * 4, DSI_HS_DLT6_TA_GO) |
+		       VC4_SET_FIELD(lpx, DSI_HS_DLT6_LP_LPX));
+
+	DSI_PORT_WRITE(HS_DLT7,
+		       VC4_SET_FIELD(dsi_esc_timing(1000000),
+				     DSI_HS_DLT7_LP_WUP));
+
+	DSI_PORT_WRITE(PHYC,
+		       DSI_PHYC_DLANE0_ENABLE |
+		       (dsi->lanes >= 2 ? DSI_PHYC_DLANE1_ENABLE : 0) |
+		       (dsi->lanes >= 3 ? DSI_PHYC_DLANE2_ENABLE : 0) |
+		       (dsi->lanes >= 4 ? DSI_PHYC_DLANE3_ENABLE : 0) |
+		       DSI_PORT_BIT(PHYC_CLANE_ENABLE) |
+		       ((dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) ?
+			0 : DSI_PORT_BIT(PHYC_HS_CLK_CONTINUOUS)) |
+		       (dsi->port == 0 ?
+			VC4_SET_FIELD(lpx - 1, DSI0_PHYC_ESC_CLK_LPDT) :
+			VC4_SET_FIELD(lpx - 1, DSI1_PHYC_ESC_CLK_LPDT)));
+
+	DSI_PORT_WRITE(CTRL,
+		       DSI_PORT_READ(CTRL) |
+		       DSI_CTRL_CAL_BYTE);
+
+	/* HS timeout in HS clock cycles: disabled. */
+	DSI_PORT_WRITE(HSTX_TO_CNT, 0);
+	/* LP receive timeout in HS clocks. */
+	DSI_PORT_WRITE(LPRX_TO_CNT, 0xffffff);
+	/* Bus turnaround timeout */
+	DSI_PORT_WRITE(TA_TO_CNT, 100000);
+	/* Display reset sequence timeout */
+	DSI_PORT_WRITE(PR_TO_CNT, 100000);
+
+	if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
+		DSI_PORT_WRITE(DISP0_CTRL,
+			       VC4_SET_FIELD(divider, DSI_DISP0_PIX_CLK_DIV) |
+			       VC4_SET_FIELD(format, DSI_DISP0_PFORMAT) |
+			       VC4_SET_FIELD(DSI_DISP0_LP_STOP_PERFRAME,
+					     DSI_DISP0_LP_STOP_CTRL) |
+			       DSI_DISP0_ST_END |
+			       DSI_DISP0_ENABLE);
+	} else {
+		DSI_PORT_WRITE(DISP0_CTRL,
+			       DSI_DISP0_COMMAND_MODE |
+			       DSI_DISP0_ENABLE);
+	}
+
+	/* Set up DISP1 for transferring long command payloads through
+	 * the pixfifo.
+	 */
+	DSI_PORT_WRITE(DISP1_CTRL,
+		       VC4_SET_FIELD(DSI_DISP1_PFORMAT_32BIT_LE,
+				     DSI_DISP1_PFORMAT) |
+		       DSI_DISP1_ENABLE);
+
+	/* Ungate the block. */
+	if (dsi->port == 0)
+		DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI0_CTRL_CTRL0);
+	else
+		DSI_PORT_WRITE(CTRL, DSI_PORT_READ(CTRL) | DSI1_CTRL_EN);
+
+	/* Bring AFE out of reset. */
+	if (dsi->port == 0) {
+	} else {
+		DSI_PORT_WRITE(PHY_AFEC0,
+			       DSI_PORT_READ(PHY_AFEC0) &
+			       ~DSI1_PHY_AFEC0_RESET);
+	}
+
+	vc4_dsi_ulps(dsi, false);
+
+	if (debug_dump_regs) {
+		DRM_INFO("DSI regs after:\n");
+		vc4_dsi_dump_regs(dsi);
+	}
+
+	ret = drm_panel_enable(dsi->panel);
+	if (ret) {
+		DRM_ERROR("Panel failed to enable\n");
+		drm_panel_unprepare(dsi->panel);
+		return;
+	}
+}
+
+static ssize_t vc4_dsi_host_transfer(struct mipi_dsi_host *host,
+				     const struct mipi_dsi_msg *msg)
+{
+	struct vc4_dsi *dsi = host_to_dsi(host);
+	struct mipi_dsi_packet packet;
+	u32 pkth = 0, pktc = 0;
+	int i, ret;
+	bool is_long = mipi_dsi_packet_format_is_long(msg->type);
+	u32 cmd_fifo_len = 0, pix_fifo_len = 0;
+
+	mipi_dsi_create_packet(&packet, msg);
+
+	pkth |= VC4_SET_FIELD(packet.header[0], DSI_TXPKT1H_BC_DT);
+	pkth |= VC4_SET_FIELD(packet.header[1] |
+			      (packet.header[2] << 8),
+			      DSI_TXPKT1H_BC_PARAM);
+	if (is_long) {
+		/* Divide data across the various FIFOs we have available.
+		 * The command FIFO takes byte-oriented data, but is of
+		 * limited size. The pixel FIFO (never actually used for
+		 * pixel data in reality) is word oriented, and substantially
+		 * larger. So, we use the pixel FIFO for most of the data,
+		 * sending the residual bytes in the command FIFO at the start.
+		 *
+		 * With this arrangement, the command FIFO will never get full.
+		 */
+		if (packet.payload_length <= 16) {
+			cmd_fifo_len = packet.payload_length;
+			pix_fifo_len = 0;
+		} else {
+			cmd_fifo_len = (packet.payload_length %
+					DSI_PIX_FIFO_WIDTH);
+			pix_fifo_len = ((packet.payload_length - cmd_fifo_len) /
+					DSI_PIX_FIFO_WIDTH);
+		}
+
+		WARN_ON_ONCE(pix_fifo_len >= DSI_PIX_FIFO_DEPTH);
+
+		pkth |= VC4_SET_FIELD(cmd_fifo_len, DSI_TXPKT1H_BC_CMDFIFO);
+	}
+
+	if (msg->rx_len) {
+		pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_RX,
+				      DSI_TXPKT1C_CMD_CTRL);
+	} else {
+		pktc |= VC4_SET_FIELD(DSI_TXPKT1C_CMD_CTRL_TX,
+				      DSI_TXPKT1C_CMD_CTRL);
+	}
+
+	for (i = 0; i < cmd_fifo_len; i++)
+		DSI_PORT_WRITE(TXPKT_CMD_FIFO, packet.payload[i]);
+	for (i = 0; i < pix_fifo_len; i++) {
+		const u8 *pix = packet.payload + cmd_fifo_len + i * 4;
+
+		DSI_PORT_WRITE(TXPKT_PIX_FIFO,
+			       pix[0] |
+			       pix[1] << 8 |
+			       pix[2] << 16 |
+			       pix[3] << 24);
+	}
+
+	if (msg->flags & MIPI_DSI_MSG_USE_LPM)
+		pktc |= DSI_TXPKT1C_CMD_MODE_LP;
+	if (is_long)
+		pktc |= DSI_TXPKT1C_CMD_TYPE_LONG;
+
+	/* Send one copy of the packet.  Larger repeats are used for pixel
+	 * data in command mode.
+	 */
+	pktc |= VC4_SET_FIELD(1, DSI_TXPKT1C_CMD_REPEAT);
+
+	pktc |= DSI_TXPKT1C_CMD_EN;
+	if (pix_fifo_len) {
+		pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SECONDARY,
+				      DSI_TXPKT1C_DISPLAY_NO);
+	} else {
+		pktc |= VC4_SET_FIELD(DSI_TXPKT1C_DISPLAY_NO_SHORT,
+				      DSI_TXPKT1C_DISPLAY_NO);
+	}
+
+	/* Enable the appropriate interrupt for the transfer completion. */
+	dsi->xfer_result = 0;
+	reinit_completion(&dsi->xfer_completion);
+	DSI_PORT_WRITE(INT_STAT, DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF);
+	if (msg->rx_len) {
+		DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+					DSI1_INT_PHY_DIR_RTF));
+	} else {
+		DSI_PORT_WRITE(INT_EN, (DSI1_INTERRUPTS_ALWAYS_ENABLED |
+					DSI1_INT_TXPKT1_DONE));
+	}
+
+	/* Send the packet. */
+	DSI_PORT_WRITE(TXPKT1H, pkth);
+	DSI_PORT_WRITE(TXPKT1C, pktc);
+
+	if (!wait_for_completion_timeout(&dsi->xfer_completion,
+					 msecs_to_jiffies(1000))) {
+		dev_err(&dsi->pdev->dev, "transfer interrupt wait timeout");
+		dev_err(&dsi->pdev->dev, "instat: 0x%08x\n",
+			DSI_PORT_READ(INT_STAT));
+		ret = -ETIMEDOUT;
+	} else {
+		ret = dsi->xfer_result;
+	}
+
+	DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+
+	if (ret)
+		goto reset_fifo_and_return;
+
+	if (ret == 0 && msg->rx_len) {
+		u32 rxpkt1h = DSI_PORT_READ(RXPKT1H);
+		u8 *msg_rx = msg->rx_buf;
+
+		if (rxpkt1h & DSI_RXPKT1H_PKT_TYPE_LONG) {
+			u32 rxlen = VC4_GET_FIELD(rxpkt1h,
+						  DSI_RXPKT1H_BC_PARAM);
+
+			if (rxlen != msg->rx_len) {
+				DRM_ERROR("DSI returned %db, expecting %db\n",
+					  rxlen, (int)msg->rx_len);
+				ret = -ENXIO;
+				goto reset_fifo_and_return;
+			}
+
+			for (i = 0; i < msg->rx_len; i++)
+				msg_rx[i] = DSI_READ(DSI1_RXPKT_FIFO);
+		} else {
+			/* FINISHME: Handle AWER */
+
+			msg_rx[0] = VC4_GET_FIELD(rxpkt1h,
+						  DSI_RXPKT1H_SHORT_0);
+			if (msg->rx_len > 1) {
+				msg_rx[1] = VC4_GET_FIELD(rxpkt1h,
+							  DSI_RXPKT1H_SHORT_1);
+			}
+		}
+	}
+
+	return ret;
+
+reset_fifo_and_return:
+	DRM_ERROR("DSI transfer failed, resetting: %d\n", ret);
+
+	DSI_PORT_WRITE(TXPKT1C, DSI_PORT_READ(TXPKT1C) & ~DSI_TXPKT1C_CMD_EN);
+	udelay(1);
+	DSI_PORT_WRITE(CTRL,
+		       DSI_PORT_READ(CTRL) |
+		       DSI_PORT_BIT(CTRL_RESET_FIFOS));
+
+	DSI_PORT_WRITE(TXPKT1C, 0);
+	DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+	return ret;
+}
+
+static int vc4_dsi_host_attach(struct mipi_dsi_host *host,
+			       struct mipi_dsi_device *device)
+{
+	struct vc4_dsi *dsi = host_to_dsi(host);
+	int ret = 0;
+
+	dsi->lanes = device->lanes;
+	dsi->channel = device->channel;
+	dsi->format = device->format;
+	dsi->mode_flags = device->mode_flags;
+
+	if (!(dsi->mode_flags & MIPI_DSI_MODE_VIDEO)) {
+		dev_err(&dsi->pdev->dev,
+			"Only VIDEO mode panels supported currently.\n");
+		return 0;
+	}
+
+	dsi->panel = of_drm_find_panel(device->dev.of_node);
+	if (!dsi->panel)
+		return 0;
+
+	ret = drm_panel_attach(dsi->panel, dsi->connector);
+	if (ret != 0)
+		return ret;
+
+	drm_helper_hpd_irq_event(dsi->connector->dev);
+
+	return 0;
+}
+
+static int vc4_dsi_host_detach(struct mipi_dsi_host *host,
+			       struct mipi_dsi_device *device)
+{
+	struct vc4_dsi *dsi = host_to_dsi(host);
+
+	if (dsi->panel) {
+		int ret = drm_panel_detach(dsi->panel);
+
+		if (ret)
+			return ret;
+
+		dsi->panel = NULL;
+
+		drm_helper_hpd_irq_event(dsi->connector->dev);
+	}
+
+	return 0;
+}
+
+static const struct mipi_dsi_host_ops vc4_dsi_host_ops = {
+	.attach = vc4_dsi_host_attach,
+	.detach = vc4_dsi_host_detach,
+	.transfer = vc4_dsi_host_transfer,
+};
+
+static const struct drm_encoder_helper_funcs vc4_dsi_encoder_helper_funcs = {
+	.disable = vc4_dsi_encoder_disable,
+	.enable = vc4_dsi_encoder_enable,
+};
+
+static const struct of_device_id vc4_dsi_dt_match[] = {
+	{ .compatible = "brcm,bcm2835-dsi1", (void *)(uintptr_t)1 },
+	{}
+};
+
+static void dsi_handle_error(struct vc4_dsi *dsi,
+			     irqreturn_t *ret, u32 stat, u32 bit,
+			     const char *type)
+{
+	if (!(stat & bit))
+		return;
+
+	DRM_ERROR("DSI%d: %s error\n", dsi->port, type);
+	*ret = IRQ_HANDLED;
+}
+
+static irqreturn_t vc4_dsi_irq_handler(int irq, void *data)
+{
+	struct vc4_dsi *dsi = data;
+	u32 stat = DSI_PORT_READ(INT_STAT);
+	irqreturn_t ret = IRQ_NONE;
+
+	DSI_PORT_WRITE(INT_STAT, stat);
+
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_ERR_SYNC_ESC, "LPDT sync");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_ERR_CONTROL, "data lane 0 sequence");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_ERR_CONT_LP0, "LP0 contention");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_ERR_CONT_LP1, "LP1 contention");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_HSTX_TO, "HSTX timeout");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_LPRX_TO, "LPRX timeout");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_TA_TO, "turnaround timeout");
+	dsi_handle_error(dsi, &ret, stat,
+			 DSI1_INT_PR_TO, "peripheral reset timeout");
+
+	if (stat & (DSI1_INT_TXPKT1_DONE | DSI1_INT_PHY_DIR_RTF)) {
+		complete(&dsi->xfer_completion);
+		ret = IRQ_HANDLED;
+	} else if (stat & DSI1_INT_HSTX_TO) {
+		complete(&dsi->xfer_completion);
+		dsi->xfer_result = -ETIMEDOUT;
+		ret = IRQ_HANDLED;
+	}
+
+	return ret;
+}
+
+/**
+ * Exposes clocks generated by the analog PHY that are consumed by
+ * CPRMAN (clk-bcm2835.c).
+ */
+static int
+vc4_dsi_init_phy_clocks(struct vc4_dsi *dsi)
+{
+	struct device *dev = &dsi->pdev->dev;
+	const char *parent_name = __clk_get_name(dsi->pll_phy_clock);
+	static const struct {
+		const char *dsi0_name, *dsi1_name;
+		int div;
+	} phy_clocks[] = {
+		{ "dsi0_byte", "dsi1_byte", 8 },
+		{ "dsi0_ddr2", "dsi1_ddr2", 4 },
+		{ "dsi0_ddr", "dsi1_ddr", 2 },
+	};
+	int i;
+
+	dsi->clk_onecell = devm_kzalloc(dev,
+					sizeof(*dsi->clk_onecell) +
+					ARRAY_SIZE(phy_clocks) *
+					sizeof(struct clk_hw *),
+					GFP_KERNEL);
+	if (!dsi->clk_onecell)
+		return -ENOMEM;
+	dsi->clk_onecell->num = ARRAY_SIZE(phy_clocks);
+
+	for (i = 0; i < ARRAY_SIZE(phy_clocks); i++) {
+		struct clk_fixed_factor *fix = &dsi->phy_clocks[i];
+		struct clk_init_data init;
+		int ret;
+
+		/* We just use core fixed factor clock ops for the PHY
+		 * clocks.  The clocks are actually gated by the
+		 * PHY_AFEC0_DDRCLK_EN bits, which we should be
+		 * setting if we use the DDR/DDR2 clocks.  However,
+		 * vc4_dsi_encoder_enable() is setting up both AFEC0,
+		 * setting both our parent DSI PLL's rate and this
+		 * clock's rate, so it knows if DDR/DDR2 are going to
+		 * be used and could enable the gates itself.
+		 */
+		fix->mult = 1;
+		fix->div = phy_clocks[i].div;
+		fix->hw.init = &init;
+
+		memset(&init, 0, sizeof(init));
+		init.parent_names = &parent_name;
+		init.num_parents = 1;
+		if (dsi->port == 1)
+			init.name = phy_clocks[i].dsi1_name;
+		else
+			init.name = phy_clocks[i].dsi0_name;
+		init.ops = &clk_fixed_factor_ops;
+
+		ret = devm_clk_hw_register(dev, &fix->hw);
+		if (ret)
+			return ret;
+
+		dsi->clk_onecell->hws[i] = &fix->hw;
+	}
+
+	return of_clk_add_hw_provider(dev->of_node,
+				      of_clk_hw_onecell_get,
+				      dsi->clk_onecell);
+}
+
+static int vc4_dsi_bind(struct device *dev, struct device *master, void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_dsi *dsi;
+	struct vc4_dsi_encoder *vc4_dsi_encoder;
+	const struct of_device_id *match;
+	dma_cap_mask_t dma_mask;
+	int ret;
+
+	dsi = devm_kzalloc(dev, sizeof(*dsi), GFP_KERNEL);
+	if (!dsi)
+		return -ENOMEM;
+
+	match = of_match_device(vc4_dsi_dt_match, dev);
+	if (!match)
+		return -ENODEV;
+
+	dsi->port = (uintptr_t)match->data;
+
+	vc4_dsi_encoder = devm_kzalloc(dev, sizeof(*vc4_dsi_encoder),
+				       GFP_KERNEL);
+	if (!vc4_dsi_encoder)
+		return -ENOMEM;
+	vc4_dsi_encoder->base.type = VC4_ENCODER_TYPE_DSI1;
+	vc4_dsi_encoder->dsi = dsi;
+	dsi->encoder = &vc4_dsi_encoder->base.base;
+
+	dsi->pdev = pdev;
+	dsi->regs = vc4_ioremap_regs(pdev, 0);
+	if (IS_ERR(dsi->regs))
+		return PTR_ERR(dsi->regs);
+
+	if (DSI_PORT_READ(ID) != DSI_ID_VALUE) {
+		dev_err(dev, "Port returned 0x%08x for ID instead of 0x%08x\n",
+			DSI_PORT_READ(ID), DSI_ID_VALUE);
+		return -ENODEV;
+	}
+
+	/* DSI1 has a broken AXI slave that doesn't respond to writes
+	 * from the ARM.  It does handle writes from the DMA engine,
+	 * so set up a channel for talking to it.
+	 */
+	if (dsi->port == 1) {
+		dsi->reg_dma_mem = dma_alloc_coherent(dev, 4,
+						      &dsi->reg_dma_paddr,
+						      GFP_KERNEL);
+		if (!dsi->reg_dma_mem) {
+			DRM_ERROR("Failed to get DMA memory\n");
+			return -ENOMEM;
+		}
+
+		dma_cap_zero(dma_mask);
+		dma_cap_set(DMA_MEMCPY, dma_mask);
+		dsi->reg_dma_chan = dma_request_chan_by_mask(&dma_mask);
+		if (IS_ERR(dsi->reg_dma_chan)) {
+			ret = PTR_ERR(dsi->reg_dma_chan);
+			if (ret != -EPROBE_DEFER)
+				DRM_ERROR("Failed to get DMA channel: %d\n",
+					  ret);
+			return ret;
+		}
+
+		/* Get the physical address of the device's registers.  The
+		 * struct resource for the regs gives us the bus address
+		 * instead.
+		 */
+		dsi->reg_paddr = be32_to_cpup(of_get_address(dev->of_node,
+							     0, NULL, NULL));
+	}
+
+	init_completion(&dsi->xfer_completion);
+	/* At startup enable error-reporting interrupts and nothing else. */
+	DSI_PORT_WRITE(INT_EN, DSI1_INTERRUPTS_ALWAYS_ENABLED);
+	/* Clear any existing interrupt state. */
+	DSI_PORT_WRITE(INT_STAT, DSI_PORT_READ(INT_STAT));
+
+	ret = devm_request_irq(dev, platform_get_irq(pdev, 0),
+			       vc4_dsi_irq_handler, 0, "vc4 dsi", dsi);
+	if (ret) {
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get interrupt: %d\n", ret);
+		return ret;
+	}
+
+	dsi->escape_clock = devm_clk_get(dev, "escape");
+	if (IS_ERR(dsi->escape_clock)) {
+		ret = PTR_ERR(dsi->escape_clock);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get escape clock: %d\n", ret);
+		return ret;
+	}
+
+	dsi->pll_phy_clock = devm_clk_get(dev, "phy");
+	if (IS_ERR(dsi->pll_phy_clock)) {
+		ret = PTR_ERR(dsi->pll_phy_clock);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get phy clock: %d\n", ret);
+		return ret;
+	}
+
+	dsi->pixel_clock = devm_clk_get(dev, "pixel");
+	if (IS_ERR(dsi->pixel_clock)) {
+		ret = PTR_ERR(dsi->pixel_clock);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get pixel clock: %d\n", ret);
+		return ret;
+	}
+
+	/* The esc clock rate is supposed to always be 100Mhz. */
+	ret = clk_set_rate(dsi->escape_clock, 100 * 1000000);
+	if (ret) {
+		dev_err(dev, "Failed to set esc clock: %d\n", ret);
+		return ret;
+	}
+
+	ret = vc4_dsi_init_phy_clocks(dsi);
+	if (ret)
+		return ret;
+
+	if (dsi->port == 1)
+		vc4->dsi1 = dsi;
+
+	drm_encoder_init(drm, dsi->encoder, &vc4_dsi_encoder_funcs,
+			 DRM_MODE_ENCODER_DSI, NULL);
+	drm_encoder_helper_add(dsi->encoder, &vc4_dsi_encoder_helper_funcs);
+
+	dsi->connector = vc4_dsi_connector_init(drm, dsi);
+	if (IS_ERR(dsi->connector)) {
+		ret = PTR_ERR(dsi->connector);
+		goto err_destroy_encoder;
+	}
+
+	dsi->dsi_host.ops = &vc4_dsi_host_ops;
+	dsi->dsi_host.dev = dev;
+
+	mipi_dsi_host_register(&dsi->dsi_host);
+
+	dev_set_drvdata(dev, dsi);
+
+	pm_runtime_enable(dev);
+
+	return 0;
+
+err_destroy_encoder:
+	vc4_dsi_encoder_destroy(dsi->encoder);
+
+	return ret;
+}
+
+static void vc4_dsi_unbind(struct device *dev, struct device *master,
+			   void *data)
+{
+	struct drm_device *drm = dev_get_drvdata(master);
+	struct vc4_dev *vc4 = to_vc4_dev(drm);
+	struct vc4_dsi *dsi = dev_get_drvdata(dev);
+
+	pm_runtime_disable(dev);
+
+	vc4_dsi_connector_destroy(dsi->connector);
+	vc4_dsi_encoder_destroy(dsi->encoder);
+
+	mipi_dsi_host_unregister(&dsi->dsi_host);
+
+	clk_disable_unprepare(dsi->pll_phy_clock);
+	clk_disable_unprepare(dsi->escape_clock);
+
+	if (dsi->port == 1)
+		vc4->dsi1 = NULL;
+}
+
+static const struct component_ops vc4_dsi_ops = {
+	.bind   = vc4_dsi_bind,
+	.unbind = vc4_dsi_unbind,
+};
+
+static int vc4_dsi_dev_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &vc4_dsi_ops);
+}
+
+static int vc4_dsi_dev_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &vc4_dsi_ops);
+	return 0;
+}
+
+struct platform_driver vc4_dsi_driver = {
+	.probe = vc4_dsi_dev_probe,
+	.remove = vc4_dsi_dev_remove,
+	.driver = {
+		.name = "vc4_dsi",
+		.of_match_table = vc4_dsi_dt_match,
+	},
+};
diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c
index db920771bfb5..1eef98c3331d 100644
--- a/drivers/gpu/drm/vc4/vc4_gem.c
+++ b/drivers/gpu/drm/vc4/vc4_gem.c
@@ -26,6 +26,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/device.h>
 #include <linux/io.h>
+#include <linux/sched/signal.h>
 
 #include "uapi/drm/vc4_drm.h"
 #include "vc4_drv.h"
@@ -594,12 +595,14 @@ vc4_get_bcl(struct drm_device *dev, struct vc4_exec_info *exec)
 					  args->shader_rec_count);
 	struct vc4_bo *bo;
 
-	if (uniforms_offset < shader_rec_offset ||
+	if (shader_rec_offset < args->bin_cl_size ||
+	    uniforms_offset < shader_rec_offset ||
 	    exec_size < uniforms_offset ||
 	    args->shader_rec_count >= (UINT_MAX /
 					  sizeof(struct vc4_shader_state)) ||
 	    temp_size < exec_size) {
 		DRM_ERROR("overflow in exec arguments\n");
+		ret = -EINVAL;
 		goto fail;
 	}
 
diff --git a/drivers/gpu/drm/vc4/vc4_hvs.c b/drivers/gpu/drm/vc4/vc4_hvs.c
index 6fbab1c82cb1..f7f7677f6d8d 100644
--- a/drivers/gpu/drm/vc4/vc4_hvs.c
+++ b/drivers/gpu/drm/vc4/vc4_hvs.c
@@ -141,8 +141,7 @@ static int vc4_hvs_upload_linear_kernel(struct vc4_hvs *hvs,
 	int ret, i;
 	u32 __iomem *dst_kernel;
 
-	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS, 1,
-				 0);
+	ret = drm_mm_insert_node(&hvs->dlist_mm, space, VC4_KERNEL_DWORDS);
 	if (ret) {
 		DRM_ERROR("Failed to allocate space for filter kernel: %d\n",
 			  ret);
@@ -170,6 +169,7 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 	struct vc4_dev *vc4 = drm->dev_private;
 	struct vc4_hvs *hvs = NULL;
 	int ret;
+	u32 dispctrl;
 
 	hvs = devm_kzalloc(&pdev->dev, sizeof(*hvs), GFP_KERNEL);
 	if (!hvs)
@@ -211,6 +211,19 @@ static int vc4_hvs_bind(struct device *dev, struct device *master, void *data)
 		return ret;
 
 	vc4->hvs = hvs;
+
+	dispctrl = HVS_READ(SCALER_DISPCTRL);
+
+	dispctrl |= SCALER_DISPCTRL_ENABLE;
+
+	/* Set DSP3 (PV1) to use HVS channel 2, which would otherwise
+	 * be unused.
+	 */
+	dispctrl &= ~SCALER_DISPCTRL_DSP3_MUX_MASK;
+	dispctrl |= VC4_SET_FIELD(2, SCALER_DISPCTRL_DSP3_MUX);
+
+	HVS_WRITE(SCALER_DISPCTRL, dispctrl);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/vc4/vc4_kms.c b/drivers/gpu/drm/vc4/vc4_kms.c
index be8dd8262f27..ad7925a9e0ea 100644
--- a/drivers/gpu/drm/vc4/vc4_kms.c
+++ b/drivers/gpu/drm/vc4/vc4_kms.c
@@ -231,7 +231,6 @@ int vc4_kms_load(struct drm_device *dev)
 	drm_mode_config_reset(dev);
 
 	vc4->fbdev = drm_fbdev_cma_init(dev, 32,
-					dev->mode_config.num_crtc,
 					dev->mode_config.num_connector);
 	if (IS_ERR(vc4->fbdev))
 		vc4->fbdev = NULL;
diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c
index 110d1518f5d5..f7a229df572d 100644
--- a/drivers/gpu/drm/vc4/vc4_plane.c
+++ b/drivers/gpu/drm/vc4/vc4_plane.c
@@ -514,9 +514,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane,
 	if (lbm_size) {
 		if (!vc4_state->lbm.allocated) {
 			spin_lock_irqsave(&vc4->hvs->mm_lock, irqflags);
-			ret = drm_mm_insert_node(&vc4->hvs->lbm_mm,
-						 &vc4_state->lbm,
-						 lbm_size, 32, 0);
+			ret = drm_mm_insert_node_generic(&vc4->hvs->lbm_mm,
+							 &vc4_state->lbm,
+							 lbm_size, 32, 0, 0);
 			spin_unlock_irqrestore(&vc4->hvs->mm_lock, irqflags);
 		} else {
 			WARN_ON_ONCE(lbm_size != vc4_state->lbm.size);
@@ -858,7 +858,7 @@ struct drm_plane *vc4_plane_init(struct drm_device *dev,
 		}
 	}
 	plane = &vc4_plane->base;
-	ret = drm_universal_plane_init(dev, plane, 0xff,
+	ret = drm_universal_plane_init(dev, plane, 0,
 				       &vc4_plane_funcs,
 				       formats, num_formats,
 				       type, NULL);
diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h
index 39f6886b2410..385405a2df05 100644
--- a/drivers/gpu/drm/vc4/vc4_regs.h
+++ b/drivers/gpu/drm/vc4/vc4_regs.h
@@ -190,6 +190,8 @@
 # define PV_VCONTROL_ODD_DELAY_SHIFT		6
 # define PV_VCONTROL_ODD_FIRST			BIT(5)
 # define PV_VCONTROL_INTERLACE			BIT(4)
+# define PV_VCONTROL_DSI			BIT(3)
+# define PV_VCONTROL_COMMAND			BIT(2)
 # define PV_VCONTROL_CONTINUOUS			BIT(1)
 # define PV_VCONTROL_VIDEN			BIT(0)
 
@@ -244,6 +246,9 @@
 # define SCALER_DISPCTRL_ENABLE			BIT(31)
 # define SCALER_DISPCTRL_DSP2EISLUR		BIT(15)
 # define SCALER_DISPCTRL_DSP1EISLUR		BIT(14)
+# define SCALER_DISPCTRL_DSP3_MUX_MASK		VC4_MASK(19, 18)
+# define SCALER_DISPCTRL_DSP3_MUX_SHIFT		18
+
 /* Enables Display 0 short line and underrun contribution to
  * SCALER_DISPSTAT_IRQDISP0.  Note that short frame contributions are
  * always enabled.
diff --git a/drivers/gpu/drm/vc4/vc4_render_cl.c b/drivers/gpu/drm/vc4/vc4_render_cl.c
index 08886a309757..5cdd003605f5 100644
--- a/drivers/gpu/drm/vc4/vc4_render_cl.c
+++ b/drivers/gpu/drm/vc4/vc4_render_cl.c
@@ -461,7 +461,7 @@ static int vc4_rcl_surface_setup(struct vc4_exec_info *exec,
 		}
 
 		ret = vc4_full_res_bounds_check(exec, *obj, surf);
-		if (!ret)
+		if (ret)
 			return ret;
 
 		return 0;
diff --git a/drivers/gpu/drm/vgem/vgem_drv.c b/drivers/gpu/drm/vgem/vgem_drv.c
index 477e07f0ecb6..a1f42d125e6e 100644
--- a/drivers/gpu/drm/vgem/vgem_drv.c
+++ b/drivers/gpu/drm/vgem/vgem_drv.c
@@ -50,8 +50,9 @@ static void vgem_gem_free_object(struct drm_gem_object *obj)
 	kfree(vgem_obj);
 }
 
-static int vgem_gem_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int vgem_gem_fault(struct vm_fault *vmf)
 {
+	struct vm_area_struct *vma = vmf->vma;
 	struct drm_vgem_gem_object *obj = vma->vm_private_data;
 	/* We don't use vmf->pgoff since that has the fake offset */
 	unsigned long vaddr = vmf->address;
@@ -287,7 +288,7 @@ static int vgem_prime_mmap(struct drm_gem_object *obj,
 	if (!obj->filp)
 		return -ENODEV;
 
-	ret = obj->filp->f_op->mmap(obj->filp, vma);
+	ret = call_mmap(obj->filp, vma);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/via/via_mm.c b/drivers/gpu/drm/via/via_mm.c
index a04ef1c992d9..4217d66a5cc6 100644
--- a/drivers/gpu/drm/via/via_mm.c
+++ b/drivers/gpu/drm/via/via_mm.c
@@ -140,11 +140,11 @@ int via_mem_alloc(struct drm_device *dev, void *data,
 	if (mem->type == VIA_MEM_AGP)
 		retval = drm_mm_insert_node(&dev_priv->agp_mm,
 					    &item->mm_node,
-					    tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
+					    tmpSize);
 	else
 		retval = drm_mm_insert_node(&dev_priv->vram_mm,
 					    &item->mm_node,
-					    tmpSize, 0, DRM_MM_SEARCH_DEFAULT);
+					    tmpSize);
 	if (retval)
 		goto fail_alloc;
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c
index 24f99fc9d8a4..163a67db8cf1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fb.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fb.c
@@ -387,7 +387,6 @@ int virtio_gpu_fbdev_init(struct virtio_gpu_device *vgdev)
 	drm_fb_helper_prepare(vgdev->ddev, &vgfbdev->helper,
 			      &virtio_gpu_fb_helper_funcs);
 	ret = drm_fb_helper_init(vgdev->ddev, &vgfbdev->helper,
-				 vgdev->num_scanouts,
 				 VIRTIO_GPUFB_CONN_LIMIT);
 	if (ret) {
 		kfree(vgfbdev);
diff --git a/drivers/gpu/drm/virtio/virtgpu_kms.c b/drivers/gpu/drm/virtio/virtgpu_kms.c
index fae75394b5d0..491866865c33 100644
--- a/drivers/gpu/drm/virtio/virtgpu_kms.c
+++ b/drivers/gpu/drm/virtio/virtgpu_kms.c
@@ -166,13 +166,17 @@ int virtio_gpu_driver_load(struct drm_device *dev, unsigned long flags)
 	INIT_WORK(&vgdev->config_changed_work,
 		  virtio_gpu_config_changed_work_func);
 
+#ifdef __LITTLE_ENDIAN
 	if (virtio_has_feature(vgdev->vdev, VIRTIO_GPU_F_VIRGL))
 		vgdev->has_virgl_3d = true;
 	DRM_INFO("virgl 3d acceleration %s\n",
-		 vgdev->has_virgl_3d ? "enabled" : "not available");
+		 vgdev->has_virgl_3d ? "enabled" : "not supported by host");
+#else
+	DRM_INFO("virgl 3d acceleration not supported by guest\n");
+#endif
 
 	ret = vgdev->vdev->config->find_vqs(vgdev->vdev, 2, vqs,
-					    callbacks, names);
+					    callbacks, names, NULL);
 	if (ret) {
 		DRM_ERROR("failed to find virt queues\n");
 		goto err_vqs;
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 9cc7079f7aca..70ec8ca8d9b1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -114,18 +114,17 @@ static void virtio_gpu_ttm_global_fini(struct virtio_gpu_device *vgdev)
 static struct vm_operations_struct virtio_gpu_ttm_vm_ops;
 static const struct vm_operations_struct *ttm_vm_ops;
 
-static int virtio_gpu_ttm_fault(struct vm_area_struct *vma,
-				struct vm_fault *vmf)
+static int virtio_gpu_ttm_fault(struct vm_fault *vmf)
 {
 	struct ttm_buffer_object *bo;
 	struct virtio_gpu_device *vgdev;
 	int r;
 
-	bo = (struct ttm_buffer_object *)vma->vm_private_data;
+	bo = (struct ttm_buffer_object *)vmf->vma->vm_private_data;
 	if (bo == NULL)
 		return VM_FAULT_NOPAGE;
 	vgdev = virtio_gpu_get_vgdev(bo->bdev);
-	r = ttm_vm_ops->fault(vma, vmf);
+	r = ttm_vm_ops->fault(vmf);
 	return r;
 }
 #endif
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
index aa04fb0159a7..77cb7c627e09 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_cmdbuf.c
@@ -673,16 +673,10 @@ static bool vmw_cmdbuf_try_alloc(struct vmw_cmdbuf_man *man,
  
 	memset(info->node, 0, sizeof(*info->node));
 	spin_lock_bh(&man->lock);
-	ret = drm_mm_insert_node_generic(&man->mm, info->node, info->page_size,
-					 0, 0,
-					 DRM_MM_SEARCH_DEFAULT,
-					 DRM_MM_CREATE_DEFAULT);
+	ret = drm_mm_insert_node(&man->mm, info->node, info->page_size);
 	if (ret) {
 		vmw_cmdbuf_man_process(man);
-		ret = drm_mm_insert_node_generic(&man->mm, info->node,
-						 info->page_size, 0, 0,
-						 DRM_MM_SEARCH_DEFAULT,
-						 DRM_MM_CREATE_DEFAULT);
+		ret = drm_mm_insert_node(&man->mm, info->node, info->page_size);
 	}
 
 	spin_unlock_bh(&man->lock);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 541a5887dd6c..d08f26973d0b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -199,9 +199,14 @@ static const struct drm_ioctl_desc vmw_ioctls[] = {
 	VMW_IOCTL_DEF(VMW_PRESENT_READBACK,
 		      vmw_present_readback_ioctl,
 		      DRM_MASTER | DRM_AUTH),
+	/*
+	 * The permissions of the below ioctl are overridden in
+	 * vmw_generic_ioctl(). We require either
+	 * DRM_MASTER or capable(CAP_SYS_ADMIN).
+	 */
 	VMW_IOCTL_DEF(VMW_UPDATE_LAYOUT,
 		      vmw_kms_update_layout_ioctl,
-		      DRM_MASTER | DRM_CONTROL_ALLOW),
+		      DRM_RENDER_ALLOW),
 	VMW_IOCTL_DEF(VMW_CREATE_SHADER,
 		      vmw_shader_define_ioctl,
 		      DRM_AUTH | DRM_RENDER_ALLOW),
@@ -1123,6 +1128,10 @@ static long vmw_generic_ioctl(struct file *filp, unsigned int cmd,
 
 			return (long) vmw_execbuf_ioctl(dev, arg, file_priv,
 							_IOC_SIZE(cmd));
+		} else if (nr == DRM_COMMAND_BASE + DRM_VMW_UPDATE_LAYOUT) {
+			if (!drm_is_current_master(file_priv) &&
+			    !capable(CAP_SYS_ADMIN))
+				return -EACCES;
 		}
 
 		if (unlikely(ioctl->cmd != cmd))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 1e59a486bba8..59ff4197173a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -41,9 +41,9 @@
 #include <drm/ttm/ttm_module.h>
 #include "vmwgfx_fence.h"
 
-#define VMWGFX_DRIVER_DATE "20160210"
+#define VMWGFX_DRIVER_DATE "20170221"
 #define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 11
+#define VMWGFX_DRIVER_MINOR 12
 #define VMWGFX_DRIVER_PATCHLEVEL 0
 #define VMWGFX_FILE_PAGE_OFFSET 0x00100000
 #define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
diff --git a/drivers/gpu/drm/zte/zx_drm_drv.c b/drivers/gpu/drm/zte/zx_drm_drv.c
index 13081fed902d..5c6944a1e72c 100644
--- a/drivers/gpu/drm/zte/zx_drm_drv.c
+++ b/drivers/gpu/drm/zte/zx_drm_drv.c
@@ -141,7 +141,7 @@ static int zx_drm_bind(struct device *dev)
 	drm_mode_config_reset(drm);
 	drm_kms_helper_poll_init(drm);
 
-	priv->fbdev = drm_fbdev_cma_init(drm, 32, drm->mode_config.num_crtc,
+	priv->fbdev = drm_fbdev_cma_init(drm, 32,
 					 drm->mode_config.num_connector);
 	if (IS_ERR(priv->fbdev)) {
 		ret = PTR_ERR(priv->fbdev);
diff --git a/drivers/gpu/drm/zte/zx_plane.c b/drivers/gpu/drm/zte/zx_plane.c
index 1d08ba381098..d646ac931663 100644
--- a/drivers/gpu/drm/zte/zx_plane.c
+++ b/drivers/gpu/drm/zte/zx_plane.c
@@ -159,7 +159,7 @@ static void zx_vl_rsz_setup(struct zx_plane *zplane, uint32_t format,
 	void __iomem *rsz = zplane->rsz;
 	u32 src_chroma_w = src_w;
 	u32 src_chroma_h = src_h;
-	u32 fmt;
+	int fmt;
 
 	/* Set up source and destination resolution */
 	zx_writel(rsz + RSZ_SRC_CFG, RSZ_VER(src_h - 1) | RSZ_HOR(src_w - 1));
@@ -203,7 +203,7 @@ static void zx_vl_plane_atomic_update(struct drm_plane *plane,
 	u32 src_x, src_y, src_w, src_h;
 	u32 dst_x, dst_y, dst_w, dst_h;
 	uint32_t format;
-	u32 fmt;
+	int fmt;
 	int num_planes;
 	int i;
 
diff --git a/drivers/gpu/host1x/bus.c b/drivers/gpu/host1x/bus.c
index c27858ae0552..eeb021fe6410 100644
--- a/drivers/gpu/host1x/bus.c
+++ b/drivers/gpu/host1x/bus.c
@@ -399,6 +399,7 @@ static int host1x_device_add(struct host1x *host1x,
 	dev_set_name(&device->dev, "%s", driver->driver.name);
 	of_dma_configure(&device->dev, host1x->dev->of_node);
 	device->dev.release = host1x_device_release;
+	device->dev.of_node = host1x->dev->of_node;
 	device->dev.bus = &host1x_bus_type;
 	device->dev.parent = host1x->dev;
 
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 97218af4fe75..8368e6f766ee 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1238,12 +1238,6 @@ static int ipu_add_client_devices(struct ipu_soc *ipu, unsigned long ipu_base)
 			platform_device_put(pdev);
 			goto err_register;
 		}
-
-		/*
-		 * Set of_node only after calling platform_device_add. Otherwise
-		 * the platform:imx-ipuv3-crtc modalias won't be used.
-		 */
-		pdev->dev.of_node = of_node;
 	}
 
 	return 0;
diff --git a/drivers/gpu/ipu-v3/ipu-csi.c b/drivers/gpu/ipu-v3/ipu-csi.c
index 63c7292f427a..24e12b87a0cb 100644
--- a/drivers/gpu/ipu-v3/ipu-csi.c
+++ b/drivers/gpu/ipu-v3/ipu-csi.c
@@ -544,6 +544,7 @@ void ipu_csi_set_downsize(struct ipu_csi *csi, bool horiz, bool vert)
 
 	spin_unlock_irqrestore(&csi->lock, flags);
 }
+EXPORT_SYMBOL_GPL(ipu_csi_set_downsize);
 
 void ipu_csi_set_test_generator(struct ipu_csi *csi, bool active,
 				u32 r_value, u32 g_value, u32 b_value,
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index 0f5b2dd24507..92f1452dad57 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -41,7 +41,7 @@
 #include <linux/errno.h>
 #include <linux/init.h>
 #include <linux/list.h>
-#include <linux/sched.h>
+#include <linux/sched/signal.h>
 #include <linux/wait.h>
 #include <linux/spinlock.h>
 #include <linux/poll.h>