334 files changed, 13977 insertions, 6259 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 262056778f52..6a8129949333 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -32,7 +32,7 @@
 #include <linux/wait.h>
 #include <linux/list.h>
 #include <linux/kref.h>
-#include <linux/interval_tree.h>
+#include <linux/rbtree.h>
 #include <linux/hashtable.h>
 #include <linux/dma-fence.h>
 
@@ -122,14 +122,6 @@ extern int amdgpu_param_buf_per_se;
 /* max number of IP instances */
 #define AMDGPU_MAX_SDMA_INSTANCES		2
 
-/* max number of VMHUB */
-#define AMDGPU_MAX_VMHUBS			2
-#define AMDGPU_MMHUB				0
-#define AMDGPU_GFXHUB				1
-
-/* hardcode that limit for now */
-#define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
-
 /* hard reset data */
 #define AMDGPU_ASIC_RESET_DATA                  0x39d5e86b
 
@@ -312,12 +304,9 @@ struct amdgpu_gart_funcs {
 	/* set pte flags based per asic */
 	uint64_t (*get_vm_pte_flags)(struct amdgpu_device *adev,
 				     uint32_t flags);
-};
-
-/* provided by the mc block */
-struct amdgpu_mc_funcs {
 	/* adjust mc addr in fb for APU case */
 	u64 (*adjust_mc_addr)(struct amdgpu_device *adev, u64 addr);
+	uint32_t (*get_invalidate_req)(unsigned int vm_id);
 };
 
 /* provided by the ih block */
@@ -379,7 +368,10 @@ struct amdgpu_bo_list_entry {
 
 struct amdgpu_bo_va_mapping {
 	struct list_head		list;
-	struct interval_tree_node	it;
+	struct rb_node			rb;
+	uint64_t			start;
+	uint64_t			last;
+	uint64_t			__subtree_last;
 	uint64_t			offset;
 	uint64_t			flags;
 };
@@ -579,8 +571,6 @@ struct amdgpu_vmhub {
 	uint32_t	vm_context0_cntl;
 	uint32_t	vm_l2_pro_fault_status;
 	uint32_t	vm_l2_pro_fault_cntl;
-	uint32_t	(*get_invalidate_req)(unsigned int vm_id);
-	uint32_t	(*get_vm_protection_bits)(void);
 };
 
 /*
@@ -618,7 +608,6 @@ struct amdgpu_mc {
 	u64					private_aperture_end;
 	/* protects concurrent invalidation */
 	spinlock_t		invalidate_lock;
-	const struct amdgpu_mc_funcs *mc_funcs;
 };
 
 /*
@@ -1712,6 +1701,12 @@ void amdgpu_mm_wdoorbell64(struct amdgpu_device *adev, u32 index, u64 v);
 #define WREG32_FIELD(reg, field, val)	\
 	WREG32(mm##reg, (RREG32(mm##reg) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
 
+#define WREG32_FIELD_OFFSET(reg, offset, field, val)	\
+	WREG32(mm##reg + offset, (RREG32(mm##reg + offset) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
+#define WREG32_FIELD15(ip, idx, reg, field, val)	\
+	WREG32(SOC15_REG_OFFSET(ip, idx, mm##reg), (RREG32(SOC15_REG_OFFSET(ip, idx, mm##reg)) & ~REG_FIELD_MASK(reg, field)) | (val) << REG_FIELD_SHIFT(reg, field))
+
 /*
  * BIOS helpers.
  */
@@ -1887,12 +1882,14 @@ void amdgpu_unregister_atpx_handler(void);
 bool amdgpu_has_atpx_dgpu_power_cntl(void);
 bool amdgpu_is_atpx_hybrid(void);
 bool amdgpu_atpx_dgpu_req_power_for_displays(void);
+bool amdgpu_has_atpx(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
 static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
 static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 static inline bool amdgpu_atpx_dgpu_req_power_for_displays(void) { return false; }
+static inline bool amdgpu_has_atpx(void) { return false; }
 #endif
 
 /*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
index f52b1bf3d3d9..ad4329922f79 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@@ -754,6 +754,35 @@ union igp_info {
 	struct _ATOM_INTEGRATED_SYSTEM_INFO_V1_9 info_9;
 };
 
+/*
+ * Return vram width from integrated system info table, if available,
+ * or 0 if not.
+ */
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev)
+{
+	struct amdgpu_mode_info *mode_info = &adev->mode_info;
+	int index = GetIndexIntoMasterTable(DATA, IntegratedSystemInfo);
+	u16 data_offset, size;
+	union igp_info *igp_info;
+	u8 frev, crev;
+
+	/* get any igp specific overrides */
+	if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
+				   &frev, &crev, &data_offset)) {
+		igp_info = (union igp_info *)
+			(mode_info->atom_context->bios + data_offset);
+		switch (crev) {
+		case 8:
+		case 9:
+			return igp_info->info_8.ucUMAChannelNumber * 64;
+		default:
+			return 0;
+		}
+	}
+
+	return 0;
+}
+
 static void amdgpu_atombios_get_igp_ss_overrides(struct amdgpu_device *adev,
 						 struct amdgpu_atom_ss *ss,
 						 int id)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
index 4e0f488487f3..38d0fe32e5cd 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.h
@@ -148,6 +148,8 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev);
 
 int amdgpu_atombios_get_gfx_info(struct amdgpu_device *adev);
 
+int amdgpu_atombios_get_vram_width(struct amdgpu_device *adev);
+
 bool amdgpu_atombios_get_asic_ss_info(struct amdgpu_device *adev,
 				      struct amdgpu_atom_ss *ss,
 				      int id, u32 clock);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 0218cea6be4d..a6649874e6ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -237,7 +237,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
 	struct amdgpu_fpriv *fpriv = filp->driver_priv;
 	union drm_amdgpu_bo_list *args = data;
 	uint32_t handle = args->in.list_handle;
-	const void __user *uptr = (const void*)(long)args->in.bo_info_ptr;
+	const void __user *uptr = (const void*)(uintptr_t)args->in.bo_info_ptr;
 
 	struct drm_amdgpu_bo_list_entry *info;
 	struct amdgpu_bo_list *list;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index 97f661372a1c..ec71b9320561 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -161,7 +161,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	}
 
 	/* get chunks */
-	chunk_array_user = (uint64_t __user *)(unsigned long)(cs->in.chunks);
+	chunk_array_user = (uint64_t __user *)(uintptr_t)(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
 			   sizeof(uint64_t)*cs->in.num_chunks)) {
 		ret = -EFAULT;
@@ -181,7 +181,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		struct drm_amdgpu_cs_chunk user_chunk;
 		uint32_t __user *cdata;
 
-		chunk_ptr = (void __user *)(unsigned long)chunk_array[i];
+		chunk_ptr = (void __user *)(uintptr_t)chunk_array[i];
 		if (copy_from_user(&user_chunk, chunk_ptr,
 				       sizeof(struct drm_amdgpu_cs_chunk))) {
 			ret = -EFAULT;
@@ -192,7 +192,7 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 		p->chunks[i].length_dw = user_chunk.length_dw;
 
 		size = p->chunks[i].length_dw;
-		cdata = (void __user *)(unsigned long)user_chunk.chunk_data;
+		cdata = (void __user *)(uintptr_t)user_chunk.chunk_data;
 
 		p->chunks[i].kdata = drm_malloc_ab(size, sizeof(uint32_t));
 		if (p->chunks[i].kdata == NULL) {
@@ -949,7 +949,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 			}
 
 			if ((chunk_ib->va_start + chunk_ib->ib_bytes) >
-			    (m->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+			    (m->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 				DRM_ERROR("IB va_start+ib_bytes is invalid\n");
 				return -EINVAL;
 			}
@@ -960,7 +960,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 				return r;
 			}
 
-			offset = ((uint64_t)m->it.start) * AMDGPU_GPU_PAGE_SIZE;
+			offset = m->start * AMDGPU_GPU_PAGE_SIZE;
 			kptr += chunk_ib->va_start - offset;
 
 			r =  amdgpu_ib_get(adev, vm, chunk_ib->ib_bytes, ib);
@@ -1242,6 +1242,7 @@ static int amdgpu_cs_wait_all_fences(struct amdgpu_device *adev,
 			continue;
 
 		r = dma_fence_wait_timeout(fence, true, timeout);
+		dma_fence_put(fence);
 		if (r < 0)
 			return r;
 
@@ -1339,7 +1340,7 @@ int amdgpu_cs_wait_fences_ioctl(struct drm_device *dev, void *data,
 	if (fences == NULL)
 		return -ENOMEM;
 
-	fences_user = (void __user *)(unsigned long)(wait->in.fences);
+	fences_user = (void __user *)(uintptr_t)(wait->in.fences);
 	if (copy_from_user(fences, fences_user,
 		sizeof(struct drm_amdgpu_fence) * fence_count)) {
 		r = -EFAULT;
@@ -1388,8 +1389,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 			continue;
 
 		list_for_each_entry(mapping, &lobj->bo_va->valids, list) {
-			if (mapping->it.start > addr ||
-			    addr > mapping->it.last)
+			if (mapping->start > addr ||
+			    addr > mapping->last)
 				continue;
 
 			*bo = lobj->bo_va->bo;
@@ -1397,8 +1398,8 @@ amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
 		}
 
 		list_for_each_entry(mapping, &lobj->bo_va->invalids, list) {
-			if (mapping->it.start > addr ||
-			    addr > mapping->it.last)
+			if (mapping->start > addr ||
+			    addr > mapping->last)
 				continue;
 
 			*bo = lobj->bo_va->bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index 83dda05325b8..483660742f75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -1040,43 +1040,60 @@ static bool amdgpu_check_pot_argument(int arg)
 	return (arg & (arg - 1)) == 0;
 }
 
-static void amdgpu_get_block_size(struct amdgpu_device *adev)
-{
-	/* from AI, asic starts to support multiple level VMPT */
-	if (adev->asic_type >= CHIP_VEGA10) {
-		if (amdgpu_vm_block_size != 9)
-			dev_warn(adev->dev,
-				 "Multi-VMPT limits block size to one page!\n");
-		amdgpu_vm_block_size = 9;
-		return;
-	}
+static void amdgpu_check_block_size(struct amdgpu_device *adev)
+{
 	/* defines number of bits in page table versus page directory,
 	 * a page is 4KB so we have 12 bits offset, minimum 9 bits in the
 	 * page table and the remaining bits are in the page directory */
-	if (amdgpu_vm_block_size == -1) {
-
-		/* Total bits covered by PD + PTs */
-		unsigned bits = ilog2(amdgpu_vm_size) + 18;
-
-		/* Make sure the PD is 4K in size up to 8GB address space.
-		   Above that split equal between PD and PTs */
-		if (amdgpu_vm_size <= 8)
-			amdgpu_vm_block_size = bits - 9;
-		else
-			amdgpu_vm_block_size = (bits + 3) / 2;
+	if (amdgpu_vm_block_size == -1)
+		return;
 
-	} else if (amdgpu_vm_block_size < 9) {
+	if (amdgpu_vm_block_size < 9) {
 		dev_warn(adev->dev, "VM page table size (%d) too small\n",
 			 amdgpu_vm_block_size);
-		amdgpu_vm_block_size = 9;
+		goto def_value;
 	}
 
 	if (amdgpu_vm_block_size > 24 ||
 	    (amdgpu_vm_size * 1024) < (1ull << amdgpu_vm_block_size)) {
 		dev_warn(adev->dev, "VM page table size (%d) too large\n",
 			 amdgpu_vm_block_size);
-		amdgpu_vm_block_size = 9;
+		goto def_value;
 	}
+
+	return;
+
+def_value:
+	amdgpu_vm_block_size = -1;
+}
+
+static void amdgpu_check_vm_size(struct amdgpu_device *adev)
+{
+	if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
+		dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
+			 amdgpu_vm_size);
+		goto def_value;
+	}
+
+	if (amdgpu_vm_size < 1) {
+		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
+			 amdgpu_vm_size);
+		goto def_value;
+	}
+
+	/*
+	 * Max GPUVM size for Cayman, SI, CI VI are 40 bits.
+	 */
+	if (amdgpu_vm_size > 1024) {
+		dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n",
+			 amdgpu_vm_size);
+		goto def_value;
+	}
+
+	return;
+
+def_value:
+	amdgpu_vm_size = -1;
 }
 
 /**
@@ -1108,28 +1125,9 @@ static void amdgpu_check_arguments(struct amdgpu_device *adev)
 		}
 	}
 
-	if (!amdgpu_check_pot_argument(amdgpu_vm_size)) {
-		dev_warn(adev->dev, "VM size (%d) must be a power of 2\n",
-			 amdgpu_vm_size);
-		amdgpu_vm_size = 8;
-	}
+	amdgpu_check_vm_size(adev);
 
-	if (amdgpu_vm_size < 1) {
-		dev_warn(adev->dev, "VM size (%d) too small, min is 1GB\n",
-			 amdgpu_vm_size);
-		amdgpu_vm_size = 8;
-	}
-
-	/*
-	 * Max GPUVM size for Cayman, SI and CI are 40 bits.
-	 */
-	if (amdgpu_vm_size > 1024) {
-		dev_warn(adev->dev, "VM size (%d) too large, max is 1TB\n",
-			 amdgpu_vm_size);
-		amdgpu_vm_size = 8;
-	}
-
-	amdgpu_get_block_size(adev);
+	amdgpu_check_block_size(adev);
 
 	if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
 	    !amdgpu_check_pot_argument(amdgpu_vram_page_split))) {
@@ -2249,9 +2247,10 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 	}
 
 	r = amdgpu_resume(adev);
-	if (r)
+	if (r) {
 		DRM_ERROR("amdgpu_resume failed (%d).\n", r);
-
+		return r;
+	}
 	amdgpu_fence_driver_resume(adev);
 
 	if (resume) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index ce15721cadda..96926a221bd5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -614,6 +614,12 @@ amdgpu_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOENT);
 	}
 
+	/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
+	if (obj->import_attach) {
+		DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	amdgpu_fb = kzalloc(sizeof(*amdgpu_fb), GFP_KERNEL);
 	if (amdgpu_fb == NULL) {
 		drm_gem_object_unreference_unlocked(obj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 400917fd7486..4e0f7d2d87f1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -86,7 +86,7 @@ int amdgpu_runtime_pm = -1;
 unsigned amdgpu_ip_block_mask = 0xffffffff;
 int amdgpu_bapm = -1;
 int amdgpu_deep_color = 0;
-int amdgpu_vm_size = 64;
+int amdgpu_vm_size = -1;
 int amdgpu_vm_block_size = -1;
 int amdgpu_vm_fault_stop = 0;
 int amdgpu_vm_debug = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index f85520d4e711..03a9c5cad222 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -717,7 +717,7 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 	switch (args->op) {
 	case AMDGPU_GEM_OP_GET_GEM_CREATE_INFO: {
 		struct drm_amdgpu_gem_create_in info;
-		void __user *out = (void __user *)(long)args->value;
+		void __user *out = (void __user *)(uintptr_t)args->value;
 
 		info.bo_size = robj->gem_base.size;
 		info.alignment = robj->tbo.mem.page_alignment << PAGE_SHIFT;
@@ -729,6 +729,11 @@ int amdgpu_gem_op_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case AMDGPU_GEM_OP_SET_PLACEMENT:
+		if (robj->prime_shared_count && (args->value & AMDGPU_GEM_DOMAIN_VRAM)) {
+			r = -EINVAL;
+			amdgpu_bo_unreserve(robj);
+			break;
+		}
 		if (amdgpu_ttm_tt_get_usermm(robj->tbo.ttm)) {
 			r = -EPERM;
 			amdgpu_bo_unreserve(robj);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 13b487235a8b..a6b7e367a860 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -316,9 +316,10 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
 		return -EINVAL;
 
 	if (!adev->irq.client[client_id].sources) {
-		adev->irq.client[client_id].sources = kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
-							      sizeof(struct amdgpu_irq_src),
-							      GFP_KERNEL);
+		adev->irq.client[client_id].sources =
+			kcalloc(AMDGPU_MAX_IRQ_SRC_ID,
+				sizeof(struct amdgpu_irq_src *),
+				GFP_KERNEL);
 		if (!adev->irq.client[client_id].sources)
 			return -ENOMEM;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index dfb029ab3448..832be632478f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -36,12 +36,6 @@
 #include <linux/pm_runtime.h>
 #include "amdgpu_amdkfd.h"
 
-#if defined(CONFIG_VGA_SWITCHEROO)
-bool amdgpu_has_atpx(void);
-#else
-static inline bool amdgpu_has_atpx(void) { return false; }
-#endif
-
 /**
  * amdgpu_driver_unload_kms - Main unload function for KMS.
  *
@@ -243,7 +237,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 	struct amdgpu_device *adev = dev->dev_private;
 	struct drm_amdgpu_info *info = data;
 	struct amdgpu_mode_info *minfo = &adev->mode_info;
-	void __user *out = (void __user *)(long)info->return_pointer;
+	void __user *out = (void __user *)(uintptr_t)info->return_pointer;
 	uint32_t size = info->return_size;
 	struct drm_crtc *crtc;
 	uint32_t ui32 = 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index 7ea3cacf9f9f..38f739fb727b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -31,6 +31,7 @@
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/mmu_notifier.h>
+#include <linux/interval_tree.h>
 #include <drm/drmP.h>
 #include <drm/drm.h>
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index 5aac350b007f..cb89fff863c0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -122,20 +122,19 @@ static void amdgpu_ttm_placement_init(struct amdgpu_device *adev,
 
 	if (domain & AMDGPU_GEM_DOMAIN_VRAM) {
 		unsigned visible_pfn = adev->mc.visible_vram_size >> PAGE_SHIFT;
-		unsigned lpfn = 0;
-
-		/* This forces a reallocation if the flag wasn't set before */
-		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
-			lpfn = adev->mc.real_vram_size >> PAGE_SHIFT;
 
 		places[c].fpfn = 0;
-		places[c].lpfn = lpfn;
+		places[c].lpfn = 0;
 		places[c].flags = TTM_PL_FLAG_WC | TTM_PL_FLAG_UNCACHED |
 			TTM_PL_FLAG_VRAM;
+
 		if (flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
 			places[c].lpfn = visible_pfn;
 		else
 			places[c].flags |= TTM_PL_FLAG_TOPDOWN;
+
+		if (flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)
+			places[c].flags |= TTM_PL_FLAG_CONTIGUOUS;
 		c++;
 	}
 
@@ -651,6 +650,10 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
 	if (WARN_ON_ONCE(min_offset > max_offset))
 		return -EINVAL;
 
+	/* A shared bo cannot be migrated to VRAM */
+	if (bo->prime_shared_count && (domain == AMDGPU_GEM_DOMAIN_VRAM))
+		return -EINVAL;
+
 	if (bo->pin_count) {
 		uint32_t mem_type = bo->tbo.mem.mem_type;
 
@@ -928,8 +931,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 	size = bo->mem.num_pages << PAGE_SHIFT;
 	offset = bo->mem.start << PAGE_SHIFT;
 	/* TODO: figure out how to map scattered VRAM to the CPU */
-	if ((offset + size) <= adev->mc.visible_vram_size &&
-	    (abo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS))
+	if ((offset + size) <= adev->mc.visible_vram_size)
 		return 0;
 
 	/* Can't move a pinned BO to visible VRAM */
@@ -937,7 +939,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
 		return -EINVAL;
 
 	/* hurrah the memory is not visible ! */
-	abo->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
 	amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM);
 	lpfn =	adev->mc.visible_vram_size >> PAGE_SHIFT;
 	for (i = 0; i < abo->placement.num_placement; i++) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index 4731015f6101..ed6e5799016e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -130,7 +130,7 @@ psp_cmd_submit_buf(struct psp_context *psp,
 
 	while (*((unsigned int *)psp->fence_buf) != index) {
 		msleep(1);
-	};
+	}
 
 	amdgpu_bo_free_kernel(&cmd_buf_bo,
 			      &cmd_buf_mc_addr,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
index a87de18160a8..ee9d0f346d75 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@@ -226,8 +226,8 @@ TRACE_EVENT(amdgpu_vm_bo_map,
 
 	    TP_fast_assign(
 			   __entry->bo = bo_va ? bo_va->bo : NULL;
-			   __entry->start = mapping->it.start;
-			   __entry->last = mapping->it.last;
+			   __entry->start = mapping->start;
+			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;
 			   __entry->flags = mapping->flags;
 			   ),
@@ -250,8 +250,8 @@ TRACE_EVENT(amdgpu_vm_bo_unmap,
 
 	    TP_fast_assign(
 			   __entry->bo = bo_va->bo;
-			   __entry->start = mapping->it.start;
-			   __entry->last = mapping->it.last;
+			   __entry->start = mapping->start;
+			   __entry->last = mapping->last;
 			   __entry->offset = mapping->offset;
 			   __entry->flags = mapping->flags;
 			   ),
@@ -270,8 +270,8 @@ DECLARE_EVENT_CLASS(amdgpu_vm_mapping,
 			     ),
 
 	    TP_fast_assign(
-			   __entry->soffset = mapping->it.start;
-			   __entry->eoffset = mapping->it.last + 1;
+			   __entry->soffset = mapping->start;
+			   __entry->eoffset = mapping->last + 1;
 			   __entry->flags = mapping->flags;
 			   ),
 	    TP_printk("soffs=%010llx, eoffs=%010llx, flags=%08x",
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 244bb9aacf86..35d53a0d9ba6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -529,40 +529,12 @@ static int amdgpu_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_
 	case TTM_PL_TT:
 		break;
 	case TTM_PL_VRAM:
-		if (mem->start == AMDGPU_BO_INVALID_OFFSET)
-			return -EINVAL;
-
 		mem->bus.offset = mem->start << PAGE_SHIFT;
 		/* check if it's visible */
 		if ((mem->bus.offset + mem->bus.size) > adev->mc.visible_vram_size)
 			return -EINVAL;
 		mem->bus.base = adev->mc.aper_base;
 		mem->bus.is_iomem = true;
-#ifdef __alpha__
-		/*
-		 * Alpha: use bus.addr to hold the ioremap() return,
-		 * so we can modify bus.base below.
-		 */
-		if (mem->placement & TTM_PL_FLAG_WC)
-			mem->bus.addr =
-				ioremap_wc(mem->bus.base + mem->bus.offset,
-					   mem->bus.size);
-		else
-			mem->bus.addr =
-				ioremap_nocache(mem->bus.base + mem->bus.offset,
-						mem->bus.size);
-		if (!mem->bus.addr)
-			return -ENOMEM;
-
-		/*
-		 * Alpha: Use just the bus offset plus
-		 * the hose/domain memory base for bus.base.
-		 * It then can be used to build PTEs for VRAM
-		 * access, as done in ttm_bo_vm_fault().
-		 */
-		mem->bus.base = (mem->bus.base & 0x0ffffffffUL) +
-			adev->ddev->hose->dense_mem_base;
-#endif
 		break;
 	default:
 		return -EINVAL;
@@ -574,6 +546,18 @@ static void amdgpu_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_re
 {
 }
 
+static unsigned long amdgpu_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
+					   unsigned long page_offset)
+{
+	struct drm_mm_node *mm = bo->mem.mm_node;
+	uint64_t size = mm->size;
+	uint64_t offset = page_offset;
+
+	page_offset = do_div(offset, size);
+	mm += offset;
+	return (bo->mem.bus.base >> PAGE_SHIFT) + mm->start + page_offset;
+}
+
 /*
  * TTM backend functions.
  */
@@ -1089,6 +1073,7 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
 	.fault_reserve_notify = &amdgpu_bo_fault_reserve_notify,
 	.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
 	.io_mem_free = &amdgpu_ttm_io_mem_free,
+	.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
 };
 
 int amdgpu_ttm_init(struct amdgpu_device *adev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
index 0b92dd0c1d70..2ca09f111f08 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@@ -741,10 +741,10 @@ static int amdgpu_uvd_cs_pass2(struct amdgpu_uvd_cs_ctx *ctx)
 
 	start = amdgpu_bo_gpu_offset(bo);
 
-	end = (mapping->it.last + 1 - mapping->it.start);
+	end = (mapping->last + 1 - mapping->start);
 	end = end * AMDGPU_GPU_PAGE_SIZE + start;
 
-	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
+	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
 	start += addr;
 
 	amdgpu_set_ib_value(ctx->parser, ctx->ib_idx, ctx->data0,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
index 0184197eb000..c853400805d1 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@@ -595,13 +595,13 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 	}
 
 	if ((addr + (uint64_t)size) >
-	    ((uint64_t)mapping->it.last + 1) * AMDGPU_GPU_PAGE_SIZE) {
+	    (mapping->last + 1) * AMDGPU_GPU_PAGE_SIZE) {
 		DRM_ERROR("BO to small for addr 0x%010Lx %d %d\n",
 			  addr, lo, hi);
 		return -EINVAL;
 	}
 
-	addr -= ((uint64_t)mapping->it.start) * AMDGPU_GPU_PAGE_SIZE;
+	addr -= mapping->start * AMDGPU_GPU_PAGE_SIZE;
 	addr += amdgpu_bo_gpu_offset(bo);
 	addr -= ((uint64_t)size) * ((uint64_t)index);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
index ecef35a1fe33..ba8b8ae6234f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
@@ -122,9 +122,7 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
 
 	mutex_lock(&adev->virt.lock_kiq);
 	amdgpu_ring_alloc(ring, 32);
-	amdgpu_ring_emit_hdp_flush(ring);
 	amdgpu_ring_emit_rreg(ring, reg);
-	amdgpu_ring_emit_hdp_invalidate(ring);
 	amdgpu_fence_emit(ring, &f);
 	amdgpu_ring_commit(ring);
 	mutex_unlock(&adev->virt.lock_kiq);
@@ -150,9 +148,7 @@ void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
 
 	mutex_lock(&adev->virt.lock_kiq);
 	amdgpu_ring_alloc(ring, 32);
-	amdgpu_ring_emit_hdp_flush(ring);
 	amdgpu_ring_emit_wreg(ring, reg, v);
-	amdgpu_ring_emit_hdp_invalidate(ring);
 	amdgpu_fence_emit(ring, &f);
 	amdgpu_ring_commit(ring);
 	mutex_unlock(&adev->virt.lock_kiq);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 0235d7933efd..7ed5302b511a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -26,6 +26,7 @@
  *          Jerome Glisse
  */
 #include <linux/dma-fence-array.h>
+#include <linux/interval_tree_generic.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@@ -51,6 +52,15 @@
  * SI supports 16.
  */
 
+#define START(node) ((node)->start)
+#define LAST(node) ((node)->last)
+
+INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
+		     START, LAST, static, amdgpu_vm_it)
+
+#undef START
+#undef LAST
+
 /* Local structure. Encapsulate some VM table update parameters to reduce
  * the number of function parameters
  */
@@ -90,13 +100,14 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 	if (level == 0)
 		/* For the root directory */
 		return adev->vm_manager.max_pfn >>
-			(amdgpu_vm_block_size * adev->vm_manager.num_level);
+			(adev->vm_manager.block_size *
+			 adev->vm_manager.num_level);
 	else if (level == adev->vm_manager.num_level)
 		/* For the page tables on the leaves */
-		return AMDGPU_VM_PTE_COUNT;
+		return AMDGPU_VM_PTE_COUNT(adev);
 	else
 		/* Everything in between */
-		return 1 << amdgpu_vm_block_size;
+		return 1 << adev->vm_manager.block_size;
 }
 
 /**
@@ -261,7 +272,7 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
 				  unsigned level)
 {
 	unsigned shift = (adev->vm_manager.num_level - level) *
-		amdgpu_vm_block_size;
+		adev->vm_manager.block_size;
 	unsigned pt_idx, from, to;
 	int r;
 
@@ -365,11 +376,19 @@ int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
 	return amdgpu_vm_alloc_levels(adev, vm, &vm->root, saddr, eaddr, 0);
 }
 
-static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
-			      struct amdgpu_vm_id *id)
+/**
+ * amdgpu_vm_had_gpu_reset - check if reset occured since last use
+ *
+ * @adev: amdgpu_device pointer
+ * @id: VMID structure
+ *
+ * Check if GPU reset occured since last use of the VMID.
+ */
+static bool amdgpu_vm_had_gpu_reset(struct amdgpu_device *adev,
+				    struct amdgpu_vm_id *id)
 {
 	return id->current_gpu_reset_count !=
-		atomic_read(&adev->gpu_reset_counter) ? true : false;
+		atomic_read(&adev->gpu_reset_counter);
 }
 
 /**
@@ -455,7 +474,7 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		/* Check all the prerequisites to using this VMID */
 		if (!id)
 			continue;
-		if (amdgpu_vm_is_gpu_reset(adev, id))
+		if (amdgpu_vm_had_gpu_reset(adev, id))
 			continue;
 
 		if (atomic64_read(&id->owner) != vm->client_id)
@@ -483,7 +502,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		if (r)
 			goto error;
 
-		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;
 
@@ -504,9 +522,6 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	if (r)
 		goto error;
 
-	dma_fence_put(id->first);
-	id->first = dma_fence_get(fence);
-
 	dma_fence_put(id->last_flush);
 	id->last_flush = NULL;
 
@@ -557,8 +572,8 @@ static u64 amdgpu_vm_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
 {
 	u64 addr = mc_addr;
 
-	if (adev->mc.mc_funcs && adev->mc.mc_funcs->adjust_mc_addr)
-		addr = adev->mc.mc_funcs->adjust_mc_addr(adev, addr);
+	if (adev->gart.gart_funcs->adjust_mc_addr)
+		addr = adev->gart.gart_funcs->adjust_mc_addr(adev, addr);
 
 	return addr;
 }
@@ -583,60 +598,62 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 		id->gws_size != job->gws_size ||
 		id->oa_base != job->oa_base ||
 		id->oa_size != job->oa_size);
+	bool vm_flush_needed = job->vm_needs_flush ||
+		amdgpu_vm_ring_has_compute_vm_bug(ring);
+	unsigned patch_offset = 0;
 	int r;
 
-	if (job->vm_needs_flush || gds_switch_needed ||
-		amdgpu_vm_is_gpu_reset(adev, id) ||
-		amdgpu_vm_ring_has_compute_vm_bug(ring)) {
-		unsigned patch_offset = 0;
+	if (amdgpu_vm_had_gpu_reset(adev, id)) {
+		gds_switch_needed = true;
+		vm_flush_needed = true;
+	}
 
-		if (ring->funcs->init_cond_exec)
-			patch_offset = amdgpu_ring_init_cond_exec(ring);
+	if (!vm_flush_needed && !gds_switch_needed)
+		return 0;
 
-		if (ring->funcs->emit_pipeline_sync &&
-			(job->vm_needs_flush || gds_switch_needed ||
-			amdgpu_vm_ring_has_compute_vm_bug(ring)))
-			amdgpu_ring_emit_pipeline_sync(ring);
+	if (ring->funcs->init_cond_exec)
+		patch_offset = amdgpu_ring_init_cond_exec(ring);
 
-		if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
-			amdgpu_vm_is_gpu_reset(adev, id))) {
-			struct dma_fence *fence;
-			u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
+	if (ring->funcs->emit_pipeline_sync)
+		amdgpu_ring_emit_pipeline_sync(ring);
 
-			trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
-			amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
+	if (ring->funcs->emit_vm_flush && vm_flush_needed) {
+		u64 pd_addr = amdgpu_vm_adjust_mc_addr(adev, job->vm_pd_addr);
+		struct dma_fence *fence;
 
-			r = amdgpu_fence_emit(ring, &fence);
-			if (r)
-				return r;
+		trace_amdgpu_vm_flush(pd_addr, ring->idx, job->vm_id);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, pd_addr);
 
-			mutex_lock(&adev->vm_manager.lock);
-			dma_fence_put(id->last_flush);
-			id->last_flush = fence;
-			mutex_unlock(&adev->vm_manager.lock);
-		}
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;
 
-		if (gds_switch_needed) {
-			id->gds_base = job->gds_base;
-			id->gds_size = job->gds_size;
-			id->gws_base = job->gws_base;
-			id->gws_size = job->gws_size;
-			id->oa_base = job->oa_base;
-			id->oa_size = job->oa_size;
-			amdgpu_ring_emit_gds_switch(ring, job->vm_id,
-							job->gds_base, job->gds_size,
-							job->gws_base, job->gws_size,
-							job->oa_base, job->oa_size);
-		}
+		mutex_lock(&adev->vm_manager.lock);
+		dma_fence_put(id->last_flush);
+		id->last_flush = fence;
+		mutex_unlock(&adev->vm_manager.lock);
+	}
 
-		if (ring->funcs->patch_cond_exec)
-			amdgpu_ring_patch_cond_exec(ring, patch_offset);
+	if (gds_switch_needed) {
+		id->gds_base = job->gds_base;
+		id->gds_size = job->gds_size;
+		id->gws_base = job->gws_base;
+		id->gws_size = job->gws_size;
+		id->oa_base = job->oa_base;
+		id->oa_size = job->oa_size;
+		amdgpu_ring_emit_gds_switch(ring, job->vm_id, job->gds_base,
+					    job->gds_size, job->gws_base,
+					    job->gws_size, job->oa_base,
+					    job->oa_size);
+	}
 
-		/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
-		if (ring->funcs->emit_switch_buffer) {
-			amdgpu_ring_emit_switch_buffer(ring);
-			amdgpu_ring_emit_switch_buffer(ring);
-		}
+	if (ring->funcs->patch_cond_exec)
+		amdgpu_ring_patch_cond_exec(ring, patch_offset);
+
+	/* the double SWITCH_BUFFER here *cannot* be skipped by COND_EXEC */
+	if (ring->funcs->emit_switch_buffer) {
+		amdgpu_ring_emit_switch_buffer(ring);
+		amdgpu_ring_emit_switch_buffer(ring);
 	}
 	return 0;
 }
@@ -960,7 +977,7 @@ static struct amdgpu_bo *amdgpu_vm_get_pt(struct amdgpu_pte_update_params *p,
 	unsigned idx, level = p->adev->vm_manager.num_level;
 
 	while (entry->entries) {
-		idx = addr >> (amdgpu_vm_block_size * level--);
+		idx = addr >> (p->adev->vm_manager.block_size * level--);
 		idx %= amdgpu_bo_size(entry->bo) / 8;
 		entry = &entry->entries[idx];
 	}
@@ -987,7 +1004,8 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 				  uint64_t start, uint64_t end,
 				  uint64_t dst, uint64_t flags)
 {
-	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;
+	struct amdgpu_device *adev = params->adev;
+	const uint64_t mask = AMDGPU_VM_PTE_COUNT(adev) - 1;
 
 	uint64_t cur_pe_start, cur_nptes, cur_dst;
 	uint64_t addr; /* next GPU address to be updated */
@@ -1011,7 +1029,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 	if ((addr & ~mask) == (end & ~mask))
 		nptes = end - addr;
 	else
-		nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+		nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
 	cur_pe_start = amdgpu_bo_gpu_offset(pt);
 	cur_pe_start += (addr & mask) * 8;
@@ -1039,7 +1057,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
-			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+			nptes = AMDGPU_VM_PTE_COUNT(adev) - (addr & mask);
 
 		next_pe_start = amdgpu_bo_gpu_offset(pt);
 		next_pe_start += (addr & mask) * 8;
@@ -1186,7 +1204,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	 * reserve space for one command every (1 << BLOCK_SIZE)
 	 *  entries or 2k dwords (whatever is smaller)
 	 */
-	ncmds = (nptes >> min(amdgpu_vm_block_size, 11)) + 1;
+	ncmds = (nptes >> min(adev->vm_manager.block_size, 11u)) + 1;
 
 	/* padding, etc. */
 	ndw = 64;
@@ -1301,7 +1319,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 				      struct drm_mm_node *nodes,
 				      struct dma_fence **fence)
 {
-	uint64_t pfn, src = 0, start = mapping->it.start;
+	uint64_t pfn, src = 0, start = mapping->start;
 	int r;
 
 	/* normally,bo_va->flags only contians READABLE and WIRTEABLE bit go here
@@ -1353,7 +1371,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 		addr += pfn << PAGE_SHIFT;
 
-		last = min((uint64_t)mapping->it.last, start + max_entries - 1);
+		last = min((uint64_t)mapping->last, start + max_entries - 1);
 		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
 						src, pages_addr, vm,
 						start, last, flags, addr,
@@ -1368,7 +1386,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		}
 		start = last + 1;
 
-	} while (unlikely(start != mapping->it.last + 1));
+	} while (unlikely(start != mapping->last + 1));
 
 	return 0;
 }
@@ -1518,7 +1536,7 @@ static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
 		if (fence)
 			dma_fence_wait(fence, false);
 
-		amdgpu_vm_prt_put(cb->adev);
+		amdgpu_vm_prt_put(adev);
 	} else {
 		cb->adev = adev;
 		if (!fence || dma_fence_add_callback(fence, &cb->cb,
@@ -1724,9 +1742,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		     uint64_t saddr, uint64_t offset,
 		     uint64_t size, uint64_t flags)
 {
-	struct amdgpu_bo_va_mapping *mapping;
+	struct amdgpu_bo_va_mapping *mapping, *tmp;
 	struct amdgpu_vm *vm = bo_va->vm;
-	struct interval_tree_node *it;
 	uint64_t eaddr;
 
 	/* validate the parameters */
@@ -1743,14 +1760,12 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-	if (it) {
-		struct amdgpu_bo_va_mapping *tmp;
-		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
+	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+	if (tmp) {
 		/* bo and tmp overlap, invalid addr */
 		dev_err(adev->dev, "bo %p va 0x%010Lx-0x%010Lx conflict with "
-			"0x%010lx-0x%010lx\n", bo_va->bo, saddr, eaddr,
-			tmp->it.start, tmp->it.last + 1);
+			"0x%010Lx-0x%010Lx\n", bo_va->bo, saddr, eaddr,
+			tmp->start, tmp->last + 1);
 		return -EINVAL;
 	}
 
@@ -1759,13 +1774,13 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
 		return -ENOMEM;
 
 	INIT_LIST_HEAD(&mapping->list);
-	mapping->it.start = saddr;
-	mapping->it.last = eaddr;
+	mapping->start = saddr;
+	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
 	list_add(&mapping->list, &bo_va->invalids);
-	interval_tree_insert(&mapping->it, &vm->va);
+	amdgpu_vm_it_insert(mapping, &vm->va);
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
@@ -1823,13 +1838,13 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 	eaddr /= AMDGPU_GPU_PAGE_SIZE;
 
-	mapping->it.start = saddr;
-	mapping->it.last = eaddr;
+	mapping->start = saddr;
+	mapping->last = eaddr;
 	mapping->offset = offset;
 	mapping->flags = flags;
 
 	list_add(&mapping->list, &bo_va->invalids);
-	interval_tree_insert(&mapping->it, &vm->va);
+	amdgpu_vm_it_insert(mapping, &vm->va);
 
 	if (flags & AMDGPU_PTE_PRT)
 		amdgpu_vm_prt_get(adev);
@@ -1860,7 +1875,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	saddr /= AMDGPU_GPU_PAGE_SIZE;
 
 	list_for_each_entry(mapping, &bo_va->valids, list) {
-		if (mapping->it.start == saddr)
+		if (mapping->start == saddr)
 			break;
 	}
 
@@ -1868,7 +1883,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 		valid = false;
 
 		list_for_each_entry(mapping, &bo_va->invalids, list) {
-			if (mapping->it.start == saddr)
+			if (mapping->start == saddr)
 				break;
 		}
 
@@ -1877,7 +1892,7 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
 	}
 
 	list_del(&mapping->list);
-	interval_tree_remove(&mapping->it, &vm->va);
+	amdgpu_vm_it_remove(mapping, &vm->va);
 	trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 
 	if (valid)
@@ -1905,7 +1920,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 				uint64_t saddr, uint64_t size)
 {
 	struct amdgpu_bo_va_mapping *before, *after, *tmp, *next;
-	struct interval_tree_node *it;
 	LIST_HEAD(removed);
 	uint64_t eaddr;
 
@@ -1927,43 +1941,42 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 	INIT_LIST_HEAD(&after->list);
 
 	/* Now gather all removed mappings */
-	it = interval_tree_iter_first(&vm->va, saddr, eaddr);
-	while (it) {
-		tmp = container_of(it, struct amdgpu_bo_va_mapping, it);
-		it = interval_tree_iter_next(it, saddr, eaddr);
-
+	tmp = amdgpu_vm_it_iter_first(&vm->va, saddr, eaddr);
+	while (tmp) {
 		/* Remember mapping split at the start */
-		if (tmp->it.start < saddr) {
-			before->it.start = tmp->it.start;
-			before->it.last = saddr - 1;
+		if (tmp->start < saddr) {
+			before->start = tmp->start;
+			before->last = saddr - 1;
 			before->offset = tmp->offset;
 			before->flags = tmp->flags;
 			list_add(&before->list, &tmp->list);
 		}
 
 		/* Remember mapping split at the end */
-		if (tmp->it.last > eaddr) {
-			after->it.start = eaddr + 1;
-			after->it.last = tmp->it.last;
+		if (tmp->last > eaddr) {
+			after->start = eaddr + 1;
+			after->last = tmp->last;
 			after->offset = tmp->offset;
-			after->offset += after->it.start - tmp->it.start;
+			after->offset += after->start - tmp->start;
 			after->flags = tmp->flags;
 			list_add(&after->list, &tmp->list);
 		}
 
 		list_del(&tmp->list);
 		list_add(&tmp->list, &removed);
+
+		tmp = amdgpu_vm_it_iter_next(tmp, saddr, eaddr);
 	}
 
 	/* And free them up */
 	list_for_each_entry_safe(tmp, next, &removed, list) {
-		interval_tree_remove(&tmp->it, &vm->va);
+		amdgpu_vm_it_remove(tmp, &vm->va);
 		list_del(&tmp->list);
 
-		if (tmp->it.start < saddr)
-		    tmp->it.start = saddr;
-		if (tmp->it.last > eaddr)
-		    tmp->it.last = eaddr;
+		if (tmp->start < saddr)
+		    tmp->start = saddr;
+		if (tmp->last > eaddr)
+		    tmp->last = eaddr;
 
 		list_add(&tmp->list, &vm->freed);
 		trace_amdgpu_vm_bo_unmap(NULL, tmp);
@@ -1971,7 +1984,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
 	/* Insert partial mapping before the range */
 	if (!list_empty(&before->list)) {
-		interval_tree_insert(&before->it, &vm->va);
+		amdgpu_vm_it_insert(before, &vm->va);
 		if (before->flags & AMDGPU_PTE_PRT)
 			amdgpu_vm_prt_get(adev);
 	} else {
@@ -1980,7 +1993,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 
 	/* Insert partial mapping after the range */
 	if (!list_empty(&after->list)) {
-		interval_tree_insert(&after->it, &vm->va);
+		amdgpu_vm_it_insert(after, &vm->va);
 		if (after->flags & AMDGPU_PTE_PRT)
 			amdgpu_vm_prt_get(adev);
 	} else {
@@ -2014,13 +2027,13 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 
 	list_for_each_entry_safe(mapping, next, &bo_va->valids, list) {
 		list_del(&mapping->list);
-		interval_tree_remove(&mapping->it, &vm->va);
+		amdgpu_vm_it_remove(mapping, &vm->va);
 		trace_amdgpu_vm_bo_unmap(bo_va, mapping);
 		list_add(&mapping->list, &vm->freed);
 	}
 	list_for_each_entry_safe(mapping, next, &bo_va->invalids, list) {
 		list_del(&mapping->list);
-		interval_tree_remove(&mapping->it, &vm->va);
+		amdgpu_vm_it_remove(mapping, &vm->va);
 		amdgpu_vm_free_mapping(adev, vm, mapping,
 				       bo_va->last_pt_update);
 	}
@@ -2051,6 +2064,44 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 	}
 }
 
+static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
+{
+	/* Total bits covered by PD + PTs */
+	unsigned bits = ilog2(vm_size) + 18;
+
+	/* Make sure the PD is 4K in size up to 8GB address space.
+	   Above that split equal between PD and PTs */
+	if (vm_size <= 8)
+		return (bits - 9);
+	else
+		return ((bits + 3) / 2);
+}
+
+/**
+ * amdgpu_vm_adjust_size - adjust vm size and block size
+ *
+ * @adev: amdgpu_device pointer
+ * @vm_size: the default vm size if it's set auto
+ */
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size)
+{
+	/* adjust vm size firstly */
+	if (amdgpu_vm_size == -1)
+		adev->vm_manager.vm_size = vm_size;
+	else
+		adev->vm_manager.vm_size = amdgpu_vm_size;
+
+	/* block size depends on vm size */
+	if (amdgpu_vm_block_size == -1)
+		adev->vm_manager.block_size =
+			amdgpu_vm_get_block_size(adev->vm_manager.vm_size);
+	else
+		adev->vm_manager.block_size = amdgpu_vm_block_size;
+
+	DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
+		adev->vm_manager.vm_size, adev->vm_manager.block_size);
+}
+
 /**
  * amdgpu_vm_init - initialize a vm instance
  *
@@ -2062,7 +2113,7 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
 int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 {
 	const unsigned align = min(AMDGPU_VM_PTB_ALIGN_SIZE,
-		AMDGPU_VM_PTE_COUNT * 8);
+		AMDGPU_VM_PTE_COUNT(adev) * 8);
 	unsigned ring_instance;
 	struct amdgpu_ring *ring;
 	struct amd_sched_rq *rq;
@@ -2162,9 +2213,9 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	if (!RB_EMPTY_ROOT(&vm->va)) {
 		dev_err(adev->dev, "still active bo inside vm\n");
 	}
-	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, it.rb) {
+	rbtree_postorder_for_each_entry_safe(mapping, tmp, &vm->va, rb) {
 		list_del(&mapping->list);
-		interval_tree_remove(&mapping->it, &vm->va);
+		amdgpu_vm_it_remove(mapping, &vm->va);
 		kfree(mapping);
 	}
 	list_for_each_entry_safe(mapping, tmp, &vm->freed, list) {
@@ -2227,7 +2278,6 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
 	for (i = 0; i < AMDGPU_NUM_VM; ++i) {
 		struct amdgpu_vm_id *id = &adev->vm_manager.ids[i];
 
-		dma_fence_put(adev->vm_manager.ids[i].first);
 		amdgpu_sync_free(&adev->vm_manager.ids[i].active);
 		dma_fence_put(id->flushed_updates);
 		dma_fence_put(id->last_flush);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index fbe17bf73a00..d9e57290dc71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -45,7 +45,7 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_MAX_UPDATE_SIZE	0x3FFFF
 
 /* number of entries in page table */
-#define AMDGPU_VM_PTE_COUNT (1 << amdgpu_vm_block_size)
+#define AMDGPU_VM_PTE_COUNT(adev) (1 << (adev)->vm_manager.block_size)
 
 /* PTBs (Page Table Blocks) need to be aligned to 32K */
 #define AMDGPU_VM_PTB_ALIGN_SIZE   32768
@@ -76,6 +76,14 @@ struct amdgpu_bo_list_entry;
 #define AMDGPU_VM_FAULT_STOP_FIRST	1
 #define AMDGPU_VM_FAULT_STOP_ALWAYS	2
 
+/* max number of VMHUB */
+#define AMDGPU_MAX_VMHUBS			2
+#define AMDGPU_GFXHUB				0
+#define AMDGPU_MMHUB				1
+
+/* hardcode that limit for now */
+#define AMDGPU_VA_RESERVED_SIZE			(8 << 20)
+
 struct amdgpu_vm_pt {
 	struct amdgpu_bo	*bo;
 	uint64_t		addr;
@@ -123,7 +131,6 @@ struct amdgpu_vm {
 
 struct amdgpu_vm_id {
 	struct list_head	list;
-	struct dma_fence		*first;
 	struct amdgpu_sync	active;
 	struct dma_fence		*last_flush;
 	atomic64_t		owner;
@@ -155,6 +162,8 @@ struct amdgpu_vm_manager {
 
 	uint64_t				max_pfn;
 	uint32_t				num_level;
+	uint64_t				vm_size;
+	uint32_t				block_size;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
 	/* is vm enabled? */
@@ -225,5 +234,6 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
 				uint64_t saddr, uint64_t size);
 void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
 		      struct amdgpu_bo_va *bo_va);
+void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint64_t vm_size);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
index 9e577e3d3147..a4831fe0223b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vram_mgr.c
@@ -93,7 +93,6 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 			       const struct ttm_place *place,
 			       struct ttm_mem_reg *mem)
 {
-	struct amdgpu_bo *bo = container_of(tbo, struct amdgpu_bo, tbo);
 	struct amdgpu_vram_mgr *mgr = man->priv;
 	struct drm_mm *mm = &mgr->mm;
 	struct drm_mm_node *nodes;
@@ -106,8 +105,8 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (!lpfn)
 		lpfn = man->size;
 
-	if (bo->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS ||
-	    place->lpfn || amdgpu_vram_page_split == -1) {
+	if (place->flags & TTM_PL_FLAG_CONTIGUOUS ||
+	    amdgpu_vram_page_split == -1) {
 		pages_per_node = ~0ul;
 		num_nodes = 1;
 	} else {
@@ -124,12 +123,14 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 	if (place->flags & TTM_PL_FLAG_TOPDOWN)
 		mode = DRM_MM_INSERT_HIGH;
 
+	mem->start = 0;
 	pages_left = mem->num_pages;
 
 	spin_lock(&mgr->lock);
 	for (i = 0; i < num_nodes; ++i) {
 		unsigned long pages = min(pages_left, pages_per_node);
 		uint32_t alignment = mem->page_alignment;
+		unsigned long start;
 
 		if (pages == pages_per_node)
 			alignment = pages_per_node;
@@ -141,11 +142,19 @@ static int amdgpu_vram_mgr_new(struct ttm_mem_type_manager *man,
 		if (unlikely(r))
 			goto error;
 
+		/* Calculate a virtual BO start address to easily check if
+		 * everything is CPU accessible.
+		 */
+		start = nodes[i].start + nodes[i].size;
+		if (start > mem->num_pages)
+			start -= mem->num_pages;
+		else
+			start = 0;
+		mem->start = max(mem->start, start);
 		pages_left -= pages;
 	}
 	spin_unlock(&mgr->lock);
 
-	mem->start = num_nodes == 1 ? nodes[0].start : AMDGPU_BO_INVALID_OFFSET;
 	mem->mm_node = nodes;
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
index f525ae4e0576..ba98d35340a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@@ -1090,23 +1090,10 @@ static u32 dce_v10_0_latency_watermark(struct dce10_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -1214,14 +1201,14 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
 {
 	struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
 	struct dce10_wm_params wm_low, wm_high;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -1236,7 +1223,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -1275,7 +1262,7 @@ static void dce_v10_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2631,7 +2618,8 @@ static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
 }
 
 static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				    u16 *blue, uint32_t size)
+				    u16 *blue, uint32_t size,
+				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
index 3eac27f24d94..e59bc42df18c 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@@ -1059,23 +1059,10 @@ static u32 dce_v11_0_latency_watermark(struct dce10_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -1183,14 +1170,14 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 {
 	struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
 	struct dce10_wm_params wm_low, wm_high;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -1205,7 +1192,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -1244,7 +1231,7 @@ static void dce_v11_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2651,7 +2638,8 @@ static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
 }
 
 static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				    u16 *blue, uint32_t size)
+				    u16 *blue, uint32_t size,
+				    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
index 838cf1a778f2..307269bda4fa 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
@@ -861,23 +861,10 @@ static u32 dce_v6_0_latency_watermark(struct dce6_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -986,7 +973,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
 	struct dce6_wm_params wm_low, wm_high;
 	u32 dram_channels;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 priority_a_mark = 0, priority_b_mark = 0;
@@ -996,8 +983,8 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 	fixed20_12 a, b, c;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 		priority_a_cnt = 0;
 		priority_b_cnt = 0;
 
@@ -1016,7 +1003,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -1043,7 +1030,7 @@ static void dce_v6_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -1998,7 +1985,8 @@ static void dce_v6_0_cursor_reset(struct drm_crtc *crtc)
 }
 
 static int dce_v6_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				   u16 *blue, uint32_t size)
+				   u16 *blue, uint32_t size,
+				   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
index 1b0717b11efe..6df7a28e8aac 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@@ -974,23 +974,10 @@ static u32 dce_v8_0_latency_watermark(struct dce8_wm_params *wm)
 	a.full = dfixed_const(available_bandwidth);
 	b.full = dfixed_const(wm->num_heads);
 	a.full = dfixed_div(a, b);
+	tmp = div_u64((u64) dmif_size * (u64) wm->disp_clk, mc_latency + 512);
+	tmp = min(dfixed_trunc(a), tmp);
 
-	b.full = dfixed_const(mc_latency + 512);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(b, c);
-
-	c.full = dfixed_const(dmif_size);
-	b.full = dfixed_div(c, b);
-
-	tmp = min(dfixed_trunc(a), dfixed_trunc(b));
-
-	b.full = dfixed_const(1000);
-	c.full = dfixed_const(wm->disp_clk);
-	b.full = dfixed_div(c, b);
-	c.full = dfixed_const(wm->bytes_per_pixel);
-	b.full = dfixed_mul(b, c);
-
-	lb_fill_bw = min(tmp, dfixed_trunc(b));
+	lb_fill_bw = min(tmp, wm->disp_clk * wm->bytes_per_pixel / 1000);
 
 	a.full = dfixed_const(max_src_lines_per_dst_line * wm->src_width * wm->bytes_per_pixel);
 	b.full = dfixed_const(1000);
@@ -1098,14 +1085,14 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 {
 	struct drm_display_mode *mode = &amdgpu_crtc->base.mode;
 	struct dce8_wm_params wm_low, wm_high;
-	u32 pixel_period;
+	u32 active_time;
 	u32 line_time = 0;
 	u32 latency_watermark_a = 0, latency_watermark_b = 0;
 	u32 tmp, wm_mask, lb_vblank_lead_lines = 0;
 
 	if (amdgpu_crtc->base.enabled && num_heads && mode) {
-		pixel_period = 1000000 / (u32)mode->clock;
-		line_time = min((u32)mode->crtc_htotal * pixel_period, (u32)65535);
+		active_time = 1000000UL * (u32)mode->crtc_hdisplay / (u32)mode->clock;
+		line_time = min((u32) (1000000UL * (u32)mode->crtc_htotal / (u32)mode->clock), (u32)65535);
 
 		/* watermark for high clocks */
 		if (adev->pm.dpm_enabled) {
@@ -1120,7 +1107,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_high.disp_clk = mode->clock;
 		wm_high.src_width = mode->crtc_hdisplay;
-		wm_high.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_high.active_time = active_time;
 		wm_high.blank_time = line_time - wm_high.active_time;
 		wm_high.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -1159,7 +1146,7 @@ static void dce_v8_0_program_watermarks(struct amdgpu_device *adev,
 
 		wm_low.disp_clk = mode->clock;
 		wm_low.src_width = mode->crtc_hdisplay;
-		wm_low.active_time = mode->crtc_hdisplay * pixel_period;
+		wm_low.active_time = active_time;
 		wm_low.blank_time = line_time - wm_low.active_time;
 		wm_low.interlaced = false;
 		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
@@ -2482,7 +2469,8 @@ static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
 }
 
 static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				   u16 *blue, uint32_t size)
+				   u16 *blue, uint32_t size,
+				   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index 5c51f9a97811..81a24b6b4846 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -165,7 +165,8 @@ static void dce_virtual_bandwidth_update(struct amdgpu_device *adev)
 }
 
 static int dce_virtual_crtc_gamma_set(struct drm_crtc *crtc, u16 *red,
-				      u16 *green, u16 *blue, uint32_t size)
+				      u16 *green, u16 *blue, uint32_t size,
+				      struct drm_modeset_acquire_ctx *ctx)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index e0fa0d30e162..dad8a4cd1b37 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4565,6 +4565,7 @@ static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
 		WREG32(mmCP_MEC_CNTL, (CP_MEC_CNTL__MEC_ME1_HALT_MASK | CP_MEC_CNTL__MEC_ME2_HALT_MASK));
 		for (i = 0; i < adev->gfx.num_compute_rings; i++)
 			adev->gfx.compute_ring[i].ready = false;
+		adev->gfx.kiq.ring.ready = false;
 	}
 	udelay(50);
 }
@@ -4721,14 +4722,10 @@ static int gfx_v8_0_mqd_init(struct amdgpu_ring *ring)
 	mqd->cp_hqd_eop_control = tmp;
 
 	/* enable doorbell? */
-	tmp = RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL);
-
-	if (ring->use_doorbell)
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					 DOORBELL_EN, 1);
-	else
-		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
-					 DOORBELL_EN, 0);
+	tmp = REG_SET_FIELD(RREG32(mmCP_HQD_PQ_DOORBELL_CONTROL),
+			    CP_HQD_PQ_DOORBELL_CONTROL,
+			    DOORBELL_EN,
+			    ring->use_doorbell ? 1 : 0);
 
 	mqd->cp_hqd_pq_doorbell_control = tmp;
 
@@ -4816,13 +4813,10 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct vi_mqd *mqd = ring->mqd_ptr;
-	uint32_t tmp;
 	int j;
 
 	/* disable wptr polling */
-	tmp = RREG32(mmCP_PQ_WPTR_POLL_CNTL);
-	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-	WREG32(mmCP_PQ_WPTR_POLL_CNTL, tmp);
+	WREG32_FIELD(CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	WREG32(mmCP_HQD_EOP_BASE_ADDR, mqd->cp_hqd_eop_base_addr_lo);
 	WREG32(mmCP_HQD_EOP_BASE_ADDR_HI, mqd->cp_hqd_eop_base_addr_hi);
@@ -4834,10 +4828,10 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
 	WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, mqd->cp_hqd_pq_doorbell_control);
 
 	/* disable the queue if it's active */
-	if (RREG32(mmCP_HQD_ACTIVE) & 1) {
+	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
 		WREG32(mmCP_HQD_DEQUEUE_REQUEST, 1);
 		for (j = 0; j < adev->usec_timeout; j++) {
-			if (!(RREG32(mmCP_HQD_ACTIVE) & 1))
+			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
 				break;
 			udelay(1);
 		}
@@ -4894,11 +4888,8 @@ static int gfx_v8_0_kiq_init_register(struct amdgpu_ring *ring)
 	/* activate the queue */
 	WREG32(mmCP_HQD_ACTIVE, mqd->cp_hqd_active);
 
-	if (ring->use_doorbell) {
-		tmp = RREG32(mmCP_PQ_STATUS);
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(mmCP_PQ_STATUS, tmp);
-	}
+	if (ring->use_doorbell)
+		WREG32_FIELD(CP_PQ_STATUS, DOORBELL_ENABLE, 1);
 
 	return 0;
 }
@@ -5471,19 +5462,18 @@ static void gfx_v8_0_inactive_hqd(struct amdgpu_device *adev,
 {
 	int i;
 
+	mutex_lock(&adev->srbm_mutex);
 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 	if (RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK) {
-		u32 tmp;
-		tmp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
-		tmp = REG_SET_FIELD(tmp, CP_HQD_DEQUEUE_REQUEST,
-				    DEQUEUE_REQ, 2);
-		WREG32(mmCP_HQD_DEQUEUE_REQUEST, tmp);
+		WREG32_FIELD(CP_HQD_DEQUEUE_REQUEST, DEQUEUE_REQ, 2);
 		for (i = 0; i < adev->usec_timeout; i++) {
 			if (!(RREG32(mmCP_HQD_ACTIVE) & CP_HQD_ACTIVE__ACTIVE_MASK))
 				break;
 			udelay(1);
 		}
 	}
+	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v8_0_pre_soft_reset(void *handle)
@@ -5589,11 +5579,13 @@ static int gfx_v8_0_soft_reset(void *handle)
 static void gfx_v8_0_init_hqd(struct amdgpu_device *adev,
 			      struct amdgpu_ring *ring)
 {
+	mutex_lock(&adev->srbm_mutex);
 	vi_srbm_select(adev, ring->me, ring->pipe, ring->queue, 0);
 	WREG32(mmCP_HQD_DEQUEUE_REQUEST, 0);
 	WREG32(mmCP_HQD_PQ_RPTR, 0);
 	WREG32(mmCP_HQD_PQ_WPTR, 0);
 	vi_srbm_select(adev, 0, 0, 0, 0);
+	mutex_unlock(&adev->srbm_mutex);
 }
 
 static int gfx_v8_0_post_soft_reset(void *handle)
@@ -6986,40 +6978,24 @@ static int gfx_v8_0_kiq_set_interrupt_state(struct amdgpu_device *adev,
 					    unsigned int type,
 					    enum amdgpu_interrupt_state state)
 {
-	uint32_t tmp, target;
 	struct amdgpu_ring *ring = &(adev->gfx.kiq.ring);
 
 	BUG_ON(ring->funcs->type != AMDGPU_RING_TYPE_KIQ);
 
-	if (ring->me == 1)
-		target = mmCP_ME1_PIPE0_INT_CNTL;
-	else
-		target = mmCP_ME2_PIPE0_INT_CNTL;
-	target += ring->pipe;
-
 	switch (type) {
 	case AMDGPU_CP_KIQ_IRQ_DRIVER0:
-		if (state == AMDGPU_IRQ_STATE_DISABLE) {
-			tmp = RREG32(mmCPC_INT_CNTL);
-			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
-						 GENERIC2_INT_ENABLE, 0);
-			WREG32(mmCPC_INT_CNTL, tmp);
-
-			tmp = RREG32(target);
-			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
-						 GENERIC2_INT_ENABLE, 0);
-			WREG32(target, tmp);
-		} else {
-			tmp = RREG32(mmCPC_INT_CNTL);
-			tmp = REG_SET_FIELD(tmp, CPC_INT_CNTL,
-						 GENERIC2_INT_ENABLE, 1);
-			WREG32(mmCPC_INT_CNTL, tmp);
-
-			tmp = RREG32(target);
-			tmp = REG_SET_FIELD(tmp, CP_ME2_PIPE0_INT_CNTL,
-						 GENERIC2_INT_ENABLE, 1);
-			WREG32(target, tmp);
-		}
+		WREG32_FIELD(CPC_INT_CNTL, GENERIC2_INT_ENABLE,
+			     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+		if (ring->me == 1)
+			WREG32_FIELD_OFFSET(CP_ME1_PIPE0_INT_CNTL,
+				     ring->pipe,
+				     GENERIC2_INT_ENABLE,
+				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
+		else
+			WREG32_FIELD_OFFSET(CP_ME2_PIPE0_INT_CNTL,
+				     ring->pipe,
+				     GENERIC2_INT_ENABLE,
+				     state == AMDGPU_IRQ_STATE_DISABLE ? 0 : 1);
 		break;
 	default:
 		BUG(); /* kiq only support GENERIC2_INT now */
@@ -7159,8 +7135,6 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
 	.emit_ib_size =	4, /* gfx_v8_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v8_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
-	.emit_hdp_flush = gfx_v8_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v8_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v8_0_ring_test_ring,
 	.test_ib = gfx_v8_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index 669bb98fc45d..a447b70841c9 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -1288,9 +1288,7 @@ static void gfx_v9_0_gpu_init(struct amdgpu_device *adev)
 	u32 tmp;
 	int i;
 
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL));
-	tmp = REG_SET_FIELD(tmp, GRBM_CNTL, READ_TIMEOUT, 0xff);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_CNTL), tmp);
+	WREG32_FIELD15(GC, 0, GRBM_CNTL, READ_TIMEOUT, 0xff);
 
 	gfx_v9_0_tiling_mode_table_init(adev);
 
@@ -1395,13 +1393,9 @@ void gfx_v9_0_rlc_stop(struct amdgpu_device *adev)
 
 static void gfx_v9_0_rlc_reset(struct amdgpu_device *adev)
 {
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET));
-
-	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
 	udelay(50);
-	tmp = REG_SET_FIELD(tmp, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_SOFT_RESET), tmp);
+	WREG32_FIELD15(GC, 0, GRBM_SOFT_RESET, SOFT_RESET_RLC, 0);
 	udelay(50);
 }
 
@@ -1410,10 +1404,8 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 #ifdef AMDGPU_RLC_DEBUG_RETRY
 	u32 rlc_ucode_ver;
 #endif
-	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL));
 
-	tmp = REG_SET_FIELD(tmp, RLC_CNTL, RLC_ENABLE_F32, 1);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL), tmp);
+	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
 
 	/* carrizo do enable cp interrupt after cp inited */
 	if (!(adev->flags & AMD_IS_APU))
@@ -1497,14 +1489,10 @@ static void gfx_v9_0_cp_gfx_enable(struct amdgpu_device *adev, bool enable)
 	int i;
 	u32 tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL));
 
-	if (enable) {
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 0);
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 0);
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 0);
-	} else {
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, 1);
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, 1);
-		tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, 1);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, ME_HALT, enable ? 0 : 1);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, PFP_HALT, enable ? 0 : 1);
+	tmp = REG_SET_FIELD(tmp, CP_ME_CNTL, CE_HALT, enable ? 0 : 1);
+	if (!enable) {
 		for (i = 0; i < adev->gfx.num_gfx_rings; i++)
 			adev->gfx.gfx_ring[i].ready = false;
 	}
@@ -2020,13 +2008,10 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct v9_mqd *mqd = ring->mqd_ptr;
-	uint32_t tmp;
 	int j;
 
 	/* disable wptr polling */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
-	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
+	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_BASE_ADDR),
 	       mqd->cp_hqd_eop_base_addr_lo);
@@ -2118,11 +2103,8 @@ static int gfx_v9_0_kiq_init_register(struct amdgpu_ring *ring)
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE),
 	       mqd->cp_hqd_active);
 
-	if (ring->use_doorbell) {
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
-	}
+	if (ring->use_doorbell)
+		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
 
 	return 0;
 }
@@ -2366,177 +2348,6 @@ static int gfx_v9_0_wait_for_idle(void *handle)
 	return -ETIMEDOUT;
 }
 
-static void gfx_v9_0_print_status(void *handle)
-{
-	int i;
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-
-	dev_info(adev->dev, "GFX 9.x registers\n");
-	dev_info(adev->dev, "  GRBM_STATUS=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS)));
-	dev_info(adev->dev, "  GRBM_STATUS2=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS2)));
-	dev_info(adev->dev, "  GRBM_STATUS_SE0=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE0)));
-	dev_info(adev->dev, "  GRBM_STATUS_SE1=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE1)));
-	dev_info(adev->dev, "  GRBM_STATUS_SE2=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE2)));
-	dev_info(adev->dev, "  GRBM_STATUS_SE3=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_STATUS_SE3)));
-	dev_info(adev->dev, "  CP_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STAT)));
-	dev_info(adev->dev, "  CP_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT1)));
-	dev_info(adev->dev, "  CP_STALLED_STAT2 = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT2)));
-	dev_info(adev->dev, "  CP_STALLED_STAT3 = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_STALLED_STAT3)));
-	dev_info(adev->dev, "  CP_CPF_BUSY_STAT = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_BUSY_STAT)));
-	dev_info(adev->dev, "  CP_CPF_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STALLED_STAT1)));
-	dev_info(adev->dev, "  CP_CPF_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPF_STATUS)));
-	dev_info(adev->dev, "  CP_CPC_BUSY_STAT = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_BUSY_STAT)));
-	dev_info(adev->dev, "  CP_CPC_STALLED_STAT1 = 0x%08x\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STALLED_STAT1)));
-	dev_info(adev->dev, "  CP_CPC_STATUS = 0x%08x\n", RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_CPC_STATUS)));
-
-	for (i = 0; i < 32; i++) {
-		dev_info(adev->dev, "  GB_TILE_MODE%d=0x%08X\n",
-			 i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_TILE_MODE0 ) + i*4));
-	}
-	for (i = 0; i < 16; i++) {
-		dev_info(adev->dev, "  GB_MACROTILE_MODE%d=0x%08X\n",
-			 i, RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_MACROTILE_MODE0) + i*4));
-	}
-	for (i = 0; i < adev->gfx.config.max_shader_engines; i++) {
-		dev_info(adev->dev, "  se: %d\n", i);
-		gfx_v9_0_select_se_sh(adev, i, 0xffffffff, 0xffffffff);
-		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG=0x%08X\n",
-			 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG)));
-		dev_info(adev->dev, "  PA_SC_RASTER_CONFIG_1=0x%08X\n",
-			 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_RASTER_CONFIG_1)));
-	}
-	gfx_v9_0_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
-
-	dev_info(adev->dev, "  GB_ADDR_CONFIG=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmGB_ADDR_CONFIG)));
-
-	dev_info(adev->dev, "  CP_MEQ_THRESHOLDS=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEQ_THRESHOLDS)));
-	dev_info(adev->dev, "  SX_DEBUG_1=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSX_DEBUG_1)));
-	dev_info(adev->dev, "  TA_CNTL_AUX=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmTA_CNTL_AUX)));
-	dev_info(adev->dev, "  SPI_CONFIG_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL)));
-	dev_info(adev->dev, "  SQ_CONFIG=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CONFIG)));
-	dev_info(adev->dev, "  DB_DEBUG=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG)));
-	dev_info(adev->dev, "  DB_DEBUG2=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG2)));
-	dev_info(adev->dev, "  DB_DEBUG3=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmDB_DEBUG3)));
-	dev_info(adev->dev, "  CB_HW_CONTROL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCB_HW_CONTROL)));
-	dev_info(adev->dev, "  SPI_CONFIG_CNTL_1=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSPI_CONFIG_CNTL_1)));
-	dev_info(adev->dev, "  PA_SC_FIFO_SIZE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FIFO_SIZE)));
-	dev_info(adev->dev, "  VGT_NUM_INSTANCES=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_NUM_INSTANCES)));
-	dev_info(adev->dev, "  CP_PERFMON_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PERFMON_CNTL)));
-	dev_info(adev->dev, "  PA_SC_FORCE_EOV_MAX_CNTS=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_FORCE_EOV_MAX_CNTS)));
-	dev_info(adev->dev, "  VGT_CACHE_INVALIDATION=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_CACHE_INVALIDATION)));
-	dev_info(adev->dev, "  VGT_GS_VERTEX_REUSE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmVGT_GS_VERTEX_REUSE)));
-	dev_info(adev->dev, "  PA_SC_LINE_STIPPLE_STATE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_LINE_STIPPLE_STATE)));
-	dev_info(adev->dev, "  PA_CL_ENHANCE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_CL_ENHANCE)));
-	dev_info(adev->dev, "  PA_SC_ENHANCE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmPA_SC_ENHANCE)));
-
-	dev_info(adev->dev, "  CP_ME_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_ME_CNTL)));
-	dev_info(adev->dev, "  CP_MAX_CONTEXT=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MAX_CONTEXT)));
-	dev_info(adev->dev, "  CP_DEVICE_ID=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_DEVICE_ID)));
-
-	dev_info(adev->dev, "  CP_SEM_WAIT_TIMER=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_SEM_WAIT_TIMER)));
-
-	dev_info(adev->dev, "  CP_RB_WPTR_DELAY=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_WPTR_DELAY)));
-	dev_info(adev->dev, "  CP_RB_VMID=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB_VMID)));
-	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
-	dev_info(adev->dev, "  CP_RB0_WPTR=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_WPTR)));
-	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR)));
-	dev_info(adev->dev, "  CP_RB0_RPTR_ADDR_HI=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_RPTR_ADDR_HI)));
-	dev_info(adev->dev, "  CP_RB0_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_CNTL)));
-	dev_info(adev->dev, "  CP_RB0_BASE=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE)));
-	dev_info(adev->dev, "  CP_RB0_BASE_HI=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_RB0_BASE_HI)));
-	dev_info(adev->dev, "  CP_MEC_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_MEC_CNTL)));
-
-	dev_info(adev->dev, "  SCRATCH_ADDR=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_ADDR)));
-	dev_info(adev->dev, "  SCRATCH_UMSK=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmSCRATCH_UMSK)));
-
-	dev_info(adev->dev, "  CP_INT_CNTL_RING0=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0)));
-	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
-	dev_info(adev->dev, "  RLC_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CNTL)));
-	dev_info(adev->dev, "  RLC_CGCG_CGLS_CTRL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CGCG_CGLS_CTRL)));
-	dev_info(adev->dev, "  RLC_LB_CNTR_INIT=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_INIT)));
-	dev_info(adev->dev, "  RLC_LB_CNTR_MAX=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTR_MAX)));
-	dev_info(adev->dev, "  RLC_LB_INIT_CU_MASK=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_INIT_CU_MASK)));
-	dev_info(adev->dev, "  RLC_LB_PARAMS=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_PARAMS)));
-	dev_info(adev->dev, "  RLC_LB_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_LB_CNTL)));
-	dev_info(adev->dev, "  RLC_UCODE_CNTL=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_UCODE_CNTL)));
-
-	dev_info(adev->dev, "  RLC_GPM_GENERAL_6=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_6)));
-	dev_info(adev->dev, "  RLC_GPM_GENERAL_12=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_GENERAL_12)));
-	dev_info(adev->dev, "  RLC_GPM_TIMER_INT_3=0x%08X\n",
-		 RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_GPM_TIMER_INT_3)));
-	mutex_lock(&adev->srbm_mutex);
-	for (i = 0; i < 16; i++) {
-		soc15_grbm_select(adev, 0, 0, 0, i);
-		dev_info(adev->dev, "  VM %d:\n", i);
-		dev_info(adev->dev, "  SH_MEM_CONFIG=0x%08X\n",
-			 RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG)));
-		dev_info(adev->dev, "  SH_MEM_BASES=0x%08X\n",
-			 RREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES)));
-	}
-	soc15_grbm_select(adev, 0, 0, 0, 0);
-	mutex_unlock(&adev->srbm_mutex);
-}
-
 static int gfx_v9_0_soft_reset(void *handle)
 {
 	u32 grbm_soft_reset = 0;
@@ -2569,8 +2380,7 @@ static int gfx_v9_0_soft_reset(void *handle)
 						GRBM_SOFT_RESET, SOFT_RESET_RLC, 1);
 
 
-	if (grbm_soft_reset ) {
-		gfx_v9_0_print_status((void *)adev);
+	if (grbm_soft_reset) {
 		/* stop the rlc */
 		gfx_v9_0_rlc_stop(adev);
 
@@ -2596,7 +2406,6 @@ static int gfx_v9_0_soft_reset(void *handle)
 
 		/* Wait a little for things to settle down */
 		udelay(50);
-		gfx_v9_0_print_status((void *)adev);
 	}
 	return 0;
 }
@@ -3148,6 +2957,7 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
 	int usepfp = (ring->funcs->type == AMDGPU_RING_TYPE_GFX);
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
 	unsigned i;
 
@@ -3157,7 +2967,6 @@ static void gfx_v9_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-		uint32_t req = hub->get_invalidate_req(vm_id);
 
 		gfx_v9_0_write_data_to_reg(ring, usepfp, true,
 					   hub->ctx0_ptb_addr_lo32
@@ -3376,21 +3185,12 @@ static void gfx_v9_0_ring_emit_wreg(struct amdgpu_ring *ring, uint32_t reg,
 static void gfx_v9_0_set_gfx_eop_interrupt_state(struct amdgpu_device *adev,
 						 enum amdgpu_interrupt_state state)
 {
-	u32 cp_int_cntl;
-
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    TIME_STAMP_INT_ENABLE, 0);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
-		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl =
-			REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-				      TIME_STAMP_INT_ENABLE, 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+			       TIME_STAMP_INT_ENABLE,
+			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
 		break;
 	default:
 		break;
@@ -3446,20 +3246,12 @@ static int gfx_v9_0_set_priv_reg_fault_state(struct amdgpu_device *adev,
 					     unsigned type,
 					     enum amdgpu_interrupt_state state)
 {
-	u32 cp_int_cntl;
-
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_REG_INT_ENABLE, 0);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
-		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_REG_INT_ENABLE, 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
+		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+			       PRIV_REG_INT_ENABLE,
+			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
 		break;
 	default:
 		break;
@@ -3473,21 +3265,12 @@ static int gfx_v9_0_set_priv_inst_fault_state(struct amdgpu_device *adev,
 					      unsigned type,
 					      enum amdgpu_interrupt_state state)
 {
-	u32 cp_int_cntl;
-
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_INSTR_INT_ENABLE, 0);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
-		break;
 	case AMDGPU_IRQ_STATE_ENABLE:
-		cp_int_cntl = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0));
-		cp_int_cntl = REG_SET_FIELD(cp_int_cntl, CP_INT_CNTL_RING0,
-					    PRIV_INSTR_INT_ENABLE, 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_INT_CNTL_RING0), cp_int_cntl);
-		break;
+		WREG32_FIELD15(GC, 0, CP_INT_CNTL_RING0,
+			       PRIV_INSTR_INT_ENABLE,
+			       state == AMDGPU_IRQ_STATE_ENABLE ? 1 : 0);
 	default:
 		break;
 	}
@@ -3759,8 +3542,6 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
 	.emit_ib_size =	4, /* gfx_v9_0_ring_emit_ib_compute */
 	.emit_ib = gfx_v9_0_ring_emit_ib_compute,
 	.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
-	.emit_hdp_flush = gfx_v9_0_ring_emit_hdp_flush,
-	.emit_hdp_invalidate = gfx_v9_0_ring_emit_hdp_invalidate,
 	.test_ring = gfx_v9_0_ring_test_ring,
 	.test_ib = gfx_v9_0_ring_test_ib,
 	.insert_nop = amdgpu_ring_insert_nop,
@@ -3975,9 +3756,7 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 			       ring->pipe,
 			       ring->queue, 0);
 	/* disable wptr polling */
-	tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL));
-	tmp = REG_SET_FIELD(tmp, CP_PQ_WPTR_POLL_CNTL, EN, 0);
-	WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL), tmp);
+	WREG32_FIELD15(GC, 0, CP_PQ_WPTR_POLL_CNTL, EN, 0);
 
 	/* write the EOP addr */
 	BUG_ON(ring->me != 1 || ring->pipe != 0); /* can't handle other cases eop address */
@@ -4121,11 +3900,8 @@ static int gfx_v9_0_init_queue(struct amdgpu_ring *ring)
 	amdgpu_bo_kunmap(ring->mqd_obj);
 	amdgpu_bo_unreserve(ring->mqd_obj);
 
-	if (use_doorbell) {
-		tmp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS));
-		tmp = REG_SET_FIELD(tmp, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
-		WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_STATUS), tmp);
-	}
+	if (use_doorbell)
+		WREG32_FIELD15(GC, 0, CP_PQ_STATUS, DOORBELL_ENABLE, 1);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
index 30ef3126c8a9..005075ff00f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfxhub_v1_0.c
@@ -222,7 +222,7 @@ int gfxhub_v1_0_gart_enable(struct amdgpu_device *adev)
 				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				PAGE_TABLE_BLOCK_SIZE,
-				    amdgpu_vm_block_size - 9);
+				adev->vm_manager.block_size - 9);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
@@ -299,36 +299,6 @@ void gfxhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev,
 	WREG32(SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
 }
 
-static uint32_t gfxhub_v1_0_get_invalidate_req(unsigned int vm_id)
-{
-	u32 req = 0;
-
-	/* invalidate using legacy mode on vm_id*/
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
-			    PER_VMID_INVALIDATE_REQ, 1 << vm_id);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
-			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);
-
-	return req;
-}
-
-static uint32_t gfxhub_v1_0_get_vm_protection_bits(void)
-{
-	return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
-}
-
 static int gfxhub_v1_0_early_init(void *handle)
 {
 	return 0;
@@ -361,9 +331,6 @@ static int gfxhub_v1_0_sw_init(void *handle)
 	hub->vm_l2_pro_fault_cntl =
 		SOC15_REG_OFFSET(GC, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 
-	hub->get_invalidate_req = gfxhub_v1_0_get_invalidate_req;
-	hub->get_vm_protection_bits = gfxhub_v1_0_get_vm_protection_bits;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
index d9586601a437..631aef38126d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v6_0.c
@@ -543,7 +543,8 @@ static int gmc_v6_0_gart_enable(struct amdgpu_device *adev)
 	WREG32(mmVM_CONTEXT1_CNTL,
 	       VM_CONTEXT1_CNTL__ENABLE_CONTEXT_MASK |
 	       (1UL << VM_CONTEXT1_CNTL__PAGE_TABLE_DEPTH__SHIFT) |
-	       ((amdgpu_vm_block_size - 9) << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
+	       ((adev->vm_manager.block_size - 9)
+	       << VM_CONTEXT1_CNTL__PAGE_TABLE_BLOCK_SIZE__SHIFT));
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		gmc_v6_0_set_fault_enable_default(adev, false);
 	else
@@ -848,7 +849,8 @@ static int gmc_v6_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
+	amdgpu_vm_adjust_size(adev, 64);
+	adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
 	adev->mc.mc_mask = 0xffffffffffULL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
index 0c0a6015cca5..92abe12d92bb 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v7_0.c
@@ -37,6 +37,8 @@
 #include "oss/oss_2_0_d.h"
 #include "oss/oss_2_0_sh_mask.h"
 
+#include "amdgpu_atombios.h"
+
 static void gmc_v7_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v7_0_set_irq_funcs(struct amdgpu_device *adev);
 static int gmc_v7_0_wait_for_idle(void *handle);
@@ -325,48 +327,51 @@ static void gmc_v7_0_mc_program(struct amdgpu_device *adev)
  */
 static int gmc_v7_0_mc_init(struct amdgpu_device *adev)
 {
-	u32 tmp;
-	int chansize, numchan;
-
-	/* Get VRAM informations */
-	tmp = RREG32(mmMC_ARB_RAMCFG);
-	if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
-		chansize = 64;
-	} else {
-		chansize = 32;
-	}
-	tmp = RREG32(mmMC_SHARED_CHMAP);
-	switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
-	case 0:
-	default:
-		numchan = 1;
-		break;
-	case 1:
-		numchan = 2;
-		break;
-	case 2:
-		numchan = 4;
-		break;
-	case 3:
-		numchan = 8;
-		break;
-	case 4:
-		numchan = 3;
-		break;
-	case 5:
-		numchan = 6;
-		break;
-	case 6:
-		numchan = 10;
-		break;
-	case 7:
-		numchan = 12;
-		break;
-	case 8:
-		numchan = 16;
-		break;
+	adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
+	if (!adev->mc.vram_width) {
+		u32 tmp;
+		int chansize, numchan;
+
+		/* Get VRAM informations */
+		tmp = RREG32(mmMC_ARB_RAMCFG);
+		if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+			chansize = 64;
+		} else {
+			chansize = 32;
+		}
+		tmp = RREG32(mmMC_SHARED_CHMAP);
+		switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+		case 0:
+		default:
+			numchan = 1;
+			break;
+		case 1:
+			numchan = 2;
+			break;
+		case 2:
+			numchan = 4;
+			break;
+		case 3:
+			numchan = 8;
+			break;
+		case 4:
+			numchan = 3;
+			break;
+		case 5:
+			numchan = 6;
+			break;
+		case 6:
+			numchan = 10;
+			break;
+		case 7:
+			numchan = 12;
+			break;
+		case 8:
+			numchan = 16;
+			break;
+		}
+		adev->mc.vram_width = numchan * chansize;
 	}
-	adev->mc.vram_width = numchan * chansize;
 	/* Could aper size report 0 ? */
 	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
@@ -639,7 +644,7 @@ static int gmc_v7_0_gart_enable(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, ENABLE_CONTEXT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_DEPTH, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
-			    amdgpu_vm_block_size - 9);
+			    adev->vm_manager.block_size - 9);
 	WREG32(mmVM_CONTEXT1_CNTL, tmp);
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		gmc_v7_0_set_fault_enable_default(adev, false);
@@ -998,7 +1003,8 @@ static int gmc_v7_0_sw_init(void *handle)
 	 * Currently set to 4GB ((1 << 20) 4k pages).
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
-	adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
+	amdgpu_vm_adjust_size(adev, 64);
+	adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
 	/* Set the internal MC address mask
 	 * This is the max address of the GPU's
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index d19d1c5e2847..f2ccefc66fd4 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -38,6 +38,8 @@
 #include "vid.h"
 #include "vi.h"
 
+#include "amdgpu_atombios.h"
+
 
 static void gmc_v8_0_set_gart_funcs(struct amdgpu_device *adev);
 static void gmc_v8_0_set_irq_funcs(struct amdgpu_device *adev);
@@ -487,48 +489,51 @@ static void gmc_v8_0_mc_program(struct amdgpu_device *adev)
  */
 static int gmc_v8_0_mc_init(struct amdgpu_device *adev)
 {
-	u32 tmp;
-	int chansize, numchan;
-
-	/* Get VRAM informations */
-	tmp = RREG32(mmMC_ARB_RAMCFG);
-	if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
-		chansize = 64;
-	} else {
-		chansize = 32;
-	}
-	tmp = RREG32(mmMC_SHARED_CHMAP);
-	switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
-	case 0:
-	default:
-		numchan = 1;
-		break;
-	case 1:
-		numchan = 2;
-		break;
-	case 2:
-		numchan = 4;
-		break;
-	case 3:
-		numchan = 8;
-		break;
-	case 4:
-		numchan = 3;
-		break;
-	case 5:
-		numchan = 6;
-		break;
-	case 6:
-		numchan = 10;
-		break;
-	case 7:
-		numchan = 12;
-		break;
-	case 8:
-		numchan = 16;
-		break;
+	adev->mc.vram_width = amdgpu_atombios_get_vram_width(adev);
+	if (!adev->mc.vram_width) {
+		u32 tmp;
+		int chansize, numchan;
+
+		/* Get VRAM informations */
+		tmp = RREG32(mmMC_ARB_RAMCFG);
+		if (REG_GET_FIELD(tmp, MC_ARB_RAMCFG, CHANSIZE)) {
+			chansize = 64;
+		} else {
+			chansize = 32;
+		}
+		tmp = RREG32(mmMC_SHARED_CHMAP);
+		switch (REG_GET_FIELD(tmp, MC_SHARED_CHMAP, NOOFCHAN)) {
+		case 0:
+		default:
+			numchan = 1;
+			break;
+		case 1:
+			numchan = 2;
+			break;
+		case 2:
+			numchan = 4;
+			break;
+		case 3:
+			numchan = 8;
+			break;
+		case 4:
+			numchan = 3;
+			break;
+		case 5:
+			numchan = 6;
+			break;
+		case 6:
+			numchan = 10;
+			break;
+		case 7:
+			numchan = 12;
+			break;
+		case 8:
+			numchan = 16;
+			break;
+		}
+		adev->mc.vram_width = numchan * chansize;
 	}
-	adev->mc.vram_width = numchan * chansize;
 	/* Could aper size report 0 ? */
 	adev->mc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->mc.aper_size = pci_resource_len(adev->pdev, 0);
@@ -848,7 +853,7 @@ static int gmc_v8_0_gart_enable(struct amdgpu_device *adev)
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, WRITE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 	tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL, PAGE_TABLE_BLOCK_SIZE,
-			    amdgpu_vm_block_size - 9);
+			    adev->vm_manager.block_size - 9);
 	WREG32(mmVM_CONTEXT1_CNTL, tmp);
 	if (amdgpu_vm_fault_stop == AMDGPU_VM_FAULT_STOP_ALWAYS)
 		gmc_v8_0_set_fault_enable_default(adev, false);
@@ -1082,7 +1087,8 @@ static int gmc_v8_0_sw_init(void *handle)
 	 * Currently set to 4GB ((1 << 20) 4k pages).
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
-	adev->vm_manager.max_pfn = amdgpu_vm_size << 18;
+	amdgpu_vm_adjust_size(adev, 64);
+	adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
 	/* Set the internal MC address mask
 	 * This is the max address of the GPU's
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index df69aae99df4..3b045e0b114e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -75,11 +75,18 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 	struct amdgpu_vmhub *hub;
 	u32 tmp, reg, bits, i;
 
+	bits = VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
+		VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK;
+
 	switch (state) {
 	case AMDGPU_IRQ_STATE_DISABLE:
 		/* MM HUB */
 		hub = &adev->vmhub[AMDGPU_MMHUB];
-		bits = hub->get_vm_protection_bits();
 		for (i = 0; i< 16; i++) {
 			reg = hub->vm_context0_cntl + i;
 			tmp = RREG32(reg);
@@ -89,7 +96,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 
 		/* GFX HUB */
 		hub = &adev->vmhub[AMDGPU_GFXHUB];
-		bits = hub->get_vm_protection_bits();
 		for (i = 0; i < 16; i++) {
 			reg = hub->vm_context0_cntl + i;
 			tmp = RREG32(reg);
@@ -100,7 +106,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 	case AMDGPU_IRQ_STATE_ENABLE:
 		/* MM HUB */
 		hub = &adev->vmhub[AMDGPU_MMHUB];
-		bits = hub->get_vm_protection_bits();
 		for (i = 0; i< 16; i++) {
 			reg = hub->vm_context0_cntl + i;
 			tmp = RREG32(reg);
@@ -110,7 +115,6 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 
 		/* GFX HUB */
 		hub = &adev->vmhub[AMDGPU_GFXHUB];
-		bits = hub->get_vm_protection_bits();
 		for (i = 0; i < 16; i++) {
 			reg = hub->vm_context0_cntl + i;
 			tmp = RREG32(reg);
@@ -129,8 +133,7 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				struct amdgpu_irq_src *source,
 				struct amdgpu_iv_entry *entry)
 {
-	struct amdgpu_vmhub *gfxhub = &adev->vmhub[AMDGPU_GFXHUB];
-	struct amdgpu_vmhub *mmhub = &adev->vmhub[AMDGPU_MMHUB];
+	struct amdgpu_vmhub *hub = &adev->vmhub[entry->vm_id_src];
 	uint32_t status = 0;
 	u64 addr;
 
@@ -138,13 +141,8 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
 	if (!amdgpu_sriov_vf(adev)) {
-		if (entry->vm_id_src) {
-			status = RREG32(mmhub->vm_l2_pro_fault_status);
-			WREG32_P(mmhub->vm_l2_pro_fault_cntl, 1, ~1);
-		} else {
-			status = RREG32(gfxhub->vm_l2_pro_fault_status);
-			WREG32_P(gfxhub->vm_l2_pro_fault_cntl, 1, ~1);
-		}
+		status = RREG32(hub->vm_l2_pro_fault_status);
+		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
 	}
 
 	if (printk_ratelimit()) {
@@ -175,6 +173,25 @@ static void gmc_v9_0_set_irq_funcs(struct amdgpu_device *adev)
 	adev->mc.vm_fault.funcs = &gmc_v9_0_irq_funcs;
 }
 
+static uint32_t gmc_v9_0_get_invalidate_req(unsigned int vm_id)
+{
+	u32 req = 0;
+
+	/* invalidate using legacy mode on vm_id*/
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+			    PER_VMID_INVALIDATE_REQ, 1 << vm_id);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
+	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
+			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);
+
+	return req;
+}
+
 /*
  * GART
  * VMID 0 is the physical GPU addresses as used by the kernel.
@@ -204,7 +221,7 @@ static void gmc_v9_0_gart_flush_gpu_tlb(struct amdgpu_device *adev,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &adev->vmhub[i];
-		u32 tmp = hub->get_invalidate_req(vmid);
+		u32 tmp = gmc_v9_0_get_invalidate_req(vmid);
 
 		WREG32_NO_KIQ(hub->vm_inv_eng0_req + eng, tmp);
 
@@ -337,30 +354,23 @@ static uint64_t gmc_v9_0_get_vm_pte_flags(struct amdgpu_device *adev,
 	return pte_flag;
 }
 
-static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
-	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
-	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
-	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags
-};
-
-static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
-{
-	if (adev->gart.gart_funcs == NULL)
-		adev->gart.gart_funcs = &gmc_v9_0_gart_funcs;
-}
-
 static u64 gmc_v9_0_adjust_mc_addr(struct amdgpu_device *adev, u64 mc_addr)
 {
 	return adev->vm_manager.vram_base_offset + mc_addr - adev->mc.vram_start;
 }
 
-static const struct amdgpu_mc_funcs gmc_v9_0_mc_funcs = {
+static const struct amdgpu_gart_funcs gmc_v9_0_gart_funcs = {
+	.flush_gpu_tlb = gmc_v9_0_gart_flush_gpu_tlb,
+	.set_pte_pde = gmc_v9_0_gart_set_pte_pde,
+	.get_vm_pte_flags = gmc_v9_0_get_vm_pte_flags,
 	.adjust_mc_addr = gmc_v9_0_adjust_mc_addr,
+	.get_invalidate_req = gmc_v9_0_get_invalidate_req,
 };
 
-static void gmc_v9_0_set_mc_funcs(struct amdgpu_device *adev)
+static void gmc_v9_0_set_gart_funcs(struct amdgpu_device *adev)
 {
-	adev->mc.mc_funcs = &gmc_v9_0_mc_funcs;
+	if (adev->gart.gart_funcs == NULL)
+		adev->gart.gart_funcs = &gmc_v9_0_gart_funcs;
 }
 
 static int gmc_v9_0_early_init(void *handle)
@@ -368,7 +378,6 @@ static int gmc_v9_0_early_init(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 
 	gmc_v9_0_set_gart_funcs(adev);
-	gmc_v9_0_set_mc_funcs(adev);
 	gmc_v9_0_set_irq_funcs(adev);
 
 	return 0;
@@ -511,7 +520,12 @@ static int gmc_v9_0_vm_init(struct amdgpu_device *adev)
 	 * amdkfd will use VMIDs 8-15
 	 */
 	adev->vm_manager.num_ids = AMDGPU_NUM_OF_VMIDS;
-	adev->vm_manager.num_level = 3;
+
+	/* TODO: fix num_level for APU when updating vm size and block size */
+	if (adev->flags & AMD_IS_APU)
+		adev->vm_manager.num_level = 1;
+	else
+		adev->vm_manager.num_level = 3;
 	amdgpu_vm_manager_init(adev);
 
 	/* base offset of vram pages */
@@ -543,9 +557,20 @@ static int gmc_v9_0_sw_init(void *handle)
 
 	if (adev->flags & AMD_IS_APU) {
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
+		amdgpu_vm_adjust_size(adev, 64);
 	} else {
 		/* XXX Don't know how to get VRAM type yet. */
 		adev->mc.vram_type = AMDGPU_VRAM_TYPE_HBM;
+		/*
+		 * To fulfill 4-level page support,
+		 * vm size is 256TB (48bit), maximum size of Vega10,
+		 * block size 512 (9bit)
+		 */
+		adev->vm_manager.vm_size = 1U << 18;
+		adev->vm_manager.block_size = 9;
+		DRM_INFO("vm size is %llu GB, block size is %u-bit\n",
+				adev->vm_manager.vm_size,
+				adev->vm_manager.block_size);
 	}
 
 	/* This interrupt is VMC page fault.*/
@@ -557,14 +582,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	if (r)
 		return r;
 
-	/* Because of four level VMPTs, vm size is at least 512GB.
-	 * The maximum size is 256TB (48bit).
-	 */
-	if (amdgpu_vm_size < 512) {
-		DRM_WARN("VM size is at least 512GB!\n");
-		amdgpu_vm_size = 512;
-	}
-	adev->vm_manager.max_pfn = (uint64_t)amdgpu_vm_size << 18;
+	adev->vm_manager.max_pfn = adev->vm_manager.vm_size << 18;
 
 	/* Set the internal MC address mask
 	 * This is the max address of the GPU's
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 266a0f47a908..62684510ddcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -242,7 +242,7 @@ int mmhub_v1_0_gart_enable(struct amdgpu_device *adev)
 				EXECUTE_PROTECTION_FAULT_ENABLE_DEFAULT, 1);
 		tmp = REG_SET_FIELD(tmp, VM_CONTEXT1_CNTL,
 				PAGE_TABLE_BLOCK_SIZE,
-				amdgpu_vm_block_size - 9);
+				adev->vm_manager.block_size - 9);
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_CNTL) + i, tmp);
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_LO32) + i*2, 0);
 		WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_CONTEXT1_PAGE_TABLE_START_ADDR_HI32) + i*2, 0);
@@ -317,36 +317,6 @@ void mmhub_v1_0_set_fault_enable_default(struct amdgpu_device *adev, bool value)
 	WREG32(SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL), tmp);
 }
 
-static uint32_t mmhub_v1_0_get_invalidate_req(unsigned int vm_id)
-{
-	u32 req = 0;
-
-	/* invalidate using legacy mode on vm_id*/
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
-			    PER_VMID_INVALIDATE_REQ, 1 << vm_id);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, FLUSH_TYPE, 0);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PTES, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE0, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE1, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L2_PDE2, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ, INVALIDATE_L1_PTES, 1);
-	req = REG_SET_FIELD(req, VM_INVALIDATE_ENG0_REQ,
-			    CLEAR_PROTECTION_FAULT_STATUS_ADDR,	0);
-
-	return req;
-}
-
-static uint32_t mmhub_v1_0_get_vm_protection_bits(void)
-{
-	return (VM_CONTEXT1_CNTL__RANGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__DUMMY_PAGE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__PDE0_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__VALID_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__READ_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__WRITE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK |
-		    VM_CONTEXT1_CNTL__EXECUTE_PROTECTION_FAULT_ENABLE_INTERRUPT_MASK);
-}
-
 static int mmhub_v1_0_early_init(void *handle)
 {
 	return 0;
@@ -379,9 +349,6 @@ static int mmhub_v1_0_sw_init(void *handle)
 	hub->vm_l2_pro_fault_cntl =
 		SOC15_REG_OFFSET(MMHUB, 0, mmVM_L2_PROTECTION_FAULT_CNTL);
 
-	hub->get_invalidate_req = mmhub_v1_0_get_invalidate_req;
-	hub->get_vm_protection_bits = mmhub_v1_0_get_vm_protection_bits;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index cfd5e54777bb..1493301b6a94 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -28,6 +28,7 @@
 #include "vega10/GC/gc_9_0_offset.h"
 #include "vega10/GC/gc_9_0_sh_mask.h"
 #include "soc15.h"
+#include "vega10_ih.h"
 #include "soc15_common.h"
 #include "mxgpu_ai.h"
 
@@ -133,7 +134,7 @@ static int xgpu_ai_poll_ack(struct amdgpu_device *adev)
 	return r;
 }
 
-static int xgpu_vi_poll_msg(struct amdgpu_device *adev, enum idh_event event)
+static int xgpu_ai_poll_msg(struct amdgpu_device *adev, enum idh_event event)
 {
 	int r = 0, timeout = AI_MAILBOX_TIMEDOUT;
 
@@ -172,7 +173,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	if (req == IDH_REQ_GPU_INIT_ACCESS ||
 		req == IDH_REQ_GPU_FINI_ACCESS ||
 		req == IDH_REQ_GPU_RESET_ACCESS) {
-		r = xgpu_vi_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
+		r = xgpu_ai_poll_msg(adev, IDH_READY_TO_ACCESS_GPU);
 		if (r)
 			return r;
 	}
@@ -180,6 +181,11 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 	return 0;
 }
 
+static int xgpu_ai_request_reset(struct amdgpu_device *adev)
+{
+	return xgpu_ai_send_access_requests(adev, IDH_REQ_GPU_RESET_ACCESS);
+}
+
 static int xgpu_ai_request_full_gpu_access(struct amdgpu_device *adev,
 					   bool init)
 {
@@ -201,7 +207,134 @@ static int xgpu_ai_release_full_gpu_access(struct amdgpu_device *adev,
 	return r;
 }
 
+static int xgpu_ai_mailbox_ack_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					struct amdgpu_iv_entry *entry)
+{
+	DRM_DEBUG("get ack intr and do nothing.\n");
+	return 0;
+}
+
+static int xgpu_ai_set_mailbox_ack_irq(struct amdgpu_device *adev,
+					struct amdgpu_irq_src *source,
+					unsigned type,
+					enum amdgpu_interrupt_state state)
+{
+	u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
+
+	tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, ACK_INT_EN,
+				(state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
+
+	return 0;
+}
+
+static void xgpu_ai_mailbox_flr_work(struct work_struct *work)
+{
+	struct amdgpu_virt *virt = container_of(work, struct amdgpu_virt, flr_work);
+	struct amdgpu_device *adev = container_of(virt, struct amdgpu_device, virt);
+
+	/* wait until RCV_MSG become 3 */
+	if (xgpu_ai_poll_msg(adev, IDH_FLR_NOTIFICATION_CMPL)) {
+		pr_err("failed to recieve FLR_CMPL\n");
+		return;
+	}
+
+	/* Trigger recovery due to world switch failure */
+	amdgpu_sriov_gpu_reset(adev, false);
+}
+
+static int xgpu_ai_set_mailbox_rcv_irq(struct amdgpu_device *adev,
+				       struct amdgpu_irq_src *src,
+				       unsigned type,
+				       enum amdgpu_interrupt_state state)
+{
+	u32 tmp = RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL));
+
+	tmp = REG_SET_FIELD(tmp, BIF_BX_PF0_MAILBOX_INT_CNTL, VALID_INT_EN,
+			    (state == AMDGPU_IRQ_STATE_ENABLE) ? 1 : 0);
+	WREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0, mmBIF_BX_PF0_MAILBOX_INT_CNTL), tmp);
+
+	return 0;
+}
+
+static int xgpu_ai_mailbox_rcv_irq(struct amdgpu_device *adev,
+				   struct amdgpu_irq_src *source,
+				   struct amdgpu_iv_entry *entry)
+{
+	int r;
+
+	/* see what event we get */
+	r = xgpu_ai_mailbox_rcv_msg(adev, IDH_FLR_NOTIFICATION);
+
+	/* only handle FLR_NOTIFY now */
+	if (!r)
+		schedule_work(&adev->virt.flr_work);
+
+	return 0;
+}
+
+static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_ack_irq_funcs = {
+	.set = xgpu_ai_set_mailbox_ack_irq,
+	.process = xgpu_ai_mailbox_ack_irq,
+};
+
+static const struct amdgpu_irq_src_funcs xgpu_ai_mailbox_rcv_irq_funcs = {
+	.set = xgpu_ai_set_mailbox_rcv_irq,
+	.process = xgpu_ai_mailbox_rcv_irq,
+};
+
+void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev)
+{
+	adev->virt.ack_irq.num_types = 1;
+	adev->virt.ack_irq.funcs = &xgpu_ai_mailbox_ack_irq_funcs;
+	adev->virt.rcv_irq.num_types = 1;
+	adev->virt.rcv_irq.funcs = &xgpu_ai_mailbox_rcv_irq_funcs;
+}
+
+int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 135, &adev->virt.rcv_irq);
+	if (r)
+		return r;
+
+	r = amdgpu_irq_add_id(adev, AMDGPU_IH_CLIENTID_LEGACY, 138, &adev->virt.ack_irq);
+	if (r) {
+		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+		return r;
+	}
+
+	return 0;
+}
+
+int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev)
+{
+	int r;
+
+	r = amdgpu_irq_get(adev, &adev->virt.rcv_irq, 0);
+	if (r)
+		return r;
+	r = amdgpu_irq_get(adev, &adev->virt.ack_irq, 0);
+	if (r) {
+		amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+		return r;
+	}
+
+	INIT_WORK(&adev->virt.flr_work, xgpu_ai_mailbox_flr_work);
+
+	return 0;
+}
+
+void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev)
+{
+	amdgpu_irq_put(adev, &adev->virt.ack_irq, 0);
+	amdgpu_irq_put(adev, &adev->virt.rcv_irq, 0);
+}
+
 const struct amdgpu_virt_ops xgpu_ai_virt_ops = {
 	.req_full_gpu	= xgpu_ai_request_full_gpu_access,
 	.rel_full_gpu	= xgpu_ai_release_full_gpu_access,
+	.reset_gpu = xgpu_ai_request_reset,
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
index bf8ab8fd4367..9aefc44d2c34 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.h
@@ -24,7 +24,7 @@
 #ifndef __MXGPU_AI_H__
 #define __MXGPU_AI_H__
 
-#define AI_MAILBOX_TIMEDOUT	150000
+#define AI_MAILBOX_TIMEDOUT	5000
 
 enum idh_request {
 	IDH_REQ_GPU_INIT_ACCESS = 1,
@@ -44,4 +44,9 @@ enum idh_event {
 
 extern const struct amdgpu_virt_ops xgpu_ai_virt_ops;
 
+void xgpu_ai_mailbox_set_irq_funcs(struct amdgpu_device *adev);
+int xgpu_ai_mailbox_add_irq_id(struct amdgpu_device *adev);
+int xgpu_ai_mailbox_get_irq(struct amdgpu_device *adev);
+void xgpu_ai_mailbox_put_irq(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 5191c45ffdf3..c3588d1c7cb0 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -491,7 +491,7 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
 
 	ucode_size = ucode->ucode_size;
 	ucode_mem = (uint32_t *)ucode->kaddr;
-	while (!ucode_size) {
+	while (ucode_size) {
 		fw_sram_reg_val = RREG32(fw_sram_data_reg_offset);
 
 		if (*ucode_mem != fw_sram_reg_val)
@@ -508,14 +508,10 @@ bool psp_v3_1_compare_sram_data(struct psp_context *psp,
 bool psp_v3_1_smu_reload_quirk(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
-	uint32_t reg, reg_val;
+	uint32_t reg;
 
-	reg_val = (smnMP1_FIRMWARE_FLAGS & 0xffffffff) | 0x03b00000;
-	WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg_val);
+	reg = smnMP1_FIRMWARE_FLAGS | 0x03b00000;
+	WREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_INDEX2), reg);
 	reg = RREG32(SOC15_REG_OFFSET(NBIO, 0, mmPCIE_DATA2));
-	if ((reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) >>
-	     MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED__SHIFT)
-		return true;
-
-	return false;
+	return (reg & MP1_FIRMWARE_FLAGS__INTERRUPTS_ENABLED_MASK) ? true : false;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 2dd2b20d727e..21f38d882335 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -1039,6 +1039,7 @@ static void sdma_v4_0_ring_emit_pipeline_sync(struct amdgpu_ring *ring)
 static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					 unsigned vm_id, uint64_t pd_addr)
 {
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
 	unsigned i;
 
@@ -1048,7 +1049,6 @@ static void sdma_v4_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-		uint32_t req = hub->get_invalidate_req(vm_id);
 
 		amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_SRBM_WRITE) |
 				  SDMA_PKT_SRBM_WRITE_HEADER_BYTE_EN(0xf));
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index bb14a45997b5..385de8617075 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -106,6 +106,8 @@ static u32 soc15_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 
 	if (adev->asic_type == CHIP_VEGA10)
 		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
+	else
+		BUG();
 
 	address = nbio_pcie_id->index_offset;
 	data = nbio_pcie_id->data_offset;
@@ -125,6 +127,8 @@ static void soc15_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 
 	if (adev->asic_type == CHIP_VEGA10)
 		nbio_pcie_id = &nbio_v6_1_pcie_index_data;
+	else
+		BUG();
 
 	address = nbio_pcie_id->index_offset;
 	data = nbio_pcie_id->data_offset;
@@ -493,7 +497,8 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		amdgpu_ip_block_add(adev, &mmhub_v1_0_ip_block);
 		amdgpu_ip_block_add(adev, &gmc_v9_0_ip_block);
 		amdgpu_ip_block_add(adev, &vega10_ih_ip_block);
-		amdgpu_ip_block_add(adev, &psp_v3_1_ip_block);
+		if (amdgpu_fw_load_type == 2 || amdgpu_fw_load_type == -1)
+			amdgpu_ip_block_add(adev, &psp_v3_1_ip_block);
 		if (!amdgpu_sriov_vf(adev))
 			amdgpu_ip_block_add(adev, &amdgpu_pp_ip_block);
 		if (adev->enable_virtual_display || amdgpu_sriov_vf(adev))
@@ -558,6 +563,7 @@ static int soc15_common_early_init(void *handle)
 
 	if (amdgpu_sriov_vf(adev)) {
 		amdgpu_virt_init_setting(adev);
+		xgpu_ai_mailbox_set_irq_funcs(adev);
 	}
 
 	/*
@@ -610,8 +616,23 @@ static int soc15_common_early_init(void *handle)
 	return 0;
 }
 
+static int soc15_common_late_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_ai_mailbox_get_irq(adev);
+
+	return 0;
+}
+
 static int soc15_common_sw_init(void *handle)
 {
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+
+	if (amdgpu_sriov_vf(adev))
+		xgpu_ai_mailbox_add_irq_id(adev);
+
 	return 0;
 }
 
@@ -642,6 +663,8 @@ static int soc15_common_hw_fini(void *handle)
 
 	/* disable the doorbell aperture */
 	soc15_enable_doorbell_aperture(adev, false);
+	if (amdgpu_sriov_vf(adev))
+		xgpu_ai_mailbox_put_irq(adev);
 
 	return 0;
 }
@@ -855,7 +878,7 @@ static int soc15_common_set_powergating_state(void *handle,
 const struct amd_ip_funcs soc15_common_ip_funcs = {
 	.name = "soc15_common",
 	.early_init = soc15_common_early_init,
-	.late_init = NULL,
+	.late_init = soc15_common_late_init,
 	.sw_init = soc15_common_sw_init,
 	.sw_fini = soc15_common_sw_fini,
 	.hw_init = soc15_common_hw_init,
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
index 9a4129d881aa..8ab0f78794a5 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v4_2.c
@@ -135,12 +135,9 @@ static int uvd_v4_2_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_uvd_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_sw_fini(adev);
 }
+
 static void uvd_v4_2_enable_mgcg(struct amdgpu_device *adev,
 				 bool enable);
 /**
@@ -230,11 +227,7 @@ static int uvd_v4_2_suspend(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_uvd_suspend(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_suspend(adev);
 }
 
 static int uvd_v4_2_resume(void *handle)
@@ -246,11 +239,7 @@ static int uvd_v4_2_resume(void *handle)
 	if (r)
 		return r;
 
-	r = uvd_v4_2_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return uvd_v4_2_hw_init(adev);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
index e448f7d86bc0..bb6d46e168a3 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v5_0.c
@@ -131,11 +131,7 @@ static int uvd_v5_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_uvd_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_sw_fini(adev);
 }
 
 /**
@@ -228,11 +224,7 @@ static int uvd_v5_0_suspend(void *handle)
 		return r;
 	uvd_v5_0_set_clockgating_state(adev, AMD_CG_STATE_GATE);
 
-	r = amdgpu_uvd_suspend(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_suspend(adev);
 }
 
 static int uvd_v5_0_resume(void *handle)
@@ -244,11 +236,7 @@ static int uvd_v5_0_resume(void *handle)
 	if (r)
 		return r;
 
-	r = uvd_v5_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return uvd_v5_0_hw_init(adev);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
index 5679a4249bd9..31db356476f8 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v6_0.c
@@ -134,11 +134,7 @@ static int uvd_v6_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_uvd_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_sw_fini(adev);
 }
 
 /**
@@ -230,11 +226,8 @@ static int uvd_v6_0_suspend(void *handle)
 		return r;
 
 	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU)) {
+	if (!(adev->flags & AMD_IS_APU))
 		r = amdgpu_uvd_suspend(adev);
-		if (r)
-			return r;
-	}
 
 	return r;
 }
@@ -250,11 +243,7 @@ static int uvd_v6_0_resume(void *handle)
 		if (r)
 			return r;
 	}
-	r = uvd_v6_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return uvd_v6_0_hw_init(adev);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 13f52e0af9b8..9bcf01469282 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -438,11 +438,7 @@ static int uvd_v7_0_sw_fini(void *handle)
 	for (i = 0; i < adev->uvd.num_enc_rings; ++i)
 		amdgpu_ring_fini(&adev->uvd.ring_enc[i]);
 
-	r = amdgpu_uvd_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_uvd_sw_fini(adev);
 }
 
 /**
@@ -547,11 +543,8 @@ static int uvd_v7_0_suspend(void *handle)
 		return r;
 
 	/* Skip this for APU for now */
-	if (!(adev->flags & AMD_IS_APU)) {
+	if (!(adev->flags & AMD_IS_APU))
 		r = amdgpu_uvd_suspend(adev);
-		if (r)
-			return r;
-	}
 
 	return r;
 }
@@ -567,11 +560,7 @@ static int uvd_v7_0_resume(void *handle)
 		if (r)
 			return r;
 	}
-	r = uvd_v7_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return uvd_v7_0_hw_init(adev);
 }
 
 /**
@@ -1045,6 +1034,7 @@ static void uvd_v7_0_vm_reg_wait(struct amdgpu_ring *ring,
 static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 					unsigned vm_id, uint64_t pd_addr)
 {
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	uint32_t data0, data1, mask;
 	unsigned eng = ring->idx;
 	unsigned i;
@@ -1055,7 +1045,6 @@ static void uvd_v7_0_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-		uint32_t req = hub->get_invalidate_req(vm_id);
 
 		data0 = (hub->ctx0_ptb_addr_hi32 + vm_id * 2) << 2;
 		data1 = upper_32_bits(pd_addr);
@@ -1091,6 +1080,7 @@ static void uvd_v7_0_enc_ring_insert_end(struct amdgpu_ring *ring)
 static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
 	unsigned i;
 
@@ -1100,7 +1090,6 @@ static void uvd_v7_0_enc_ring_emit_vm_flush(struct amdgpu_ring *ring,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-		uint32_t req = hub->get_invalidate_req(vm_id);
 
 		amdgpu_ring_write(ring, HEVC_ENC_CMD_REG_WRITE);
 		amdgpu_ring_write(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
index 49a6c45e65be..47f70827195b 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v2_0.c
@@ -451,11 +451,7 @@ static int vce_v2_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_sw_fini(adev);
 }
 
 static int vce_v2_0_hw_init(void *handle)
@@ -495,11 +491,7 @@ static int vce_v2_0_suspend(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_suspend(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_suspend(adev);
 }
 
 static int vce_v2_0_resume(void *handle)
@@ -511,11 +503,7 @@ static int vce_v2_0_resume(void *handle)
 	if (r)
 		return r;
 
-	r = vce_v2_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return vce_v2_0_hw_init(adev);
 }
 
 static int vce_v2_0_soft_reset(void *handle)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
index db0adac073c6..fb0819359909 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v3_0.c
@@ -417,11 +417,7 @@ static int vce_v3_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_sw_fini(adev);
 }
 
 static int vce_v3_0_hw_init(void *handle)
@@ -471,11 +467,7 @@ static int vce_v3_0_suspend(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_suspend(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_suspend(adev);
 }
 
 static int vce_v3_0_resume(void *handle)
@@ -487,11 +479,7 @@ static int vce_v3_0_resume(void *handle)
 	if (r)
 		return r;
 
-	r = vce_v3_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return vce_v3_0_hw_init(adev);
 }
 
 static void vce_v3_0_mc_resume(struct amdgpu_device *adev, int idx)
diff --git a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
index becc5f744a98..edde5fe938d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vce_v4_0.c
@@ -527,11 +527,7 @@ static int vce_v4_0_sw_fini(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_sw_fini(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_sw_fini(adev);
 }
 
 static int vce_v4_0_hw_init(void *handle)
@@ -584,11 +580,7 @@ static int vce_v4_0_suspend(void *handle)
 	if (r)
 		return r;
 
-	r = amdgpu_vce_suspend(adev);
-	if (r)
-		return r;
-
-	return r;
+	return amdgpu_vce_suspend(adev);
 }
 
 static int vce_v4_0_resume(void *handle)
@@ -600,11 +592,7 @@ static int vce_v4_0_resume(void *handle)
 	if (r)
 		return r;
 
-	r = vce_v4_0_hw_init(adev);
-	if (r)
-		return r;
-
-	return r;
+	return vce_v4_0_hw_init(adev);
 }
 
 static void vce_v4_0_mc_resume(struct amdgpu_device *adev)
@@ -985,6 +973,7 @@ static void vce_v4_0_ring_insert_end(struct amdgpu_ring *ring)
 static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 			 unsigned int vm_id, uint64_t pd_addr)
 {
+	uint32_t req = ring->adev->gart.gart_funcs->get_invalidate_req(vm_id);
 	unsigned eng = ring->idx;
 	unsigned i;
 
@@ -994,7 +983,6 @@ static void vce_v4_0_emit_vm_flush(struct amdgpu_ring *ring,
 
 	for (i = 0; i < AMDGPU_MAX_VMHUBS; ++i) {
 		struct amdgpu_vmhub *hub = &ring->adev->vmhub[i];
-		uint32_t req = hub->get_invalidate_req(vm_id);
 
 		amdgpu_ring_write(ring, VCE_CMD_REG_WRITE);
 		amdgpu_ring_write(ring,
diff --git a/drivers/gpu/drm/amd/amdgpu/vid.h b/drivers/gpu/drm/amd/amdgpu/vid.h
index b3a86e0e96e6..5f2ab9c1609a 100644
--- a/drivers/gpu/drm/amd/amdgpu/vid.h
+++ b/drivers/gpu/drm/amd/amdgpu/vid.h
@@ -362,7 +362,89 @@
 #define	PACKET3_WAIT_ON_DE_COUNTER_DIFF			0x88
 #define	PACKET3_SWITCH_BUFFER				0x8B
 #define	PACKET3_SET_RESOURCES				0xA0
+/* 1. header
+ * 2. CONTROL
+ * 3. QUEUE_MASK_LO [31:0]
+ * 4. QUEUE_MASK_HI [31:0]
+ * 5. GWS_MASK_LO [31:0]
+ * 6. GWS_MASK_HI [31:0]
+ * 7. OAC_MASK [15:0]
+ * 8. GDS_HEAP_SIZE [16:11] | GDS_HEAP_BASE [5:0]
+ */
+#              define PACKET3_SET_RESOURCES_VMID_MASK(x)     ((x) << 0)
+#              define PACKET3_SET_RESOURCES_UNMAP_LATENTY(x) ((x) << 16)
+#              define PACKET3_SET_RESOURCES_QUEUE_TYPE(x)    ((x) << 29)
 #define	PACKET3_MAP_QUEUES				0xA2
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. MQD_ADDR_LO [31:0]
+ * 5. MQD_ADDR_HI [31:0]
+ * 6. WPTR_ADDR_LO [31:0]
+ * 7. WPTR_ADDR_HI [31:0]
+ */
+/* CONTROL */
+#              define PACKET3_MAP_QUEUES_QUEUE_SEL(x)       ((x) << 4)
+#              define PACKET3_MAP_QUEUES_VMID(x)            ((x) << 8)
+#              define PACKET3_MAP_QUEUES_QUEUE_TYPE(x)      ((x) << 21)
+#              define PACKET3_MAP_QUEUES_ALLOC_FORMAT(x)    ((x) << 24)
+#              define PACKET3_MAP_QUEUES_ENGINE_SEL(x)      ((x) << 26)
+#              define PACKET3_MAP_QUEUES_NUM_QUEUES(x)      ((x) << 29)
+/* CONTROL2 */
+#              define PACKET3_MAP_QUEUES_CHECK_DISABLE(x)   ((x) << 1)
+#              define PACKET3_MAP_QUEUES_DOORBELL_OFFSET(x) ((x) << 2)
+#              define PACKET3_MAP_QUEUES_QUEUE(x)           ((x) << 26)
+#              define PACKET3_MAP_QUEUES_PIPE(x)            ((x) << 29)
+#              define PACKET3_MAP_QUEUES_ME(x)              ((x) << 31)
+#define	PACKET3_UNMAP_QUEUES				0xA3
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. CONTROL3
+ * 5. CONTROL4
+ * 6. CONTROL5
+ */
+/* CONTROL */
+#              define PACKET3_UNMAP_QUEUES_ACTION(x)           ((x) << 0)
+		/* 0 - PREEMPT_QUEUES
+		 * 1 - RESET_QUEUES
+		 * 2 - DISABLE_PROCESS_QUEUES
+		 * 3 - PREEMPT_QUEUES_NO_UNMAP
+		 */
+#              define PACKET3_UNMAP_QUEUES_QUEUE_SEL(x)        ((x) << 4)
+#              define PACKET3_UNMAP_QUEUES_ENGINE_SEL(x)       ((x) << 26)
+#              define PACKET3_UNMAP_QUEUES_NUM_QUEUES(x)       ((x) << 29)
+/* CONTROL2a */
+#              define PACKET3_UNMAP_QUEUES_PASID(x)            ((x) << 0)
+/* CONTROL2b */
+#              define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET0(x) ((x) << 2)
+/* CONTROL3a */
+#              define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET1(x) ((x) << 2)
+/* CONTROL3b */
+#              define PACKET3_UNMAP_QUEUES_RB_WPTR(x)          ((x) << 0)
+/* CONTROL4 */
+#              define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET2(x) ((x) << 2)
+/* CONTROL5 */
+#              define PACKET3_UNMAP_QUEUES_DOORBELL_OFFSET3(x) ((x) << 2)
+#define	PACKET3_QUERY_STATUS				0xA4
+/* 1. header
+ * 2. CONTROL
+ * 3. CONTROL2
+ * 4. ADDR_LO [31:0]
+ * 5. ADDR_HI [31:0]
+ * 6. DATA_LO [31:0]
+ * 7. DATA_HI [31:0]
+ */
+/* CONTROL */
+#              define PACKET3_QUERY_STATUS_CONTEXT_ID(x)       ((x) << 0)
+#              define PACKET3_QUERY_STATUS_INTERRUPT_SEL(x)    ((x) << 28)
+#              define PACKET3_QUERY_STATUS_COMMAND(x)          ((x) << 30)
+/* CONTROL2a */
+#              define PACKET3_QUERY_STATUS_PASID(x)            ((x) << 0)
+/* CONTROL2b */
+#              define PACKET3_QUERY_STATUS_DOORBELL_OFFSET(x)  ((x) << 2)
+#              define PACKET3_QUERY_STATUS_ENG_SEL(x)          ((x) << 25)
+
 
 #define VCE_CMD_NO_OP		0x00000000
 #define VCE_CMD_END		0x00000001
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index dfd4fe6f0578..9da5b0bb66d8 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -493,8 +493,10 @@ static int pp_dpm_dispatch_tasks(void *handle, enum amd_pp_event event_id,
 	{
 		enum amd_pm_state_type  ps;
 
-		if (input == NULL)
-			return -EINVAL;
+		if (input == NULL) {
+			ret = -EINVAL;
+			break;
+		}
 		ps = *(unsigned long *)input;
 
 		data.requested_ui_label = power_state_convert(ps);
@@ -539,15 +541,19 @@ static enum amd_pm_state_type pp_dpm_get_current_power_state(void *handle)
 	switch (state->classification.ui_label) {
 	case PP_StateUILabel_Battery:
 		pm_type = POWER_STATE_TYPE_BATTERY;
+		break;
 	case PP_StateUILabel_Balanced:
 		pm_type = POWER_STATE_TYPE_BALANCED;
+		break;
 	case PP_StateUILabel_Performance:
 		pm_type = POWER_STATE_TYPE_PERFORMANCE;
+		break;
 	default:
 		if (state->classification.flags & PP_StateClassificationFlag_Boot)
 			pm_type = POWER_STATE_TYPE_INTERNAL_BOOT;
 		else
 			pm_type = POWER_STATE_TYPE_DEFAULT;
+		break;
 	}
 	mutex_unlock(&pp_handle->pp_lock);
 
@@ -894,7 +900,7 @@ static int pp_dpm_set_sclk_od(void *handle, uint32_t value)
 
 	mutex_lock(&pp_handle->pp_lock);
 	ret = hwmgr->hwmgr_func->set_sclk_od(hwmgr, value);
-	mutex_lock(&pp_handle->pp_lock);
+	mutex_unlock(&pp_handle->pp_lock);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h
index 8e53d3a5e725..6a907c93fd9c 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_pptable.h
@@ -250,6 +250,29 @@ typedef struct _ATOM_Vega10_Fan_Table {
 	USHORT  usFanStartTemperature;
 } ATOM_Vega10_Fan_Table;
 
+typedef struct _ATOM_Vega10_Fan_Table_V2 {
+	UCHAR   ucRevId;
+	USHORT  usFanOutputSensitivity;
+	USHORT  usFanAcousticLimitRpm;
+	USHORT  usThrottlingRPM;
+	USHORT  usTargetTemperature;
+	USHORT  usMinimumPWMLimit;
+	USHORT  usTargetGfxClk;
+	USHORT  usFanGainEdge;
+	USHORT  usFanGainHotspot;
+	USHORT  usFanGainLiquid;
+	USHORT  usFanGainVrVddc;
+	USHORT  usFanGainVrMvdd;
+	USHORT  usFanGainPlx;
+	USHORT  usFanGainHbm;
+	UCHAR   ucEnableZeroRPM;
+	USHORT  usFanStopTemperature;
+	USHORT  usFanStartTemperature;
+	UCHAR   ucFanParameters;
+	UCHAR   ucFanMinRPM;
+	UCHAR   ucFanMaxRPM;
+} ATOM_Vega10_Fan_Table_V2;
+
 typedef struct _ATOM_Vega10_Thermal_Controller {
 	UCHAR ucRevId;
 	UCHAR ucType;           /* one of ATOM_VEGA10_PP_THERMALCONTROLLER_*/
@@ -305,6 +328,33 @@ typedef struct _ATOM_Vega10_PowerTune_Table {
 	USHORT usTemperatureLimitTedge;
 } ATOM_Vega10_PowerTune_Table;
 
+typedef struct _ATOM_Vega10_PowerTune_Table_V2
+{
+	UCHAR  ucRevId;
+	USHORT usSocketPowerLimit;
+	USHORT usBatteryPowerLimit;
+	USHORT usSmallPowerLimit;
+	USHORT usTdcLimit;
+	USHORT usEdcLimit;
+	USHORT usSoftwareShutdownTemp;
+	USHORT usTemperatureLimitHotSpot;
+	USHORT usTemperatureLimitLiquid1;
+	USHORT usTemperatureLimitLiquid2;
+	USHORT usTemperatureLimitHBM;
+	USHORT usTemperatureLimitVrSoc;
+	USHORT usTemperatureLimitVrMem;
+	USHORT usTemperatureLimitPlx;
+	USHORT usLoadLineResistance;
+	UCHAR ucLiquid1_I2C_address;
+	UCHAR ucLiquid2_I2C_address;
+	UCHAR ucLiquid_I2C_Line;
+	UCHAR ucVr_I2C_address;
+	UCHAR ucVr_I2C_Line;
+	UCHAR ucPlx_I2C_address;
+	UCHAR ucPlx_I2C_Line;
+	USHORT usTemperatureLimitTedge;
+} ATOM_Vega10_PowerTune_Table_V2;
+
 typedef struct _ATOM_Vega10_Hard_Limit_Record {
     ULONG  ulSOCCLKLimit;
     ULONG  ulGFXCLKLimit;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index 518634f995e7..8b55ae01132d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -116,14 +116,16 @@ static int init_thermal_controller(
 		const ATOM_Vega10_POWERPLAYTABLE *powerplay_table)
 {
 	const ATOM_Vega10_Thermal_Controller *thermal_controller;
-	const ATOM_Vega10_Fan_Table *fan_table;
+	const Vega10_PPTable_Generic_SubTable_Header *header;
+	const ATOM_Vega10_Fan_Table *fan_table_v1;
+	const ATOM_Vega10_Fan_Table_V2 *fan_table_v2;
 
 	thermal_controller = (ATOM_Vega10_Thermal_Controller *)
 			(((unsigned long)powerplay_table) +
 			le16_to_cpu(powerplay_table->usThermalControllerOffset));
 
 	PP_ASSERT_WITH_CODE((powerplay_table->usThermalControllerOffset != 0),
-			"Thermal controller table not set!", return -1);
+			"Thermal controller table not set!", return -EINVAL);
 
 	hwmgr->thermal_controller.ucType = thermal_controller->ucType;
 	hwmgr->thermal_controller.ucI2cLine = thermal_controller->ucI2cLine;
@@ -142,6 +144,9 @@ static int init_thermal_controller(
 	hwmgr->thermal_controller.fanInfo.ulMaxRPM =
 			thermal_controller->ucFanMaxRPM * 100UL;
 
+	hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay
+			= 100000;
+
 	set_hw_cap(
 			hwmgr,
 			ATOM_VEGA10_PP_THERMALCONTROLLER_NONE != hwmgr->thermal_controller.ucType,
@@ -150,54 +155,101 @@ static int init_thermal_controller(
 	if (!powerplay_table->usFanTableOffset)
 		return 0;
 
-	fan_table = (const ATOM_Vega10_Fan_Table *)
+	header = (const Vega10_PPTable_Generic_SubTable_Header *)
 			(((unsigned long)powerplay_table) +
 			le16_to_cpu(powerplay_table->usFanTableOffset));
 
-	PP_ASSERT_WITH_CODE((fan_table->ucRevId >= 8),
-		"Invalid Input Fan Table!", return -1);
+	if (header->ucRevId == 10) {
+		fan_table_v1 = (ATOM_Vega10_Fan_Table *)header;
 
-	hwmgr->thermal_controller.advanceFanControlParameters.ulCycleDelay
-		= 100000;
-	phm_cap_set(hwmgr->platform_descriptor.platformCaps,
-		PHM_PlatformCaps_MicrocodeFanControl);
-
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
-			le16_to_cpu(fan_table->usFanOutputSensitivity);
-	hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM =
-			le16_to_cpu(fan_table->usFanRPMMax);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMMaxLimit =
-			le16_to_cpu(fan_table->usThrottlingRPM);
-	hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit =
-			le32_to_cpu((uint32_t)(fan_table->usFanAcousticLimit));
-	hwmgr->thermal_controller.advanceFanControlParameters.usTMax =
-			le16_to_cpu(fan_table->usTargetTemperature);
-	hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin =
-			le16_to_cpu(fan_table->usMinimumPWMLimit);
-	hwmgr->thermal_controller.advanceFanControlParameters.ulTargetGfxClk =
-			le32_to_cpu((uint32_t)(fan_table->usTargetGfxClk));
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainEdge =
-			le16_to_cpu(fan_table->usFanGainEdge);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHotspot =
-			le16_to_cpu(fan_table->usFanGainHotspot);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainLiquid =
-			le16_to_cpu(fan_table->usFanGainLiquid);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrVddc =
-			le16_to_cpu(fan_table->usFanGainVrVddc);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrMvdd =
-			le16_to_cpu(fan_table->usFanGainVrMvdd);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainPlx =
-			le16_to_cpu(fan_table->usFanGainPlx);
-	hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHbm =
-			le16_to_cpu(fan_table->usFanGainHbm);
-
-	hwmgr->thermal_controller.advanceFanControlParameters.ucEnableZeroRPM =
-			fan_table->ucEnableZeroRPM;
-	hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStopTemperature =
-			le16_to_cpu(fan_table->usFanStopTemperature);
-	hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature =
-			le16_to_cpu(fan_table->usFanStartTemperature);
+		PP_ASSERT_WITH_CODE((fan_table_v1->ucRevId >= 8),
+				"Invalid Input Fan Table!", return -EINVAL);
+
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_MicrocodeFanControl);
+
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
+				le16_to_cpu(fan_table_v1->usFanOutputSensitivity);
+		hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM =
+				le16_to_cpu(fan_table_v1->usFanRPMMax);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMMaxLimit =
+				le16_to_cpu(fan_table_v1->usThrottlingRPM);
+		hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit =
+				le16_to_cpu(fan_table_v1->usFanAcousticLimit);
+		hwmgr->thermal_controller.advanceFanControlParameters.usTMax =
+				le16_to_cpu(fan_table_v1->usTargetTemperature);
+		hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin =
+				le16_to_cpu(fan_table_v1->usMinimumPWMLimit);
+		hwmgr->thermal_controller.advanceFanControlParameters.ulTargetGfxClk =
+				le16_to_cpu(fan_table_v1->usTargetGfxClk);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainEdge =
+				le16_to_cpu(fan_table_v1->usFanGainEdge);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHotspot =
+				le16_to_cpu(fan_table_v1->usFanGainHotspot);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainLiquid =
+				le16_to_cpu(fan_table_v1->usFanGainLiquid);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrVddc =
+				le16_to_cpu(fan_table_v1->usFanGainVrVddc);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrMvdd =
+				le16_to_cpu(fan_table_v1->usFanGainVrMvdd);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainPlx =
+				le16_to_cpu(fan_table_v1->usFanGainPlx);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHbm =
+				le16_to_cpu(fan_table_v1->usFanGainHbm);
+
+		hwmgr->thermal_controller.advanceFanControlParameters.ucEnableZeroRPM =
+				fan_table_v1->ucEnableZeroRPM;
+		hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStopTemperature =
+				le16_to_cpu(fan_table_v1->usFanStopTemperature);
+		hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature =
+				le16_to_cpu(fan_table_v1->usFanStartTemperature);
+	} else if (header->ucRevId > 10) {
+		fan_table_v2 = (ATOM_Vega10_Fan_Table_V2 *)header;
+
+		hwmgr->thermal_controller.fanInfo.ucTachometerPulsesPerRevolution =
+				fan_table_v2->ucFanParameters & ATOM_VEGA10_PP_FANPARAMETERS_TACHOMETER_PULSES_PER_REVOLUTION_MASK;
+		hwmgr->thermal_controller.fanInfo.ulMinRPM = fan_table_v2->ucFanMinRPM * 100UL;
+		hwmgr->thermal_controller.fanInfo.ulMaxRPM = fan_table_v2->ucFanMaxRPM * 100UL;
 
+		phm_cap_set(hwmgr->platform_descriptor.platformCaps,
+				PHM_PlatformCaps_MicrocodeFanControl);
+
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanOutputSensitivity =
+				le16_to_cpu(fan_table_v2->usFanOutputSensitivity);
+		hwmgr->thermal_controller.advanceFanControlParameters.usMaxFanRPM =
+				fan_table_v2->ucFanMaxRPM * 100UL;
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanRPMMaxLimit =
+				le16_to_cpu(fan_table_v2->usThrottlingRPM);
+		hwmgr->thermal_controller.advanceFanControlParameters.ulMinFanSCLKAcousticLimit =
+				le16_to_cpu(fan_table_v2->usFanAcousticLimitRpm);
+		hwmgr->thermal_controller.advanceFanControlParameters.usTMax =
+				le16_to_cpu(fan_table_v2->usTargetTemperature);
+		hwmgr->thermal_controller.advanceFanControlParameters.usPWMMin =
+				le16_to_cpu(fan_table_v2->usMinimumPWMLimit);
+		hwmgr->thermal_controller.advanceFanControlParameters.ulTargetGfxClk =
+				le16_to_cpu(fan_table_v2->usTargetGfxClk);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainEdge =
+				le16_to_cpu(fan_table_v2->usFanGainEdge);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHotspot =
+				le16_to_cpu(fan_table_v2->usFanGainHotspot);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainLiquid =
+				le16_to_cpu(fan_table_v2->usFanGainLiquid);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrVddc =
+				le16_to_cpu(fan_table_v2->usFanGainVrVddc);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainVrMvdd =
+				le16_to_cpu(fan_table_v2->usFanGainVrMvdd);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainPlx =
+				le16_to_cpu(fan_table_v2->usFanGainPlx);
+		hwmgr->thermal_controller.advanceFanControlParameters.usFanGainHbm =
+				le16_to_cpu(fan_table_v2->usFanGainHbm);
+
+		hwmgr->thermal_controller.advanceFanControlParameters.ucEnableZeroRPM =
+				fan_table_v2->ucEnableZeroRPM;
+		hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStopTemperature =
+				le16_to_cpu(fan_table_v2->usFanStopTemperature);
+		hwmgr->thermal_controller.advanceFanControlParameters.usZeroRPMStartTemperature =
+				le16_to_cpu(fan_table_v2->usFanStartTemperature);
+	}
 	return 0;
 }
 
@@ -261,6 +313,48 @@ static int get_mm_clock_voltage_table(
 	return 0;
 }
 
+static void get_scl_sda_value(uint8_t line, uint8_t *scl, uint8_t* sda)
+{
+	switch(line){
+	case Vega10_I2CLineID_DDC1:
+		*scl = Vega10_I2C_DDC1CLK;
+		*sda = Vega10_I2C_DDC1DATA;
+		break;
+	case Vega10_I2CLineID_DDC2:
+		*scl = Vega10_I2C_DDC2CLK;
+		*sda = Vega10_I2C_DDC2DATA;
+		break;
+	case Vega10_I2CLineID_DDC3:
+		*scl = Vega10_I2C_DDC3CLK;
+		*sda = Vega10_I2C_DDC3DATA;
+		break;
+	case Vega10_I2CLineID_DDC4:
+		*scl = Vega10_I2C_DDC4CLK;
+		*sda = Vega10_I2C_DDC4DATA;
+		break;
+	case Vega10_I2CLineID_DDC5:
+		*scl = Vega10_I2C_DDC5CLK;
+		*sda = Vega10_I2C_DDC5DATA;
+		break;
+	case Vega10_I2CLineID_DDC6:
+		*scl = Vega10_I2C_DDC6CLK;
+		*sda = Vega10_I2C_DDC6DATA;
+		break;
+	case Vega10_I2CLineID_SCLSDA:
+		*scl = Vega10_I2C_SCL;
+		*sda = Vega10_I2C_SDA;
+		break;
+	case Vega10_I2CLineID_DDCVGA:
+		*scl = Vega10_I2C_DDCVGACLK;
+		*sda = Vega10_I2C_DDCVGADATA;
+		break;
+	default:
+		*scl = 0;
+		*sda = 0;
+		break;
+	}
+}
+
 static int get_tdp_table(
 		struct pp_hwmgr *hwmgr,
 		struct phm_tdp_table **info_tdp_table,
@@ -268,59 +362,99 @@ static int get_tdp_table(
 {
 	uint32_t table_size;
 	struct phm_tdp_table *tdp_table;
-
-	const ATOM_Vega10_PowerTune_Table *power_tune_table =
-			(ATOM_Vega10_PowerTune_Table *)table;
-
-	table_size = sizeof(uint32_t) + sizeof(struct phm_cac_tdp_table);
-	hwmgr->dyn_state.cac_dtp_table = (struct phm_cac_tdp_table *)
-			kzalloc(table_size, GFP_KERNEL);
-
-	if (!hwmgr->dyn_state.cac_dtp_table)
-		return -ENOMEM;
+	uint8_t scl;
+	uint8_t sda;
+	const ATOM_Vega10_PowerTune_Table *power_tune_table;
+	const ATOM_Vega10_PowerTune_Table_V2 *power_tune_table_v2;
 
 	table_size = sizeof(uint32_t) + sizeof(struct phm_tdp_table);
+
 	tdp_table = kzalloc(table_size, GFP_KERNEL);
 
-	if (!tdp_table) {
-		kfree(hwmgr->dyn_state.cac_dtp_table);
-		hwmgr->dyn_state.cac_dtp_table = NULL;
+	if (!tdp_table)
 		return -ENOMEM;
-	}
 
-	tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table->usSocketPowerLimit);
-	tdp_table->usTDC = le16_to_cpu(power_tune_table->usTdcLimit);
-	tdp_table->usEDCLimit = le16_to_cpu(power_tune_table->usEdcLimit);
-	tdp_table->usSoftwareShutdownTemp =
-			le16_to_cpu(power_tune_table->usSoftwareShutdownTemp);
-	tdp_table->usTemperatureLimitTedge =
-			le16_to_cpu(power_tune_table->usTemperatureLimitTedge);
-	tdp_table->usTemperatureLimitHotspot =
-			le16_to_cpu(power_tune_table->usTemperatureLimitHotSpot);
-	tdp_table->usTemperatureLimitLiquid1 =
-			le16_to_cpu(power_tune_table->usTemperatureLimitLiquid1);
-	tdp_table->usTemperatureLimitLiquid2 =
-			le16_to_cpu(power_tune_table->usTemperatureLimitLiquid2);
-	tdp_table->usTemperatureLimitHBM =
-			le16_to_cpu(power_tune_table->usTemperatureLimitHBM);
-	tdp_table->usTemperatureLimitVrVddc =
-			le16_to_cpu(power_tune_table->usTemperatureLimitVrSoc);
-	tdp_table->usTemperatureLimitVrMvdd =
-			le16_to_cpu(power_tune_table->usTemperatureLimitVrMem);
-	tdp_table->usTemperatureLimitPlx =
-			le16_to_cpu(power_tune_table->usTemperatureLimitPlx);
-	tdp_table->ucLiquid1_I2C_address = power_tune_table->ucLiquid1_I2C_address;
-	tdp_table->ucLiquid2_I2C_address = power_tune_table->ucLiquid2_I2C_address;
-	tdp_table->ucLiquid_I2C_Line = power_tune_table->ucLiquid_I2C_LineSCL;
-	tdp_table->ucLiquid_I2C_LineSDA = power_tune_table->ucLiquid_I2C_LineSDA;
-	tdp_table->ucVr_I2C_address = power_tune_table->ucVr_I2C_address;
-	tdp_table->ucVr_I2C_Line = power_tune_table->ucVr_I2C_LineSCL;
-	tdp_table->ucVr_I2C_LineSDA = power_tune_table->ucVr_I2C_LineSDA;
-	tdp_table->ucPlx_I2C_address = power_tune_table->ucPlx_I2C_address;
-	tdp_table->ucPlx_I2C_Line = power_tune_table->ucPlx_I2C_LineSCL;
-	tdp_table->ucPlx_I2C_LineSDA = power_tune_table->ucPlx_I2C_LineSDA;
-
-	hwmgr->platform_descriptor.LoadLineSlope = power_tune_table->usLoadLineResistance;
+	if (table->ucRevId == 5) {
+		power_tune_table = (ATOM_Vega10_PowerTune_Table *)table;
+		tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table->usSocketPowerLimit);
+		tdp_table->usTDC = le16_to_cpu(power_tune_table->usTdcLimit);
+		tdp_table->usEDCLimit = le16_to_cpu(power_tune_table->usEdcLimit);
+		tdp_table->usSoftwareShutdownTemp =
+				le16_to_cpu(power_tune_table->usSoftwareShutdownTemp);
+		tdp_table->usTemperatureLimitTedge =
+				le16_to_cpu(power_tune_table->usTemperatureLimitTedge);
+		tdp_table->usTemperatureLimitHotspot =
+				le16_to_cpu(power_tune_table->usTemperatureLimitHotSpot);
+		tdp_table->usTemperatureLimitLiquid1 =
+				le16_to_cpu(power_tune_table->usTemperatureLimitLiquid1);
+		tdp_table->usTemperatureLimitLiquid2 =
+				le16_to_cpu(power_tune_table->usTemperatureLimitLiquid2);
+		tdp_table->usTemperatureLimitHBM =
+				le16_to_cpu(power_tune_table->usTemperatureLimitHBM);
+		tdp_table->usTemperatureLimitVrVddc =
+				le16_to_cpu(power_tune_table->usTemperatureLimitVrSoc);
+		tdp_table->usTemperatureLimitVrMvdd =
+				le16_to_cpu(power_tune_table->usTemperatureLimitVrMem);
+		tdp_table->usTemperatureLimitPlx =
+				le16_to_cpu(power_tune_table->usTemperatureLimitPlx);
+		tdp_table->ucLiquid1_I2C_address = power_tune_table->ucLiquid1_I2C_address;
+		tdp_table->ucLiquid2_I2C_address = power_tune_table->ucLiquid2_I2C_address;
+		tdp_table->ucLiquid_I2C_Line = power_tune_table->ucLiquid_I2C_LineSCL;
+		tdp_table->ucLiquid_I2C_LineSDA = power_tune_table->ucLiquid_I2C_LineSDA;
+		tdp_table->ucVr_I2C_address = power_tune_table->ucVr_I2C_address;
+		tdp_table->ucVr_I2C_Line = power_tune_table->ucVr_I2C_LineSCL;
+		tdp_table->ucVr_I2C_LineSDA = power_tune_table->ucVr_I2C_LineSDA;
+		tdp_table->ucPlx_I2C_address = power_tune_table->ucPlx_I2C_address;
+		tdp_table->ucPlx_I2C_Line = power_tune_table->ucPlx_I2C_LineSCL;
+		tdp_table->ucPlx_I2C_LineSDA = power_tune_table->ucPlx_I2C_LineSDA;
+		hwmgr->platform_descriptor.LoadLineSlope = power_tune_table->usLoadLineResistance;
+	} else {
+		power_tune_table_v2 = (ATOM_Vega10_PowerTune_Table_V2 *)table;
+		tdp_table->usMaximumPowerDeliveryLimit = le16_to_cpu(power_tune_table_v2->usSocketPowerLimit);
+		tdp_table->usTDC = le16_to_cpu(power_tune_table_v2->usTdcLimit);
+		tdp_table->usEDCLimit = le16_to_cpu(power_tune_table_v2->usEdcLimit);
+		tdp_table->usSoftwareShutdownTemp =
+				le16_to_cpu(power_tune_table_v2->usSoftwareShutdownTemp);
+		tdp_table->usTemperatureLimitTedge =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitTedge);
+		tdp_table->usTemperatureLimitHotspot =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitHotSpot);
+		tdp_table->usTemperatureLimitLiquid1 =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitLiquid1);
+		tdp_table->usTemperatureLimitLiquid2 =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitLiquid2);
+		tdp_table->usTemperatureLimitHBM =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitHBM);
+		tdp_table->usTemperatureLimitVrVddc =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitVrSoc);
+		tdp_table->usTemperatureLimitVrMvdd =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitVrMem);
+		tdp_table->usTemperatureLimitPlx =
+				le16_to_cpu(power_tune_table_v2->usTemperatureLimitPlx);
+		tdp_table->ucLiquid1_I2C_address = power_tune_table_v2->ucLiquid1_I2C_address;
+		tdp_table->ucLiquid2_I2C_address = power_tune_table_v2->ucLiquid2_I2C_address;
+
+		get_scl_sda_value(power_tune_table_v2->ucLiquid_I2C_Line, &scl, &sda);
+
+		tdp_table->ucLiquid_I2C_Line = scl;
+		tdp_table->ucLiquid_I2C_LineSDA = sda;
+
+		tdp_table->ucVr_I2C_address = power_tune_table_v2->ucVr_I2C_address;
+
+		get_scl_sda_value(power_tune_table_v2->ucVr_I2C_Line, &scl, &sda);
+
+		tdp_table->ucVr_I2C_Line = scl;
+		tdp_table->ucVr_I2C_LineSDA = sda;
+		tdp_table->ucPlx_I2C_address = power_tune_table_v2->ucPlx_I2C_address;
+
+		get_scl_sda_value(power_tune_table_v2->ucPlx_I2C_Line, &scl, &sda);
+
+		tdp_table->ucPlx_I2C_Line = scl;
+		tdp_table->ucPlx_I2C_LineSDA = sda;
+
+		hwmgr->platform_descriptor.LoadLineSlope =
+					power_tune_table_v2->usLoadLineResistance;
+	}
 
 	*info_tdp_table = tdp_table;
 
@@ -836,7 +970,7 @@ static int init_dpm_2_parameters(
 				(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usVddcLookupTableOffset));
 		result = get_vddc_lookup_table(hwmgr,
-				&pp_table_info->vddc_lookup_table, vddc_table, 16);
+				&pp_table_info->vddc_lookup_table, vddc_table, 8);
 	}
 
 	if (powerplay_table->usVddmemLookupTableOffset) {
@@ -845,7 +979,7 @@ static int init_dpm_2_parameters(
 				(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usVddmemLookupTableOffset));
 		result = get_vddc_lookup_table(hwmgr,
-				&pp_table_info->vddmem_lookup_table, vdd_mem_table, 16);
+				&pp_table_info->vddmem_lookup_table, vdd_mem_table, 4);
 	}
 
 	if (powerplay_table->usVddciLookupTableOffset) {
@@ -854,7 +988,7 @@ static int init_dpm_2_parameters(
 				(((unsigned long)powerplay_table) +
 				le16_to_cpu(powerplay_table->usVddciLookupTableOffset));
 		result = get_vddc_lookup_table(hwmgr,
-				&pp_table_info->vddci_lookup_table, vddci_table, 16);
+				&pp_table_info->vddci_lookup_table, vddci_table, 4);
 	}
 
 	return result;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
index 995d133ba6aa..d83ed2af7aa3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.h
@@ -26,6 +26,34 @@
 
 #include "hwmgr.h"
 
+enum Vega10_I2CLineID {
+	Vega10_I2CLineID_DDC1 = 0x90,
+	Vega10_I2CLineID_DDC2 = 0x91,
+	Vega10_I2CLineID_DDC3 = 0x92,
+	Vega10_I2CLineID_DDC4 = 0x93,
+	Vega10_I2CLineID_DDC5 = 0x94,
+	Vega10_I2CLineID_DDC6 = 0x95,
+	Vega10_I2CLineID_SCLSDA = 0x96,
+	Vega10_I2CLineID_DDCVGA = 0x97
+};
+
+#define Vega10_I2C_DDC1DATA          0
+#define Vega10_I2C_DDC1CLK           1
+#define Vega10_I2C_DDC2DATA          2
+#define Vega10_I2C_DDC2CLK           3
+#define Vega10_I2C_DDC3DATA          4
+#define Vega10_I2C_DDC3CLK           5
+#define Vega10_I2C_SDA               40
+#define Vega10_I2C_SCL               41
+#define Vega10_I2C_DDC4DATA          65
+#define Vega10_I2C_DDC4CLK           66
+#define Vega10_I2C_DDC5DATA          0x48
+#define Vega10_I2C_DDC5CLK           0x49
+#define Vega10_I2C_DDC6DATA          0x4a
+#define Vega10_I2C_DDC6CLK           0x4b
+#define Vega10_I2C_DDCVGADATA        0x4c
+#define Vega10_I2C_DDCVGACLK         0x4d
+
 extern const struct pp_table_func vega10_pptable_funcs;
 extern int vega10_get_number_of_powerplay_table_entries(struct pp_hwmgr *hwmgr);
 extern int vega10_get_powerplay_table_entry(struct pp_hwmgr *hwmgr, uint32_t entry_index,
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
index aee021451d35..2037910adcb1 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu9_driver_if.h
@@ -30,7 +30,9 @@
  * SMU TEAM: Always increment the interface version if
  * any structure is changed in this file
  */
-#define SMU9_DRIVER_IF_VERSION 0xa
+#define SMU9_DRIVER_IF_VERSION 0xB
+
+#define PPTABLE_V10_SMU_VERSION 1
 
 #define NUM_GFXCLK_DPM_LEVELS  8
 #define NUM_UVD_DPM_LEVELS     8
@@ -87,6 +89,11 @@ typedef struct {
   int32_t a0;
   int32_t a1;
   int32_t a2;
+
+  uint8_t a0_shift;
+  uint8_t a1_shift;
+  uint8_t a2_shift;
+  uint8_t padding;
 } GbVdroopTable_t;
 
 typedef struct {
@@ -293,7 +300,9 @@ typedef struct {
   uint16_t     Platform_sigma;
   uint16_t     PSM_Age_CompFactor;
 
-  uint32_t     Reserved[20];
+  uint32_t     DpmLevelPowerDelta;
+
+  uint32_t     Reserved[19];
 
   /* Padding - ignore */
   uint32_t     MmHubPadding[7]; /* SMU internal use */
@@ -350,8 +359,8 @@ typedef struct {
 typedef struct {
   uint16_t avgPsmCount[30];
   uint16_t minPsmCount[30];
-  uint16_t avgPsmVoltage[30]; /* in mV with 2 fractional bits */
-  uint16_t minPsmVoltage[30]; /* in mV with 2 fractional bits */
+  float    avgPsmVoltage[30];
+  float    minPsmVoltage[30];
 
   uint32_t MmHubPadding[7]; /* SMU internal use */
 } AvfsDebugTable_t;
@@ -414,5 +423,45 @@ typedef struct {
 #define UCLK_SWITCH_SLOW 0
 #define UCLK_SWITCH_FAST 1
 
+/* GFX DIDT Configuration */
+#define SQ_Enable_MASK 0x1
+#define SQ_IR_MASK 0x2
+#define SQ_PCC_MASK 0x4
+#define SQ_EDC_MASK 0x8
+
+#define TCP_Enable_MASK 0x100
+#define TCP_IR_MASK 0x200
+#define TCP_PCC_MASK 0x400
+#define TCP_EDC_MASK 0x800
+
+#define TD_Enable_MASK 0x10000
+#define TD_IR_MASK 0x20000
+#define TD_PCC_MASK 0x40000
+#define TD_EDC_MASK 0x80000
+
+#define DB_Enable_MASK 0x1000000
+#define DB_IR_MASK 0x2000000
+#define DB_PCC_MASK 0x4000000
+#define DB_EDC_MASK 0x8000000
+
+#define SQ_Enable_SHIFT 0
+#define SQ_IR_SHIFT 1
+#define SQ_PCC_SHIFT 2
+#define SQ_EDC_SHIFT 3
+
+#define TCP_Enable_SHIFT 8
+#define TCP_IR_SHIFT 9
+#define TCP_PCC_SHIFT 10
+#define TCP_EDC_SHIFT 11
+
+#define TD_Enable_SHIFT 16
+#define TD_IR_SHIFT 17
+#define TD_PCC_SHIFT 18
+#define TD_EDC_SHIFT 19
+
+#define DB_Enable_SHIFT 24
+#define DB_IR_SHIFT 25
+#define DB_PCC_SHIFT 26
+#define DB_EDC_SHIFT 27
 
 #endif
diff --git a/drivers/gpu/drm/arm/hdlcd_drv.c b/drivers/gpu/drm/arm/hdlcd_drv.c
index 0e74880b5e94..0f49c4b12772 100644
--- a/drivers/gpu/drm/arm/hdlcd_drv.c
+++ b/drivers/gpu/drm/arm/hdlcd_drv.c
@@ -392,29 +392,13 @@ static int compare_dev(struct device *dev, void *data)
 
 static int hdlcd_probe(struct platform_device *pdev)
 {
-	struct device_node *port, *ep;
+	struct device_node *port;
 	struct component_match *match = NULL;
 
-	if (!pdev->dev.of_node)
-		return -ENODEV;
-
 	/* there is only one output port inside each device, find it */
-	ep = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
-	if (!ep)
-		return -ENODEV;
-
-	if (!of_device_is_available(ep)) {
-		of_node_put(ep);
+	port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
+	if (!port)
 		return -ENODEV;
-	}
-
-	/* add the remote encoder port as component */
-	port = of_graph_get_remote_port_parent(ep);
-	of_node_put(ep);
-	if (!port || !of_device_is_available(port)) {
-		of_node_put(port);
-		return -EAGAIN;
-	}
 
 	drm_of_component_match_add(&pdev->dev, &match, compare_dev, port);
 	of_node_put(port);
diff --git a/drivers/gpu/drm/arm/malidp_crtc.c b/drivers/gpu/drm/arm/malidp_crtc.c
index f9d665550d3e..9446a673d469 100644
--- a/drivers/gpu/drm/arm/malidp_crtc.c
+++ b/drivers/gpu/drm/arm/malidp_crtc.c
@@ -16,6 +16,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <linux/clk.h>
+#include <linux/pm_runtime.h>
 #include <video/videomode.h>
 
 #include "malidp_drv.h"
@@ -35,13 +36,6 @@ static bool malidp_crtc_mode_fixup(struct drm_crtc *crtc,
 	long rate, req_rate = mode->crtc_clock * 1000;
 
 	if (req_rate) {
-		rate = clk_round_rate(hwdev->mclk, req_rate);
-		if (rate < req_rate) {
-			DRM_DEBUG_DRIVER("mclk clock unable to reach %d kHz\n",
-					 mode->crtc_clock);
-			return false;
-		}
-
 		rate = clk_round_rate(hwdev->pxlclk, req_rate);
 		if (rate != req_rate) {
 			DRM_DEBUG_DRIVER("pxlclk doesn't support %ld Hz\n",
@@ -58,9 +52,14 @@ static void malidp_crtc_enable(struct drm_crtc *crtc)
 	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
 	struct malidp_hw_device *hwdev = malidp->dev;
 	struct videomode vm;
+	int err = pm_runtime_get_sync(crtc->dev->dev);
 
-	drm_display_mode_to_videomode(&crtc->state->adjusted_mode, &vm);
+	if (err < 0) {
+		DRM_DEBUG_DRIVER("Failed to enable runtime power management: %d\n", err);
+		return;
+	}
 
+	drm_display_mode_to_videomode(&crtc->state->adjusted_mode, &vm);
 	clk_prepare_enable(hwdev->pxlclk);
 
 	/* We rely on firmware to set mclk to a sensible level. */
@@ -75,10 +74,254 @@ static void malidp_crtc_disable(struct drm_crtc *crtc)
 {
 	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
 	struct malidp_hw_device *hwdev = malidp->dev;
+	int err;
 
 	drm_crtc_vblank_off(crtc);
 	hwdev->enter_config_mode(hwdev);
 	clk_disable_unprepare(hwdev->pxlclk);
+
+	err = pm_runtime_put(crtc->dev->dev);
+	if (err < 0) {
+		DRM_DEBUG_DRIVER("Failed to disable runtime power management: %d\n", err);
+	}
+}
+
+static const struct gamma_curve_segment {
+	u16 start;
+	u16 end;
+} segments[MALIDP_COEFFTAB_NUM_COEFFS] = {
+	/* sector 0 */
+	{    0,    0 }, {    1,    1 }, {    2,    2 }, {    3,    3 },
+	{    4,    4 }, {    5,    5 }, {    6,    6 }, {    7,    7 },
+	{    8,    8 }, {    9,    9 }, {   10,   10 }, {   11,   11 },
+	{   12,   12 }, {   13,   13 }, {   14,   14 }, {   15,   15 },
+	/* sector 1 */
+	{   16,   19 }, {   20,   23 }, {   24,   27 }, {   28,   31 },
+	/* sector 2 */
+	{   32,   39 }, {   40,   47 }, {   48,   55 }, {   56,   63 },
+	/* sector 3 */
+	{   64,   79 }, {   80,   95 }, {   96,  111 }, {  112,  127 },
+	/* sector 4 */
+	{  128,  159 }, {  160,  191 }, {  192,  223 }, {  224,  255 },
+	/* sector 5 */
+	{  256,  319 }, {  320,  383 }, {  384,  447 }, {  448,  511 },
+	/* sector 6 */
+	{  512,  639 }, {  640,  767 }, {  768,  895 }, {  896, 1023 },
+	{ 1024, 1151 }, { 1152, 1279 }, { 1280, 1407 }, { 1408, 1535 },
+	{ 1536, 1663 }, { 1664, 1791 }, { 1792, 1919 }, { 1920, 2047 },
+	{ 2048, 2175 }, { 2176, 2303 }, { 2304, 2431 }, { 2432, 2559 },
+	{ 2560, 2687 }, { 2688, 2815 }, { 2816, 2943 }, { 2944, 3071 },
+	{ 3072, 3199 }, { 3200, 3327 }, { 3328, 3455 }, { 3456, 3583 },
+	{ 3584, 3711 }, { 3712, 3839 }, { 3840, 3967 }, { 3968, 4095 },
+};
+
+#define DE_COEFTAB_DATA(a, b) ((((a) & 0xfff) << 16) | (((b) & 0xfff)))
+
+static void malidp_generate_gamma_table(struct drm_property_blob *lut_blob,
+					u32 coeffs[MALIDP_COEFFTAB_NUM_COEFFS])
+{
+	struct drm_color_lut *lut = (struct drm_color_lut *)lut_blob->data;
+	int i;
+
+	for (i = 0; i < MALIDP_COEFFTAB_NUM_COEFFS; ++i) {
+		u32 a, b, delta_in, out_start, out_end;
+
+		delta_in = segments[i].end - segments[i].start;
+		/* DP has 12-bit internal precision for its LUTs. */
+		out_start = drm_color_lut_extract(lut[segments[i].start].green,
+						  12);
+		out_end = drm_color_lut_extract(lut[segments[i].end].green, 12);
+		a = (delta_in == 0) ? 0 : ((out_end - out_start) * 256) / delta_in;
+		b = out_start;
+		coeffs[i] = DE_COEFTAB_DATA(a, b);
+	}
+}
+
+/*
+ * Check if there is a new gamma LUT and if it is of an acceptable size. Also,
+ * reject any LUTs that use distinct red, green, and blue curves.
+ */
+static int malidp_crtc_atomic_check_gamma(struct drm_crtc *crtc,
+					  struct drm_crtc_state *state)
+{
+	struct malidp_crtc_state *mc = to_malidp_crtc_state(state);
+	struct drm_color_lut *lut;
+	size_t lut_size;
+	int i;
+
+	if (!state->color_mgmt_changed || !state->gamma_lut)
+		return 0;
+
+	if (crtc->state->gamma_lut &&
+	    (crtc->state->gamma_lut->base.id == state->gamma_lut->base.id))
+		return 0;
+
+	if (state->gamma_lut->length % sizeof(struct drm_color_lut))
+		return -EINVAL;
+
+	lut_size = state->gamma_lut->length / sizeof(struct drm_color_lut);
+	if (lut_size != MALIDP_GAMMA_LUT_SIZE)
+		return -EINVAL;
+
+	lut = (struct drm_color_lut *)state->gamma_lut->data;
+	for (i = 0; i < lut_size; ++i)
+		if (!((lut[i].red == lut[i].green) &&
+		      (lut[i].red == lut[i].blue)))
+			return -EINVAL;
+
+	if (!state->mode_changed) {
+		int ret;
+
+		state->mode_changed = true;
+		/*
+		 * Kerneldoc for drm_atomic_helper_check_modeset mandates that
+		 * it be invoked when the driver sets ->mode_changed. Since
+		 * changing the gamma LUT doesn't depend on any external
+		 * resources, it is safe to call it only once.
+		 */
+		ret = drm_atomic_helper_check_modeset(crtc->dev, state->state);
+		if (ret)
+			return ret;
+	}
+
+	malidp_generate_gamma_table(state->gamma_lut, mc->gamma_coeffs);
+	return 0;
+}
+
+/*
+ * Check if there is a new CTM and if it contains valid input. Valid here means
+ * that the number is inside the representable range for a Q3.12 number,
+ * excluding truncating the fractional part of the input data.
+ *
+ * The COLORADJ registers can be changed atomically.
+ */
+static int malidp_crtc_atomic_check_ctm(struct drm_crtc *crtc,
+					struct drm_crtc_state *state)
+{
+	struct malidp_crtc_state *mc = to_malidp_crtc_state(state);
+	struct drm_color_ctm *ctm;
+	int i;
+
+	if (!state->color_mgmt_changed)
+		return 0;
+
+	if (!state->ctm)
+		return 0;
+
+	if (crtc->state->ctm && (crtc->state->ctm->base.id ==
+				 state->ctm->base.id))
+		return 0;
+
+	/*
+	 * The size of the ctm is checked in
+	 * drm_atomic_replace_property_blob_from_id.
+	 */
+	ctm = (struct drm_color_ctm *)state->ctm->data;
+	for (i = 0; i < ARRAY_SIZE(ctm->matrix); ++i) {
+		/* Convert from S31.32 to Q3.12. */
+		s64 val = ctm->matrix[i];
+		u32 mag = ((((u64)val) & ~BIT_ULL(63)) >> 20) &
+			  GENMASK_ULL(14, 0);
+
+		/*
+		 * Convert to 2s complement and check the destination's top bit
+		 * for overflow. NB: Can't check before converting or it'd
+		 * incorrectly reject the case:
+		 * sign == 1
+		 * mag == 0x2000
+		 */
+		if (val & BIT_ULL(63))
+			mag = ~mag + 1;
+		if (!!(val & BIT_ULL(63)) != !!(mag & BIT(14)))
+			return -EINVAL;
+		mc->coloradj_coeffs[i] = mag;
+	}
+
+	return 0;
+}
+
+static int malidp_crtc_atomic_check_scaling(struct drm_crtc *crtc,
+					    struct drm_crtc_state *state)
+{
+	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
+	struct malidp_hw_device *hwdev = malidp->dev;
+	struct malidp_crtc_state *cs = to_malidp_crtc_state(state);
+	struct malidp_se_config *s = &cs->scaler_config;
+	struct drm_plane *plane;
+	struct videomode vm;
+	const struct drm_plane_state *pstate;
+	u32 h_upscale_factor = 0; /* U16.16 */
+	u32 v_upscale_factor = 0; /* U16.16 */
+	u8 scaling = cs->scaled_planes_mask;
+	int ret;
+
+	if (!scaling) {
+		s->scale_enable = false;
+		goto mclk_calc;
+	}
+
+	/* The scaling engine can only handle one plane at a time. */
+	if (scaling & (scaling - 1))
+		return -EINVAL;
+
+	drm_atomic_crtc_state_for_each_plane_state(plane, pstate, state) {
+		struct malidp_plane *mp = to_malidp_plane(plane);
+		u32 phase;
+
+		if (!(mp->layer->id & scaling))
+			continue;
+
+		/*
+		 * Convert crtc_[w|h] to U32.32, then divide by U16.16 src_[w|h]
+		 * to get the U16.16 result.
+		 */
+		h_upscale_factor = div_u64((u64)pstate->crtc_w << 32,
+					   pstate->src_w);
+		v_upscale_factor = div_u64((u64)pstate->crtc_h << 32,
+					   pstate->src_h);
+
+		s->enhancer_enable = ((h_upscale_factor >> 16) >= 2 ||
+				      (v_upscale_factor >> 16) >= 2);
+
+		s->input_w = pstate->src_w >> 16;
+		s->input_h = pstate->src_h >> 16;
+		s->output_w = pstate->crtc_w;
+		s->output_h = pstate->crtc_h;
+
+#define SE_N_PHASE 4
+#define SE_SHIFT_N_PHASE 12
+		/* Calculate initial_phase and delta_phase for horizontal. */
+		phase = s->input_w;
+		s->h_init_phase =
+				((phase << SE_N_PHASE) / s->output_w + 1) / 2;
+
+		phase = s->input_w;
+		phase <<= (SE_SHIFT_N_PHASE + SE_N_PHASE);
+		s->h_delta_phase = phase / s->output_w;
+
+		/* Same for vertical. */
+		phase = s->input_h;
+		s->v_init_phase =
+				((phase << SE_N_PHASE) / s->output_h + 1) / 2;
+
+		phase = s->input_h;
+		phase <<= (SE_SHIFT_N_PHASE + SE_N_PHASE);
+		s->v_delta_phase = phase / s->output_h;
+#undef SE_N_PHASE
+#undef SE_SHIFT_N_PHASE
+		s->plane_src_id = mp->layer->id;
+	}
+
+	s->scale_enable = true;
+	s->hcoeff = malidp_se_select_coeffs(h_upscale_factor);
+	s->vcoeff = malidp_se_select_coeffs(v_upscale_factor);
+
+mclk_calc:
+	drm_display_mode_to_videomode(&state->adjusted_mode, &vm);
+	ret = hwdev->se_calc_mclk(hwdev, s, &vm);
+	if (ret < 0)
+		return -EINVAL;
+	return 0;
 }
 
 static int malidp_crtc_atomic_check(struct drm_crtc *crtc,
@@ -90,6 +333,7 @@ static int malidp_crtc_atomic_check(struct drm_crtc *crtc,
 	const struct drm_plane_state *pstate;
 	u32 rot_mem_free, rot_mem_usable;
 	int rotated_planes = 0;
+	int ret;
 
 	/*
 	 * check if there is enough rotation memory available for planes
@@ -156,7 +400,11 @@ static int malidp_crtc_atomic_check(struct drm_crtc *crtc,
 		}
 	}
 
-	return 0;
+	ret = malidp_crtc_atomic_check_gamma(crtc, state);
+	ret = ret ? ret : malidp_crtc_atomic_check_ctm(crtc, state);
+	ret = ret ? ret : malidp_crtc_atomic_check_scaling(crtc, state);
+
+	return ret;
 }
 
 static const struct drm_crtc_helper_funcs malidp_crtc_helper_funcs = {
@@ -166,6 +414,60 @@ static const struct drm_crtc_helper_funcs malidp_crtc_helper_funcs = {
 	.atomic_check = malidp_crtc_atomic_check,
 };
 
+static struct drm_crtc_state *malidp_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+	struct malidp_crtc_state *state, *old_state;
+
+	if (WARN_ON(!crtc->state))
+		return NULL;
+
+	old_state = to_malidp_crtc_state(crtc->state);
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &state->base);
+	memcpy(state->gamma_coeffs, old_state->gamma_coeffs,
+	       sizeof(state->gamma_coeffs));
+	memcpy(state->coloradj_coeffs, old_state->coloradj_coeffs,
+	       sizeof(state->coloradj_coeffs));
+	memcpy(&state->scaler_config, &old_state->scaler_config,
+	       sizeof(state->scaler_config));
+	state->scaled_planes_mask = 0;
+
+	return &state->base;
+}
+
+static void malidp_crtc_reset(struct drm_crtc *crtc)
+{
+	struct malidp_crtc_state *state = NULL;
+
+	if (crtc->state) {
+		state = to_malidp_crtc_state(crtc->state);
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+	}
+
+	kfree(state);
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		crtc->state = &state->base;
+		crtc->state->crtc = crtc;
+	}
+}
+
+static void malidp_crtc_destroy_state(struct drm_crtc *crtc,
+				      struct drm_crtc_state *state)
+{
+	struct malidp_crtc_state *mali_state = NULL;
+
+	if (state) {
+		mali_state = to_malidp_crtc_state(state);
+		__drm_atomic_helper_crtc_destroy_state(state);
+	}
+
+	kfree(mali_state);
+}
+
 static int malidp_crtc_enable_vblank(struct drm_crtc *crtc)
 {
 	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
@@ -186,12 +488,13 @@ static void malidp_crtc_disable_vblank(struct drm_crtc *crtc)
 }
 
 static const struct drm_crtc_funcs malidp_crtc_funcs = {
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 	.destroy = drm_crtc_cleanup,
 	.set_config = drm_atomic_helper_set_config,
 	.page_flip = drm_atomic_helper_page_flip,
-	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.reset = malidp_crtc_reset,
+	.atomic_duplicate_state = malidp_crtc_duplicate_state,
+	.atomic_destroy_state = malidp_crtc_destroy_state,
 	.enable_vblank = malidp_crtc_enable_vblank,
 	.disable_vblank = malidp_crtc_disable_vblank,
 };
@@ -223,11 +526,17 @@ int malidp_crtc_init(struct drm_device *drm)
 
 	ret = drm_crtc_init_with_planes(drm, &malidp->crtc, primary, NULL,
 					&malidp_crtc_funcs, NULL);
+	if (ret)
+		goto crtc_cleanup_planes;
 
-	if (!ret) {
-		drm_crtc_helper_add(&malidp->crtc, &malidp_crtc_helper_funcs);
-		return 0;
-	}
+	drm_crtc_helper_add(&malidp->crtc, &malidp_crtc_helper_funcs);
+	drm_mode_crtc_set_gamma_size(&malidp->crtc, MALIDP_GAMMA_LUT_SIZE);
+	/* No inverse-gamma: it is per-plane. */
+	drm_crtc_enable_color_mgmt(&malidp->crtc, 0, true, MALIDP_GAMMA_LUT_SIZE);
+
+	malidp_se_set_enh_coeffs(malidp->dev);
+
+	return 0;
 
 crtc_cleanup_planes:
 	malidp_de_planes_destroy(drm);
diff --git a/drivers/gpu/drm/arm/malidp_drv.c b/drivers/gpu/drm/arm/malidp_drv.c
index ea2546f766c2..0d3eb537d08b 100644
--- a/drivers/gpu/drm/arm/malidp_drv.c
+++ b/drivers/gpu/drm/arm/malidp_drv.c
@@ -13,9 +13,11 @@
 #include <linux/module.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/console.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
 #include <linux/of_reserved_mem.h>
+#include <linux/pm_runtime.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
@@ -32,6 +34,131 @@
 
 #define MALIDP_CONF_VALID_TIMEOUT	250
 
+static void malidp_write_gamma_table(struct malidp_hw_device *hwdev,
+				     u32 data[MALIDP_COEFFTAB_NUM_COEFFS])
+{
+	int i;
+	/* Update all channels with a single gamma curve. */
+	const u32 gamma_write_mask = GENMASK(18, 16);
+	/*
+	 * Always write an entire table, so the address field in
+	 * DE_COEFFTAB_ADDR is 0 and we can use the gamma_write_mask bitmask
+	 * directly.
+	 */
+	malidp_hw_write(hwdev, gamma_write_mask,
+			hwdev->map.coeffs_base + MALIDP_COEF_TABLE_ADDR);
+	for (i = 0; i < MALIDP_COEFFTAB_NUM_COEFFS; ++i)
+		malidp_hw_write(hwdev, data[i],
+				hwdev->map.coeffs_base +
+				MALIDP_COEF_TABLE_DATA);
+}
+
+static void malidp_atomic_commit_update_gamma(struct drm_crtc *crtc,
+					      struct drm_crtc_state *old_state)
+{
+	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
+	struct malidp_hw_device *hwdev = malidp->dev;
+
+	if (!crtc->state->color_mgmt_changed)
+		return;
+
+	if (!crtc->state->gamma_lut) {
+		malidp_hw_clearbits(hwdev,
+				    MALIDP_DISP_FUNC_GAMMA,
+				    MALIDP_DE_DISPLAY_FUNC);
+	} else {
+		struct malidp_crtc_state *mc =
+			to_malidp_crtc_state(crtc->state);
+
+		if (!old_state->gamma_lut || (crtc->state->gamma_lut->base.id !=
+					      old_state->gamma_lut->base.id))
+			malidp_write_gamma_table(hwdev, mc->gamma_coeffs);
+
+		malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_GAMMA,
+				  MALIDP_DE_DISPLAY_FUNC);
+	}
+}
+
+static
+void malidp_atomic_commit_update_coloradj(struct drm_crtc *crtc,
+					  struct drm_crtc_state *old_state)
+{
+	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
+	struct malidp_hw_device *hwdev = malidp->dev;
+	int i;
+
+	if (!crtc->state->color_mgmt_changed)
+		return;
+
+	if (!crtc->state->ctm) {
+		malidp_hw_clearbits(hwdev, MALIDP_DISP_FUNC_CADJ,
+				    MALIDP_DE_DISPLAY_FUNC);
+	} else {
+		struct malidp_crtc_state *mc =
+			to_malidp_crtc_state(crtc->state);
+
+		if (!old_state->ctm || (crtc->state->ctm->base.id !=
+					old_state->ctm->base.id))
+			for (i = 0; i < MALIDP_COLORADJ_NUM_COEFFS; ++i)
+				malidp_hw_write(hwdev,
+						mc->coloradj_coeffs[i],
+						hwdev->map.coeffs_base +
+						MALIDP_COLOR_ADJ_COEF + 4 * i);
+
+		malidp_hw_setbits(hwdev, MALIDP_DISP_FUNC_CADJ,
+				  MALIDP_DE_DISPLAY_FUNC);
+	}
+}
+
+static void malidp_atomic_commit_se_config(struct drm_crtc *crtc,
+					   struct drm_crtc_state *old_state)
+{
+	struct malidp_crtc_state *cs = to_malidp_crtc_state(crtc->state);
+	struct malidp_crtc_state *old_cs = to_malidp_crtc_state(old_state);
+	struct malidp_drm *malidp = crtc_to_malidp_device(crtc);
+	struct malidp_hw_device *hwdev = malidp->dev;
+	struct malidp_se_config *s = &cs->scaler_config;
+	struct malidp_se_config *old_s = &old_cs->scaler_config;
+	u32 se_control = hwdev->map.se_base +
+			 ((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ?
+			 0x10 : 0xC);
+	u32 layer_control = se_control + MALIDP_SE_LAYER_CONTROL;
+	u32 scr = se_control + MALIDP_SE_SCALING_CONTROL;
+	u32 val;
+
+	/* Set SE_CONTROL */
+	if (!s->scale_enable) {
+		val = malidp_hw_read(hwdev, se_control);
+		val &= ~MALIDP_SE_SCALING_EN;
+		malidp_hw_write(hwdev, val, se_control);
+		return;
+	}
+
+	hwdev->se_set_scaling_coeffs(hwdev, s, old_s);
+	val = malidp_hw_read(hwdev, se_control);
+	val |= MALIDP_SE_SCALING_EN | MALIDP_SE_ALPHA_EN;
+
+	val &= ~MALIDP_SE_ENH(MALIDP_SE_ENH_MASK);
+	val |= s->enhancer_enable ? MALIDP_SE_ENH(3) : 0;
+
+	val |= MALIDP_SE_RGBO_IF_EN;
+	malidp_hw_write(hwdev, val, se_control);
+
+	/* Set IN_SIZE & OUT_SIZE. */
+	val = MALIDP_SE_SET_V_SIZE(s->input_h) |
+	      MALIDP_SE_SET_H_SIZE(s->input_w);
+	malidp_hw_write(hwdev, val, layer_control + MALIDP_SE_L0_IN_SIZE);
+	val = MALIDP_SE_SET_V_SIZE(s->output_h) |
+	      MALIDP_SE_SET_H_SIZE(s->output_w);
+	malidp_hw_write(hwdev, val, layer_control + MALIDP_SE_L0_OUT_SIZE);
+
+	/* Set phase regs. */
+	malidp_hw_write(hwdev, s->h_init_phase, scr + MALIDP_SE_H_INIT_PH);
+	malidp_hw_write(hwdev, s->h_delta_phase, scr + MALIDP_SE_H_DELTA_PH);
+	malidp_hw_write(hwdev, s->v_init_phase, scr + MALIDP_SE_V_INIT_PH);
+	malidp_hw_write(hwdev, s->v_delta_phase, scr + MALIDP_SE_V_DELTA_PH);
+}
+
 /*
  * set the "config valid" bit and wait until the hardware acts on it
  */
@@ -66,10 +193,12 @@ static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state)
 	struct drm_pending_vblank_event *event;
 	struct drm_device *drm = state->dev;
 	struct malidp_drm *malidp = drm->dev_private;
-	int ret = malidp_set_and_wait_config_valid(drm);
 
-	if (ret)
-		DRM_DEBUG_DRIVER("timed out waiting for updated configuration\n");
+	if (malidp->crtc.enabled) {
+		/* only set config_valid if the CRTC is enabled */
+		if (malidp_set_and_wait_config_valid(drm))
+			DRM_DEBUG_DRIVER("timed out waiting for updated configuration\n");
+	}
 
 	event = malidp->crtc.state->event;
 	if (event) {
@@ -88,15 +217,30 @@ static void malidp_atomic_commit_hw_done(struct drm_atomic_state *state)
 static void malidp_atomic_commit_tail(struct drm_atomic_state *state)
 {
 	struct drm_device *drm = state->dev;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *old_crtc_state;
+	int i;
+
+	pm_runtime_get_sync(drm->dev);
 
 	drm_atomic_helper_commit_modeset_disables(drm, state);
-	drm_atomic_helper_commit_modeset_enables(drm, state);
+
+	for_each_crtc_in_state(state, crtc, old_crtc_state, i) {
+		malidp_atomic_commit_update_gamma(crtc, old_crtc_state);
+		malidp_atomic_commit_update_coloradj(crtc, old_crtc_state);
+		malidp_atomic_commit_se_config(crtc, old_crtc_state);
+	}
+
 	drm_atomic_helper_commit_planes(drm, state, 0);
 
+	drm_atomic_helper_commit_modeset_enables(drm, state);
+
 	malidp_atomic_commit_hw_done(state);
 
 	drm_atomic_helper_wait_for_vblanks(drm, state);
 
+	pm_runtime_put(drm->dev);
+
 	drm_atomic_helper_cleanup_planes(drm, state);
 }
 
@@ -277,13 +421,69 @@ static bool malidp_has_sufficient_address_space(const struct resource *res,
 	return true;
 }
 
+static ssize_t core_id_show(struct device *dev, struct device_attribute *attr,
+			    char *buf)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct malidp_drm *malidp = drm->dev_private;
+
+	return snprintf(buf, PAGE_SIZE, "%08x\n", malidp->core_id);
+}
+
+DEVICE_ATTR_RO(core_id);
+
+static int malidp_init_sysfs(struct device *dev)
+{
+	int ret = device_create_file(dev, &dev_attr_core_id);
+
+	if (ret)
+		DRM_ERROR("failed to create device file for core_id\n");
+
+	return ret;
+}
+
+static void malidp_fini_sysfs(struct device *dev)
+{
+	device_remove_file(dev, &dev_attr_core_id);
+}
+
 #define MAX_OUTPUT_CHANNELS	3
 
+static int malidp_runtime_pm_suspend(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_hw_device *hwdev = malidp->dev;
+
+	/* we can only suspend if the hardware is in config mode */
+	WARN_ON(!hwdev->in_config_mode(hwdev));
+
+	hwdev->pm_suspended = true;
+	clk_disable_unprepare(hwdev->mclk);
+	clk_disable_unprepare(hwdev->aclk);
+	clk_disable_unprepare(hwdev->pclk);
+
+	return 0;
+}
+
+static int malidp_runtime_pm_resume(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct malidp_drm *malidp = drm->dev_private;
+	struct malidp_hw_device *hwdev = malidp->dev;
+
+	clk_prepare_enable(hwdev->pclk);
+	clk_prepare_enable(hwdev->aclk);
+	clk_prepare_enable(hwdev->mclk);
+	hwdev->pm_suspended = false;
+
+	return 0;
+}
+
 static int malidp_bind(struct device *dev)
 {
 	struct resource *res;
 	struct drm_device *drm;
-	struct device_node *ep;
 	struct malidp_drm *malidp;
 	struct malidp_hw_device *hwdev;
 	struct platform_device *pdev = to_platform_device(dev);
@@ -308,7 +508,6 @@ static int malidp_bind(struct device *dev)
 	memcpy(hwdev, of_device_get_match_data(dev), sizeof(*hwdev));
 	malidp->dev = hwdev;
 
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	hwdev->regs = devm_ioremap_resource(dev, res);
 	if (IS_ERR(hwdev->regs))
@@ -341,14 +540,17 @@ static int malidp_bind(struct device *dev)
 		goto alloc_fail;
 	}
 
-	/* Enable APB clock in order to get access to the registers */
-	clk_prepare_enable(hwdev->pclk);
-	/*
-	 * Enable AXI clock and main clock so that prefetch can start once
-	 * the registers are set
-	 */
-	clk_prepare_enable(hwdev->aclk);
-	clk_prepare_enable(hwdev->mclk);
+	drm->dev_private = malidp;
+	dev_set_drvdata(dev, drm);
+
+	/* Enable power management */
+	pm_runtime_enable(dev);
+
+	/* Resume device to enable the clocks */
+	if (pm_runtime_enabled(dev))
+		pm_runtime_get_sync(dev);
+	else
+		malidp_runtime_pm_resume(dev);
 
 	dev_id = of_match_device(malidp_drm_of_match, dev);
 	if (!dev_id) {
@@ -377,6 +579,8 @@ static int malidp_bind(struct device *dev)
 	DRM_INFO("found ARM Mali-DP%3x version r%dp%d\n", version >> 16,
 		 (version >> 12) & 0xf, (version >> 8) & 0xf);
 
+	malidp->core_id = version;
+
 	/* set the number of lines used for output of RGB data */
 	ret = of_property_read_u8_array(dev->of_node,
 					"arm,malidp-output-port-lines",
@@ -388,22 +592,19 @@ static int malidp_bind(struct device *dev)
 		out_depth = (out_depth << 8) | (output_width[i] & 0xf);
 	malidp_hw_write(hwdev, out_depth, hwdev->map.out_depth_base);
 
-	drm->dev_private = malidp;
-	dev_set_drvdata(dev, drm);
 	atomic_set(&malidp->config_valid, 0);
 	init_waitqueue_head(&malidp->wq);
 
 	ret = malidp_init(drm);
 	if (ret < 0)
+		goto query_hw_fail;
+
+	ret = malidp_init_sysfs(dev);
+	if (ret)
 		goto init_fail;
 
 	/* Set the CRTC's port so that the encoder component can find it */
-	ep = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (!ep) {
-		ret = -EINVAL;
-		goto port_fail;
-	}
-	malidp->crtc.port = of_get_next_parent(ep);
+	malidp->crtc.port = of_graph_get_port_by_id(dev->of_node, 0);
 
 	ret = component_bind_all(dev, drm);
 	if (ret) {
@@ -422,6 +623,7 @@ static int malidp_bind(struct device *dev)
 		DRM_ERROR("failed to initialise vblank\n");
 		goto vblank_fail;
 	}
+	pm_runtime_put(dev);
 
 	drm_mode_config_reset(drm);
 
@@ -447,7 +649,9 @@ register_fail:
 		drm_fbdev_cma_fini(malidp->fbdev);
 		malidp->fbdev = NULL;
 	}
+	drm_kms_helper_poll_fini(drm);
 fbdev_fail:
+	pm_runtime_get_sync(dev);
 	drm_vblank_cleanup(drm);
 vblank_fail:
 	malidp_se_irq_fini(drm);
@@ -458,15 +662,17 @@ irq_init_fail:
 bind_fail:
 	of_node_put(malidp->crtc.port);
 	malidp->crtc.port = NULL;
-port_fail:
-	malidp_fini(drm);
 init_fail:
+	malidp_fini_sysfs(dev);
+	malidp_fini(drm);
+query_hw_fail:
+	pm_runtime_put(dev);
+	if (pm_runtime_enabled(dev))
+		pm_runtime_disable(dev);
+	else
+		malidp_runtime_pm_suspend(dev);
 	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
-query_hw_fail:
-	clk_disable_unprepare(hwdev->mclk);
-	clk_disable_unprepare(hwdev->aclk);
-	clk_disable_unprepare(hwdev->pclk);
 	drm_dev_unref(drm);
 alloc_fail:
 	of_reserved_mem_device_release(dev);
@@ -478,7 +684,6 @@ static void malidp_unbind(struct device *dev)
 {
 	struct drm_device *drm = dev_get_drvdata(dev);
 	struct malidp_drm *malidp = drm->dev_private;
-	struct malidp_hw_device *hwdev = malidp->dev;
 
 	drm_dev_unregister(drm);
 	if (malidp->fbdev) {
@@ -486,18 +691,22 @@ static void malidp_unbind(struct device *dev)
 		malidp->fbdev = NULL;
 	}
 	drm_kms_helper_poll_fini(drm);
+	pm_runtime_get_sync(dev);
+	drm_vblank_cleanup(drm);
 	malidp_se_irq_fini(drm);
 	malidp_de_irq_fini(drm);
-	drm_vblank_cleanup(drm);
 	component_unbind_all(dev, drm);
 	of_node_put(malidp->crtc.port);
 	malidp->crtc.port = NULL;
+	malidp_fini_sysfs(dev);
 	malidp_fini(drm);
+	pm_runtime_put(dev);
+	if (pm_runtime_enabled(dev))
+		pm_runtime_disable(dev);
+	else
+		malidp_runtime_pm_suspend(dev);
 	drm->dev_private = NULL;
 	dev_set_drvdata(dev, NULL);
-	clk_disable_unprepare(hwdev->mclk);
-	clk_disable_unprepare(hwdev->aclk);
-	clk_disable_unprepare(hwdev->pclk);
 	drm_dev_unref(drm);
 	of_reserved_mem_device_release(dev);
 }
@@ -516,30 +725,17 @@ static int malidp_compare_dev(struct device *dev, void *data)
 
 static int malidp_platform_probe(struct platform_device *pdev)
 {
-	struct device_node *port, *ep;
+	struct device_node *port;
 	struct component_match *match = NULL;
 
 	if (!pdev->dev.of_node)
 		return -ENODEV;
 
 	/* there is only one output port inside each device, find it */
-	ep = of_graph_get_next_endpoint(pdev->dev.of_node, NULL);
-	if (!ep)
+	port = of_graph_get_remote_node(pdev->dev.of_node, 0, 0);
+	if (!port)
 		return -ENODEV;
 
-	if (!of_device_is_available(ep)) {
-		of_node_put(ep);
-		return -ENODEV;
-	}
-
-	/* add the remote encoder port as component */
-	port = of_graph_get_remote_port_parent(ep);
-	of_node_put(ep);
-	if (!port || !of_device_is_available(port)) {
-		of_node_put(port);
-		return -EAGAIN;
-	}
-
 	drm_of_component_match_add(&pdev->dev, &match, malidp_compare_dev,
 				   port);
 	of_node_put(port);
@@ -553,11 +749,52 @@ static int malidp_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static int __maybe_unused malidp_pm_suspend(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct malidp_drm *malidp = drm->dev_private;
+
+	drm_kms_helper_poll_disable(drm);
+	console_lock();
+	drm_fbdev_cma_set_suspend(malidp->fbdev, 1);
+	console_unlock();
+	malidp->pm_state = drm_atomic_helper_suspend(drm);
+	if (IS_ERR(malidp->pm_state)) {
+		console_lock();
+		drm_fbdev_cma_set_suspend(malidp->fbdev, 0);
+		console_unlock();
+		drm_kms_helper_poll_enable(drm);
+		return PTR_ERR(malidp->pm_state);
+	}
+
+	return 0;
+}
+
+static int __maybe_unused malidp_pm_resume(struct device *dev)
+{
+	struct drm_device *drm = dev_get_drvdata(dev);
+	struct malidp_drm *malidp = drm->dev_private;
+
+	drm_atomic_helper_resume(drm, malidp->pm_state);
+	console_lock();
+	drm_fbdev_cma_set_suspend(malidp->fbdev, 0);
+	console_unlock();
+	drm_kms_helper_poll_enable(drm);
+
+	return 0;
+}
+
+static const struct dev_pm_ops malidp_pm_ops = {
+	SET_SYSTEM_SLEEP_PM_OPS(malidp_pm_suspend, malidp_pm_resume) \
+	SET_RUNTIME_PM_OPS(malidp_runtime_pm_suspend, malidp_runtime_pm_resume, NULL)
+};
+
 static struct platform_driver malidp_platform_driver = {
 	.probe		= malidp_platform_probe,
 	.remove		= malidp_platform_remove,
 	.driver	= {
 		.name = "mali-dp",
+		.pm = &malidp_pm_ops,
 		.of_match_table	= malidp_drm_of_match,
 	},
 };
diff --git a/drivers/gpu/drm/arm/malidp_drv.h b/drivers/gpu/drm/arm/malidp_drv.h
index dbc617c6e4ef..040311ffcaec 100644
--- a/drivers/gpu/drm/arm/malidp_drv.h
+++ b/drivers/gpu/drm/arm/malidp_drv.h
@@ -24,6 +24,8 @@ struct malidp_drm {
 	struct drm_crtc crtc;
 	wait_queue_head_t wq;
 	atomic_t config_valid;
+	struct drm_atomic_state *pm_state;
+	u32 core_id;
 };
 
 #define crtc_to_malidp_device(x) container_of(x, struct malidp_drm, crtc)
@@ -47,6 +49,17 @@ struct malidp_plane_state {
 #define to_malidp_plane(x) container_of(x, struct malidp_plane, base)
 #define to_malidp_plane_state(x) container_of(x, struct malidp_plane_state, base)
 
+struct malidp_crtc_state {
+	struct drm_crtc_state base;
+	u32 gamma_coeffs[MALIDP_COEFFTAB_NUM_COEFFS];
+	u32 coloradj_coeffs[MALIDP_COLORADJ_NUM_COEFFS];
+	struct malidp_se_config scaler_config;
+	/* Bitfield of all the planes that have requested a scaled output. */
+	u8 scaled_planes_mask;
+};
+
+#define to_malidp_crtc_state(x) container_of(x, struct malidp_crtc_state, base)
+
 int malidp_de_planes_init(struct drm_device *drm);
 void malidp_de_planes_destroy(struct drm_device *drm);
 int malidp_crtc_init(struct drm_device *drm);
diff --git a/drivers/gpu/drm/arm/malidp_hw.c b/drivers/gpu/drm/arm/malidp_hw.c
index 9f5513006eee..28360b8542f7 100644
--- a/drivers/gpu/drm/arm/malidp_hw.c
+++ b/drivers/gpu/drm/arm/malidp_hw.c
@@ -12,6 +12,7 @@
  * in an attempt to provide to the rest of the driver code a unified view
  */
 
+#include <linux/clk.h>
 #include <linux/types.h>
 #include <linux/io.h>
 #include <drm/drmP.h>
@@ -86,6 +87,80 @@ static const struct malidp_layer malidp550_layers[] = {
 	{ DE_SMART, MALIDP550_DE_LS_BASE, MALIDP550_DE_LS_PTR_BASE, MALIDP550_DE_LS_R1_STRIDE },
 };
 
+#define SE_N_SCALING_COEFFS	96
+static const u16 dp500_se_scaling_coeffs[][SE_N_SCALING_COEFFS] = {
+	[MALIDP_UPSCALING_COEFFS - 1] = {
+		0x0000, 0x0001, 0x0007, 0x0011, 0x001e, 0x002e, 0x003f, 0x0052,
+		0x0064, 0x0073, 0x007d, 0x0080, 0x007a, 0x006c, 0x0053, 0x002f,
+		0x0000, 0x3fc6, 0x3f83, 0x3f39, 0x3eea, 0x3e9b, 0x3e4f, 0x3e0a,
+		0x3dd4, 0x3db0, 0x3da2, 0x3db1, 0x3dde, 0x3e2f, 0x3ea5, 0x3f40,
+		0x0000, 0x00e5, 0x01ee, 0x0315, 0x0456, 0x05aa, 0x0709, 0x086c,
+		0x09c9, 0x0b15, 0x0c4a, 0x0d5d, 0x0e4a, 0x0f06, 0x0f91, 0x0fe5,
+		0x1000, 0x0fe5, 0x0f91, 0x0f06, 0x0e4a, 0x0d5d, 0x0c4a, 0x0b15,
+		0x09c9, 0x086c, 0x0709, 0x05aa, 0x0456, 0x0315, 0x01ee, 0x00e5,
+		0x0000, 0x3f40, 0x3ea5, 0x3e2f, 0x3dde, 0x3db1, 0x3da2, 0x3db0,
+		0x3dd4, 0x3e0a, 0x3e4f, 0x3e9b, 0x3eea, 0x3f39, 0x3f83, 0x3fc6,
+		0x0000, 0x002f, 0x0053, 0x006c, 0x007a, 0x0080, 0x007d, 0x0073,
+		0x0064, 0x0052, 0x003f, 0x002e, 0x001e, 0x0011, 0x0007, 0x0001
+	},
+	[MALIDP_DOWNSCALING_1_5_COEFFS - 1] = {
+		0x0059, 0x004f, 0x0041, 0x002e, 0x0016, 0x3ffb, 0x3fd9, 0x3fb4,
+		0x3f8c, 0x3f62, 0x3f36, 0x3f09, 0x3edd, 0x3eb3, 0x3e8d, 0x3e6c,
+		0x3e52, 0x3e3f, 0x3e35, 0x3e37, 0x3e46, 0x3e61, 0x3e8c, 0x3ec5,
+		0x3f0f, 0x3f68, 0x3fd1, 0x004a, 0x00d3, 0x0169, 0x020b, 0x02b8,
+		0x036e, 0x042d, 0x04f2, 0x05b9, 0x0681, 0x0745, 0x0803, 0x08ba,
+		0x0965, 0x0a03, 0x0a91, 0x0b0d, 0x0b75, 0x0bc6, 0x0c00, 0x0c20,
+		0x0c28, 0x0c20, 0x0c00, 0x0bc6, 0x0b75, 0x0b0d, 0x0a91, 0x0a03,
+		0x0965, 0x08ba, 0x0803, 0x0745, 0x0681, 0x05b9, 0x04f2, 0x042d,
+		0x036e, 0x02b8, 0x020b, 0x0169, 0x00d3, 0x004a, 0x3fd1, 0x3f68,
+		0x3f0f, 0x3ec5, 0x3e8c, 0x3e61, 0x3e46, 0x3e37, 0x3e35, 0x3e3f,
+		0x3e52, 0x3e6c, 0x3e8d, 0x3eb3, 0x3edd, 0x3f09, 0x3f36, 0x3f62,
+		0x3f8c, 0x3fb4, 0x3fd9, 0x3ffb, 0x0016, 0x002e, 0x0041, 0x004f
+	},
+	[MALIDP_DOWNSCALING_2_COEFFS - 1] = {
+		0x3f19, 0x3f03, 0x3ef0, 0x3edf, 0x3ed0, 0x3ec5, 0x3ebd, 0x3eb9,
+		0x3eb9, 0x3ebf, 0x3eca, 0x3ed9, 0x3eef, 0x3f0a, 0x3f2c, 0x3f52,
+		0x3f7f, 0x3fb0, 0x3fe8, 0x0026, 0x006a, 0x00b4, 0x0103, 0x0158,
+		0x01b1, 0x020d, 0x026c, 0x02cd, 0x032f, 0x0392, 0x03f4, 0x0455,
+		0x04b4, 0x051e, 0x0585, 0x05eb, 0x064c, 0x06a8, 0x06fe, 0x074e,
+		0x0796, 0x07d5, 0x080c, 0x0839, 0x085c, 0x0875, 0x0882, 0x0887,
+		0x0881, 0x0887, 0x0882, 0x0875, 0x085c, 0x0839, 0x080c, 0x07d5,
+		0x0796, 0x074e, 0x06fe, 0x06a8, 0x064c, 0x05eb, 0x0585, 0x051e,
+		0x04b4, 0x0455, 0x03f4, 0x0392, 0x032f, 0x02cd, 0x026c, 0x020d,
+		0x01b1, 0x0158, 0x0103, 0x00b4, 0x006a, 0x0026, 0x3fe8, 0x3fb0,
+		0x3f7f, 0x3f52, 0x3f2c, 0x3f0a, 0x3eef, 0x3ed9, 0x3eca, 0x3ebf,
+		0x3eb9, 0x3eb9, 0x3ebd, 0x3ec5, 0x3ed0, 0x3edf, 0x3ef0, 0x3f03
+	},
+	[MALIDP_DOWNSCALING_2_75_COEFFS - 1] = {
+		0x3f51, 0x3f60, 0x3f71, 0x3f84, 0x3f98, 0x3faf, 0x3fc8, 0x3fe3,
+		0x0000, 0x001f, 0x0040, 0x0064, 0x008a, 0x00b1, 0x00da, 0x0106,
+		0x0133, 0x0160, 0x018e, 0x01bd, 0x01ec, 0x021d, 0x024e, 0x0280,
+		0x02b2, 0x02e4, 0x0317, 0x0349, 0x037c, 0x03ad, 0x03df, 0x0410,
+		0x0440, 0x0468, 0x048f, 0x04b3, 0x04d6, 0x04f8, 0x0516, 0x0533,
+		0x054e, 0x0566, 0x057c, 0x0590, 0x05a0, 0x05ae, 0x05ba, 0x05c3,
+		0x05c9, 0x05c3, 0x05ba, 0x05ae, 0x05a0, 0x0590, 0x057c, 0x0566,
+		0x054e, 0x0533, 0x0516, 0x04f8, 0x04d6, 0x04b3, 0x048f, 0x0468,
+		0x0440, 0x0410, 0x03df, 0x03ad, 0x037c, 0x0349, 0x0317, 0x02e4,
+		0x02b2, 0x0280, 0x024e, 0x021d, 0x01ec, 0x01bd, 0x018e, 0x0160,
+		0x0133, 0x0106, 0x00da, 0x00b1, 0x008a, 0x0064, 0x0040, 0x001f,
+		0x0000, 0x3fe3, 0x3fc8, 0x3faf, 0x3f98, 0x3f84, 0x3f71, 0x3f60
+	},
+	[MALIDP_DOWNSCALING_4_COEFFS - 1] = {
+		0x0094, 0x00a9, 0x00be, 0x00d4, 0x00ea, 0x0101, 0x0118, 0x012f,
+		0x0148, 0x0160, 0x017a, 0x0193, 0x01ae, 0x01c8, 0x01e4, 0x01ff,
+		0x021c, 0x0233, 0x024a, 0x0261, 0x0278, 0x028f, 0x02a6, 0x02bd,
+		0x02d4, 0x02eb, 0x0302, 0x0319, 0x032f, 0x0346, 0x035d, 0x0374,
+		0x038a, 0x0397, 0x03a3, 0x03af, 0x03bb, 0x03c6, 0x03d1, 0x03db,
+		0x03e4, 0x03ed, 0x03f6, 0x03fe, 0x0406, 0x040d, 0x0414, 0x041a,
+		0x0420, 0x041a, 0x0414, 0x040d, 0x0406, 0x03fe, 0x03f6, 0x03ed,
+		0x03e4, 0x03db, 0x03d1, 0x03c6, 0x03bb, 0x03af, 0x03a3, 0x0397,
+		0x038a, 0x0374, 0x035d, 0x0346, 0x032f, 0x0319, 0x0302, 0x02eb,
+		0x02d4, 0x02bd, 0x02a6, 0x028f, 0x0278, 0x0261, 0x024a, 0x0233,
+		0x021c, 0x01ff, 0x01e4, 0x01c8, 0x01ae, 0x0193, 0x017a, 0x0160,
+		0x0148, 0x012f, 0x0118, 0x0101, 0x00ea, 0x00d4, 0x00be, 0x00a9
+	},
+};
+
 #define MALIDP_DE_DEFAULT_PREFETCH_START	5
 
 static int malidp500_query_hw(struct malidp_hw_device *hwdev)
@@ -211,6 +286,88 @@ static int malidp500_rotmem_required(struct malidp_hw_device *hwdev, u16 w, u16
 	return w * drm_format_plane_cpp(fmt, 0) * 8;
 }
 
+static void malidp500_se_write_pp_coefftab(struct malidp_hw_device *hwdev,
+					   u32 direction,
+					   u16 addr,
+					   u8 coeffs_id)
+{
+	int i;
+	u16 scaling_control = MALIDP500_SE_CONTROL + MALIDP_SE_SCALING_CONTROL;
+
+	malidp_hw_write(hwdev,
+			direction | (addr & MALIDP_SE_COEFFTAB_ADDR_MASK),
+			scaling_control + MALIDP_SE_COEFFTAB_ADDR);
+	for (i = 0; i < ARRAY_SIZE(dp500_se_scaling_coeffs); ++i)
+		malidp_hw_write(hwdev, MALIDP_SE_SET_COEFFTAB_DATA(
+				dp500_se_scaling_coeffs[coeffs_id][i]),
+				scaling_control + MALIDP_SE_COEFFTAB_DATA);
+}
+
+static int malidp500_se_set_scaling_coeffs(struct malidp_hw_device *hwdev,
+					   struct malidp_se_config *se_config,
+					   struct malidp_se_config *old_config)
+{
+	/* Get array indices into dp500_se_scaling_coeffs. */
+	u8 h = (u8)se_config->hcoeff - 1;
+	u8 v = (u8)se_config->vcoeff - 1;
+
+	if (WARN_ON(h >= ARRAY_SIZE(dp500_se_scaling_coeffs) ||
+		    v >= ARRAY_SIZE(dp500_se_scaling_coeffs)))
+		return -EINVAL;
+
+	if ((h == v) && (se_config->hcoeff != old_config->hcoeff ||
+			 se_config->vcoeff != old_config->vcoeff)) {
+		malidp500_se_write_pp_coefftab(hwdev,
+					       (MALIDP_SE_V_COEFFTAB |
+						MALIDP_SE_H_COEFFTAB),
+					       0, v);
+	} else {
+		if (se_config->vcoeff != old_config->vcoeff)
+			malidp500_se_write_pp_coefftab(hwdev,
+						       MALIDP_SE_V_COEFFTAB,
+						       0, v);
+		if (se_config->hcoeff != old_config->hcoeff)
+			malidp500_se_write_pp_coefftab(hwdev,
+						       MALIDP_SE_H_COEFFTAB,
+						       0, h);
+	}
+
+	return 0;
+}
+
+static long malidp500_se_calc_mclk(struct malidp_hw_device *hwdev,
+				   struct malidp_se_config *se_config,
+				   struct videomode *vm)
+{
+	unsigned long mclk;
+	unsigned long pxlclk = vm->pixelclock; /* Hz */
+	unsigned long htotal = vm->hactive + vm->hfront_porch +
+			       vm->hback_porch + vm->hsync_len;
+	unsigned long input_size = se_config->input_w * se_config->input_h;
+	unsigned long a = 10;
+	long ret;
+
+	/*
+	 * mclk = max(a, 1.5) * pxlclk
+	 *
+	 * To avoid float calculaiton, using 15 instead of 1.5 and div by
+	 * 10 to get mclk.
+	 */
+	if (se_config->scale_enable) {
+		a = 15 * input_size / (htotal * se_config->output_h);
+		if (a < 15)
+			a = 15;
+	}
+	mclk = a * pxlclk / 10;
+	ret = clk_get_rate(hwdev->mclk);
+	if (ret < mclk) {
+		DRM_DEBUG_DRIVER("mclk requirement of %lu kHz can't be met.\n",
+				 mclk / 1000);
+		return -EINVAL;
+	}
+	return ret;
+}
+
 static int malidp550_query_hw(struct malidp_hw_device *hwdev)
 {
 	u32 conf = malidp_hw_read(hwdev, MALIDP550_CONFIG_ID);
@@ -384,6 +541,53 @@ static int malidp550_rotmem_required(struct malidp_hw_device *hwdev, u16 w, u16
 	return w * bytes_per_col;
 }
 
+static int malidp550_se_set_scaling_coeffs(struct malidp_hw_device *hwdev,
+					   struct malidp_se_config *se_config,
+					   struct malidp_se_config *old_config)
+{
+	u32 mask = MALIDP550_SE_CTL_VCSEL(MALIDP550_SE_CTL_SEL_MASK) |
+		   MALIDP550_SE_CTL_HCSEL(MALIDP550_SE_CTL_SEL_MASK);
+	u32 new_value = MALIDP550_SE_CTL_VCSEL(se_config->vcoeff) |
+			MALIDP550_SE_CTL_HCSEL(se_config->hcoeff);
+
+	malidp_hw_clearbits(hwdev, mask, MALIDP550_SE_CONTROL);
+	malidp_hw_setbits(hwdev, new_value, MALIDP550_SE_CONTROL);
+	return 0;
+}
+
+static long malidp550_se_calc_mclk(struct malidp_hw_device *hwdev,
+				   struct malidp_se_config *se_config,
+				   struct videomode *vm)
+{
+	unsigned long mclk;
+	unsigned long pxlclk = vm->pixelclock;
+	unsigned long htotal = vm->hactive + vm->hfront_porch +
+			       vm->hback_porch + vm->hsync_len;
+	unsigned long numerator = 1, denominator = 1;
+	long ret;
+
+	if (se_config->scale_enable) {
+		numerator = max(se_config->input_w, se_config->output_w) *
+			    se_config->input_h;
+		numerator += se_config->output_w *
+			     (se_config->output_h -
+			      min(se_config->input_h, se_config->output_h));
+		denominator = (htotal - 2) * se_config->output_h;
+	}
+
+	/* mclk can't be slower than pxlclk. */
+	if (numerator < denominator)
+		numerator = denominator = 1;
+	mclk = (pxlclk * numerator) / denominator;
+	ret = clk_get_rate(hwdev->mclk);
+	if (ret < mclk) {
+		DRM_DEBUG_DRIVER("mclk requirement of %lu kHz can't be met.\n",
+				 mclk / 1000);
+		return -EINVAL;
+	}
+	return ret;
+}
+
 static int malidp650_query_hw(struct malidp_hw_device *hwdev)
 {
 	u32 conf = malidp_hw_read(hwdev, MALIDP550_CONFIG_ID);
@@ -415,6 +619,7 @@ static int malidp650_query_hw(struct malidp_hw_device *hwdev)
 const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = {
 	[MALIDP_500] = {
 		.map = {
+			.coeffs_base = MALIDP500_COEFFS_BASE,
 			.se_base = MALIDP500_SE_BASE,
 			.dc_base = MALIDP500_DC_BASE,
 			.out_depth_base = MALIDP500_OUTPUT_DEPTH,
@@ -447,10 +652,13 @@ const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = {
 		.set_config_valid = malidp500_set_config_valid,
 		.modeset = malidp500_modeset,
 		.rotmem_required = malidp500_rotmem_required,
+		.se_set_scaling_coeffs = malidp500_se_set_scaling_coeffs,
+		.se_calc_mclk = malidp500_se_calc_mclk,
 		.features = MALIDP_DEVICE_LV_HAS_3_STRIDES,
 	},
 	[MALIDP_550] = {
 		.map = {
+			.coeffs_base = MALIDP550_COEFFS_BASE,
 			.se_base = MALIDP550_SE_BASE,
 			.dc_base = MALIDP550_DC_BASE,
 			.out_depth_base = MALIDP550_DE_OUTPUT_DEPTH,
@@ -481,10 +689,13 @@ const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = {
 		.set_config_valid = malidp550_set_config_valid,
 		.modeset = malidp550_modeset,
 		.rotmem_required = malidp550_rotmem_required,
+		.se_set_scaling_coeffs = malidp550_se_set_scaling_coeffs,
+		.se_calc_mclk = malidp550_se_calc_mclk,
 		.features = 0,
 	},
 	[MALIDP_650] = {
 		.map = {
+			.coeffs_base = MALIDP550_COEFFS_BASE,
 			.se_base = MALIDP550_SE_BASE,
 			.dc_base = MALIDP550_DC_BASE,
 			.out_depth_base = MALIDP550_DE_OUTPUT_DEPTH,
@@ -516,6 +727,8 @@ const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES] = {
 		.set_config_valid = malidp550_set_config_valid,
 		.modeset = malidp550_modeset,
 		.rotmem_required = malidp550_rotmem_required,
+		.se_set_scaling_coeffs = malidp550_se_set_scaling_coeffs,
+		.se_calc_mclk = malidp550_se_calc_mclk,
 		.features = 0,
 	},
 };
diff --git a/drivers/gpu/drm/arm/malidp_hw.h b/drivers/gpu/drm/arm/malidp_hw.h
index 00974b59407d..849ad9a30c3a 100644
--- a/drivers/gpu/drm/arm/malidp_hw.h
+++ b/drivers/gpu/drm/arm/malidp_hw.h
@@ -61,12 +61,34 @@ struct malidp_layer {
 	u16 stride_offset;	/* Offset to the first stride register. */
 };
 
+enum malidp_scaling_coeff_set {
+	MALIDP_UPSCALING_COEFFS = 1,
+	MALIDP_DOWNSCALING_1_5_COEFFS = 2,
+	MALIDP_DOWNSCALING_2_COEFFS = 3,
+	MALIDP_DOWNSCALING_2_75_COEFFS = 4,
+	MALIDP_DOWNSCALING_4_COEFFS = 5,
+};
+
+struct malidp_se_config {
+	u8 scale_enable : 1;
+	u8 enhancer_enable : 1;
+	u8 hcoeff : 3;
+	u8 vcoeff : 3;
+	u8 plane_src_id;
+	u16 input_w, input_h;
+	u16 output_w, output_h;
+	u32 h_init_phase, h_delta_phase;
+	u32 v_init_phase, v_delta_phase;
+};
+
 /* regmap features */
 #define MALIDP_REGMAP_HAS_CLEARIRQ	(1 << 0)
 
 struct malidp_hw_regmap {
 	/* address offset of the DE register bank */
 	/* is always 0x0000 */
+	/* address offset of the DE coefficients registers */
+	const u16 coeffs_base;
 	/* address offset of the SE registers bank */
 	const u16 se_base;
 	/* address offset of the DC registers bank */
@@ -151,11 +173,22 @@ struct malidp_hw_device {
 	 */
 	int (*rotmem_required)(struct malidp_hw_device *hwdev, u16 w, u16 h, u32 fmt);
 
+	int (*se_set_scaling_coeffs)(struct malidp_hw_device *hwdev,
+				     struct malidp_se_config *se_config,
+				     struct malidp_se_config *old_config);
+
+	long (*se_calc_mclk)(struct malidp_hw_device *hwdev,
+			     struct malidp_se_config *se_config,
+			     struct videomode *vm);
+
 	u8 features;
 
 	u8 min_line_size;
 	u16 max_line_size;
 
+	/* track the device PM state */
+	bool pm_suspended;
+
 	/* size of memory used for rotating layers, up to two banks available */
 	u32 rotation_memory[2];
 };
@@ -173,12 +206,14 @@ extern const struct malidp_hw_device malidp_device[MALIDP_MAX_DEVICES];
 
 static inline u32 malidp_hw_read(struct malidp_hw_device *hwdev, u32 reg)
 {
+	WARN_ON(hwdev->pm_suspended);
 	return readl(hwdev->regs + reg);
 }
 
 static inline void malidp_hw_write(struct malidp_hw_device *hwdev,
 				   u32 value, u32 reg)
 {
+	WARN_ON(hwdev->pm_suspended);
 	writel(value, hwdev->regs + reg);
 }
 
@@ -243,6 +278,47 @@ static inline bool malidp_hw_pitch_valid(struct malidp_hw_device *hwdev,
 	return !(pitch & (hwdev->map.bus_align_bytes - 1));
 }
 
+/* U16.16 */
+#define FP_1_00000	0x00010000	/* 1.0 */
+#define FP_0_66667	0x0000AAAA	/* 0.6667 = 1/1.5 */
+#define FP_0_50000	0x00008000	/* 0.5 = 1/2 */
+#define FP_0_36363	0x00005D17	/* 0.36363 = 1/2.75 */
+#define FP_0_25000	0x00004000	/* 0.25 = 1/4 */
+
+static inline enum malidp_scaling_coeff_set
+malidp_se_select_coeffs(u32 upscale_factor)
+{
+	return (upscale_factor >= FP_1_00000) ? MALIDP_UPSCALING_COEFFS :
+	       (upscale_factor >= FP_0_66667) ? MALIDP_DOWNSCALING_1_5_COEFFS :
+	       (upscale_factor >= FP_0_50000) ? MALIDP_DOWNSCALING_2_COEFFS :
+	       (upscale_factor >= FP_0_36363) ? MALIDP_DOWNSCALING_2_75_COEFFS :
+	       MALIDP_DOWNSCALING_4_COEFFS;
+}
+
+#undef FP_0_25000
+#undef FP_0_36363
+#undef FP_0_50000
+#undef FP_0_66667
+#undef FP_1_00000
+
+static inline void malidp_se_set_enh_coeffs(struct malidp_hw_device *hwdev)
+{
+	static const s32 enhancer_coeffs[] = {
+		-8, -8, -8, -8, 128, -8, -8, -8, -8
+	};
+	u32 val = MALIDP_SE_SET_ENH_LIMIT_LOW(MALIDP_SE_ENH_LOW_LEVEL) |
+		  MALIDP_SE_SET_ENH_LIMIT_HIGH(MALIDP_SE_ENH_HIGH_LEVEL);
+	u32 image_enh = hwdev->map.se_base +
+			((hwdev->map.features & MALIDP_REGMAP_HAS_CLEARIRQ) ?
+			 0x10 : 0xC) + MALIDP_SE_IMAGE_ENH;
+	u32 enh_coeffs = image_enh + MALIDP_SE_ENH_COEFF0;
+	int i;
+
+	malidp_hw_write(hwdev, val, image_enh);
+	for (i = 0; i < ARRAY_SIZE(enhancer_coeffs); ++i)
+		malidp_hw_write(hwdev, enhancer_coeffs[i], enh_coeffs + i * 4);
+}
+
 /*
  * background color components are defined as 12bits values,
  * they will be shifted right when stored on hardware that
@@ -252,4 +328,9 @@ static inline bool malidp_hw_pitch_valid(struct malidp_hw_device *hwdev,
 #define MALIDP_BGND_COLOR_G		0x000
 #define MALIDP_BGND_COLOR_B		0x000
 
+#define MALIDP_COLORADJ_NUM_COEFFS	12
+#define MALIDP_COEFFTAB_NUM_COEFFS	64
+
+#define MALIDP_GAMMA_LUT_SIZE		4096
+
 #endif  /* __MALIDP_HW_H__ */
diff --git a/drivers/gpu/drm/arm/malidp_planes.c b/drivers/gpu/drm/arm/malidp_planes.c
index d5aec082294c..814fda23cead 100644
--- a/drivers/gpu/drm/arm/malidp_planes.c
+++ b/drivers/gpu/drm/arm/malidp_planes.c
@@ -16,6 +16,7 @@
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
 #include <drm/drm_plane_helper.h>
+#include <drm/drm_print.h>
 
 #include "malidp_hw.h"
 #include "malidp_drv.h"
@@ -24,6 +25,9 @@
 #define MALIDP_LAYER_FORMAT		0x000
 #define MALIDP_LAYER_CONTROL		0x004
 #define   LAYER_ENABLE			(1 << 0)
+#define   LAYER_FLOWCFG_MASK		7
+#define   LAYER_FLOWCFG(x)		(((x) & LAYER_FLOWCFG_MASK) << 1)
+#define     LAYER_FLOWCFG_SCALE_SE	3
 #define   LAYER_ROT_OFFSET		8
 #define   LAYER_H_FLIP			(1 << 10)
 #define   LAYER_V_FLIP			(1 << 11)
@@ -60,6 +64,27 @@ static void malidp_de_plane_destroy(struct drm_plane *plane)
 	devm_kfree(plane->dev->dev, mp);
 }
 
+/*
+ * Replicate what the default ->reset hook does: free the state pointer and
+ * allocate a new empty object. We just need enough space to store
+ * a malidp_plane_state instead of a drm_plane_state.
+ */
+static void malidp_plane_reset(struct drm_plane *plane)
+{
+	struct malidp_plane_state *state = to_malidp_plane_state(plane->state);
+
+	if (state)
+		__drm_atomic_helper_plane_destroy_state(&state->base);
+	kfree(state);
+	plane->state = NULL;
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (state) {
+		state->base.plane = plane;
+		state->base.rotation = DRM_ROTATE_0;
+		plane->state = &state->base;
+	}
+}
+
 static struct
 drm_plane_state *malidp_duplicate_plane_state(struct drm_plane *plane)
 {
@@ -90,26 +115,71 @@ static void malidp_destroy_plane_state(struct drm_plane *plane,
 	kfree(m_state);
 }
 
+static void malidp_plane_atomic_print_state(struct drm_printer *p,
+					    const struct drm_plane_state *state)
+{
+	struct malidp_plane_state *ms = to_malidp_plane_state(state);
+
+	drm_printf(p, "\trotmem_size=%u\n", ms->rotmem_size);
+	drm_printf(p, "\tformat_id=%u\n", ms->format);
+	drm_printf(p, "\tn_planes=%u\n", ms->n_planes);
+}
+
 static const struct drm_plane_funcs malidp_de_plane_funcs = {
 	.update_plane = drm_atomic_helper_update_plane,
 	.disable_plane = drm_atomic_helper_disable_plane,
 	.set_property = drm_atomic_helper_plane_set_property,
 	.destroy = malidp_de_plane_destroy,
-	.reset = drm_atomic_helper_plane_reset,
+	.reset = malidp_plane_reset,
 	.atomic_duplicate_state = malidp_duplicate_plane_state,
 	.atomic_destroy_state = malidp_destroy_plane_state,
+	.atomic_print_state = malidp_plane_atomic_print_state,
 };
 
+static int malidp_se_check_scaling(struct malidp_plane *mp,
+				   struct drm_plane_state *state)
+{
+	struct drm_crtc_state *crtc_state =
+		drm_atomic_get_existing_crtc_state(state->state, state->crtc);
+	struct malidp_crtc_state *mc;
+	struct drm_rect clip = { 0 };
+	u32 src_w, src_h;
+	int ret;
+
+	if (!crtc_state)
+		return -EINVAL;
+
+	clip.x2 = crtc_state->adjusted_mode.hdisplay;
+	clip.y2 = crtc_state->adjusted_mode.vdisplay;
+	ret = drm_plane_helper_check_state(state, &clip, 0, INT_MAX, true, true);
+	if (ret)
+		return ret;
+
+	src_w = state->src_w >> 16;
+	src_h = state->src_h >> 16;
+	if ((state->crtc_w == src_w) && (state->crtc_h == src_h)) {
+		/* Scaling not necessary for this plane. */
+		mc->scaled_planes_mask &= ~(mp->layer->id);
+		return 0;
+	}
+
+	if (mp->layer->id & (DE_SMART | DE_GRAPHICS2))
+		return -EINVAL;
+
+	mc = to_malidp_crtc_state(crtc_state);
+
+	mc->scaled_planes_mask |= mp->layer->id;
+	/* Defer scaling requirements calculation to the crtc check. */
+	return 0;
+}
+
 static int malidp_de_plane_check(struct drm_plane *plane,
 				 struct drm_plane_state *state)
 {
 	struct malidp_plane *mp = to_malidp_plane(plane);
 	struct malidp_plane_state *ms = to_malidp_plane_state(state);
-	struct drm_crtc_state *crtc_state;
 	struct drm_framebuffer *fb;
-	struct drm_rect clip = { 0 };
 	int i, ret;
-	u32 src_w, src_h;
 
 	if (!state->crtc || !state->fb)
 		return 0;
@@ -130,9 +200,6 @@ static int malidp_de_plane_check(struct drm_plane *plane,
 		}
 	}
 
-	src_w = state->src_w >> 16;
-	src_h = state->src_h >> 16;
-
 	if ((state->crtc_w > mp->hwdev->max_line_size) ||
 	    (state->crtc_h > mp->hwdev->max_line_size) ||
 	    (state->crtc_w < mp->hwdev->min_line_size) ||
@@ -149,22 +216,16 @@ static int malidp_de_plane_check(struct drm_plane *plane,
 	    (state->fb->pitches[1] != state->fb->pitches[2]))
 		return -EINVAL;
 
+	ret = malidp_se_check_scaling(mp, state);
+	if (ret)
+		return ret;
+
 	/* packed RGB888 / BGR888 can't be rotated or flipped */
 	if (state->rotation != DRM_ROTATE_0 &&
 	    (fb->format->format == DRM_FORMAT_RGB888 ||
 	     fb->format->format == DRM_FORMAT_BGR888))
 		return -EINVAL;
 
-	crtc_state = drm_atomic_get_existing_crtc_state(state->state, state->crtc);
-	clip.x2 = crtc_state->adjusted_mode.hdisplay;
-	clip.y2 = crtc_state->adjusted_mode.vdisplay;
-	ret = drm_plane_helper_check_state(state, &clip,
-					   DRM_PLANE_HELPER_NO_SCALING,
-					   DRM_PLANE_HELPER_NO_SCALING,
-					   true, true);
-	if (ret)
-		return ret;
-
 	ms->rotmem_size = 0;
 	if (state->rotation & MALIDP_ROTATED_MASK) {
 		int val;
@@ -269,6 +330,16 @@ static void malidp_de_plane_update(struct drm_plane *plane,
 	val &= ~LAYER_COMP_MASK;
 	val |= LAYER_COMP_PIXEL;
 
+	val &= ~LAYER_FLOWCFG(LAYER_FLOWCFG_MASK);
+	if (plane->state->crtc) {
+		struct malidp_crtc_state *m =
+			to_malidp_crtc_state(plane->state->crtc->state);
+
+		if (m->scaler_config.scale_enable &&
+		    m->scaler_config.plane_src_id == mp->layer->id)
+			val |= LAYER_FLOWCFG(LAYER_FLOWCFG_SCALE_SE);
+	}
+
 	/* set the 'enable layer' bit */
 	val |= LAYER_ENABLE;
 
@@ -281,7 +352,8 @@ static void malidp_de_plane_disable(struct drm_plane *plane,
 {
 	struct malidp_plane *mp = to_malidp_plane(plane);
 
-	malidp_hw_clearbits(mp->hwdev, LAYER_ENABLE,
+	malidp_hw_clearbits(mp->hwdev,
+			    LAYER_ENABLE | LAYER_FLOWCFG(LAYER_FLOWCFG_MASK),
 			    mp->layer->base + MALIDP_LAYER_CONTROL);
 }
 
diff --git a/drivers/gpu/drm/arm/malidp_regs.h b/drivers/gpu/drm/arm/malidp_regs.h
index b816067a65c5..2039f857f77d 100644
--- a/drivers/gpu/drm/arm/malidp_regs.h
+++ b/drivers/gpu/drm/arm/malidp_regs.h
@@ -63,6 +63,8 @@
 
 /* bit masks that are common between products */
 #define   MALIDP_CFG_VALID		(1 << 0)
+#define   MALIDP_DISP_FUNC_GAMMA	(1 << 0)
+#define   MALIDP_DISP_FUNC_CADJ		(1 << 4)
 #define   MALIDP_DISP_FUNC_ILACED	(1 << 8)
 
 /* register offsets for IRQ management */
@@ -99,6 +101,58 @@
 
 #define MALIDP_PRODUCT_ID(__core_id) ((u32)(__core_id) >> 16)
 
+/* register offsets relative to MALIDP5x0_COEFFS_BASE */
+#define MALIDP_COLOR_ADJ_COEF		0x00000
+#define MALIDP_COEF_TABLE_ADDR		0x00030
+#define MALIDP_COEF_TABLE_DATA		0x00034
+
+/* Scaling engine registers and masks. */
+#define   MALIDP_SE_SCALING_EN			(1 << 0)
+#define   MALIDP_SE_ALPHA_EN			(1 << 1)
+#define   MALIDP_SE_ENH_MASK			3
+#define   MALIDP_SE_ENH(x)			(((x) & MALIDP_SE_ENH_MASK) << 2)
+#define   MALIDP_SE_RGBO_IF_EN			(1 << 4)
+#define   MALIDP550_SE_CTL_SEL_MASK		7
+#define   MALIDP550_SE_CTL_VCSEL(x) \
+		(((x) & MALIDP550_SE_CTL_SEL_MASK) << 20)
+#define   MALIDP550_SE_CTL_HCSEL(x) \
+		(((x) & MALIDP550_SE_CTL_SEL_MASK) << 16)
+
+/* Blocks with offsets from SE_CONTROL register. */
+#define MALIDP_SE_LAYER_CONTROL			0x14
+#define   MALIDP_SE_L0_IN_SIZE			0x00
+#define   MALIDP_SE_L0_OUT_SIZE			0x04
+#define   MALIDP_SE_SET_V_SIZE(x)		(((x) & 0x1fff) << 16)
+#define   MALIDP_SE_SET_H_SIZE(x)		(((x) & 0x1fff) << 0)
+#define MALIDP_SE_SCALING_CONTROL		0x24
+#define   MALIDP_SE_H_INIT_PH			0x00
+#define   MALIDP_SE_H_DELTA_PH			0x04
+#define   MALIDP_SE_V_INIT_PH			0x08
+#define   MALIDP_SE_V_DELTA_PH			0x0c
+#define   MALIDP_SE_COEFFTAB_ADDR		0x10
+#define     MALIDP_SE_COEFFTAB_ADDR_MASK	0x7f
+#define     MALIDP_SE_V_COEFFTAB		(1 << 8)
+#define     MALIDP_SE_H_COEFFTAB		(1 << 9)
+#define     MALIDP_SE_SET_V_COEFFTAB_ADDR(x) \
+		(MALIDP_SE_V_COEFFTAB | ((x) & MALIDP_SE_COEFFTAB_ADDR_MASK))
+#define     MALIDP_SE_SET_H_COEFFTAB_ADDR(x) \
+		(MALIDP_SE_H_COEFFTAB | ((x) & MALIDP_SE_COEFFTAB_ADDR_MASK))
+#define   MALIDP_SE_COEFFTAB_DATA		0x14
+#define     MALIDP_SE_COEFFTAB_DATA_MASK	0x3fff
+#define     MALIDP_SE_SET_COEFFTAB_DATA(x) \
+		((x) & MALIDP_SE_COEFFTAB_DATA_MASK)
+/* Enhance coeffents reigster offset */
+#define MALIDP_SE_IMAGE_ENH			0x3C
+/* ENH_LIMITS offset 0x0 */
+#define     MALIDP_SE_ENH_LOW_LEVEL		24
+#define     MALIDP_SE_ENH_HIGH_LEVEL		63
+#define     MALIDP_SE_ENH_LIMIT_MASK		0xfff
+#define     MALIDP_SE_SET_ENH_LIMIT_LOW(x) \
+		((x) & MALIDP_SE_ENH_LIMIT_MASK)
+#define     MALIDP_SE_SET_ENH_LIMIT_HIGH(x) \
+		(((x) & MALIDP_SE_ENH_LIMIT_MASK) << 16)
+#define   MALIDP_SE_ENH_COEFF0			0x04
+
 /* register offsets and bits specific to DP500 */
 #define MALIDP500_ADDR_SPACE_SIZE	0x01000
 #define MALIDP500_DC_BASE		0x00000
@@ -120,6 +174,18 @@
 #define MALIDP500_COLOR_ADJ_COEF	0x00078
 #define MALIDP500_COEF_TABLE_ADDR	0x000a8
 #define MALIDP500_COEF_TABLE_DATA	0x000ac
+
+/*
+ * The YUV2RGB coefficients on the DP500 are not in the video layer's register
+ * block. They belong in a separate block above the layer's registers, hence
+ * the negative offset.
+ */
+#define MALIDP500_LV_YUV2RGB		((s16)(-0xB8))
+/*
+ * To match DP550/650, the start of the coeffs registers is
+ * at COLORADJ_COEFF0 instead of at YUV_RGB_COEF1.
+ */
+#define MALIDP500_COEFFS_BASE		0x00078
 #define MALIDP500_DE_LV_BASE		0x00100
 #define MALIDP500_DE_LV_PTR_BASE	0x00124
 #define MALIDP500_DE_LG1_BASE		0x00200
@@ -127,6 +193,7 @@
 #define MALIDP500_DE_LG2_BASE		0x00300
 #define MALIDP500_DE_LG2_PTR_BASE	0x0031c
 #define MALIDP500_SE_BASE		0x00c00
+#define MALIDP500_SE_CONTROL		0x00c0c
 #define MALIDP500_SE_PTR_BASE		0x00e0c
 #define MALIDP500_DC_IRQ_BASE		0x00f00
 #define MALIDP500_CONFIG_VALID		0x00f00
@@ -145,9 +212,7 @@
 #define MALIDP550_DE_DISP_SIDEBAND	0x00040
 #define MALIDP550_DE_BGND_COLOR		0x00044
 #define MALIDP550_DE_OUTPUT_DEPTH	0x0004c
-#define MALIDP550_DE_COLOR_COEF		0x00050
-#define MALIDP550_DE_COEF_TABLE_ADDR	0x00080
-#define MALIDP550_DE_COEF_TABLE_DATA	0x00084
+#define MALIDP550_COEFFS_BASE		0x00050
 #define MALIDP550_DE_LV1_BASE		0x00100
 #define MALIDP550_DE_LV1_PTR_BASE	0x00124
 #define MALIDP550_DE_LV2_BASE		0x00200
@@ -158,6 +223,7 @@
 #define MALIDP550_DE_LS_PTR_BASE	0x0042c
 #define MALIDP550_DE_PERF_BASE		0x00500
 #define MALIDP550_SE_BASE		0x08000
+#define MALIDP550_SE_CONTROL		0x08010
 #define MALIDP550_DC_BASE		0x0c000
 #define MALIDP550_DC_CONTROL		0x0c010
 #define   MALIDP550_DC_CONFIG_REQ	(1 << 16)
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 1597458d884e..d6c2a5d190eb 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -529,10 +529,10 @@ static const struct dma_buf_ops armada_gem_prime_dmabuf_ops = {
 	.map_dma_buf	= armada_gem_prime_map_dma_buf,
 	.unmap_dma_buf	= armada_gem_prime_unmap_dma_buf,
 	.release	= drm_gem_dmabuf_release,
-	.kmap_atomic	= armada_gem_dmabuf_no_kmap,
-	.kunmap_atomic	= armada_gem_dmabuf_no_kunmap,
-	.kmap		= armada_gem_dmabuf_no_kmap,
-	.kunmap		= armada_gem_dmabuf_no_kunmap,
+	.map_atomic	= armada_gem_dmabuf_no_kmap,
+	.unmap_atomic	= armada_gem_dmabuf_no_kunmap,
+	.map		= armada_gem_dmabuf_no_kmap,
+	.unmap		= armada_gem_dmabuf_no_kunmap,
 	.mmap		= armada_gem_dmabuf_mmap,
 };
 
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 47b78e52691c..aaef0a652f10 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -645,7 +645,8 @@ static void ast_crtc_reset(struct drm_crtc *crtc)
 }
 
 static int ast_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-			      u16 *blue, uint32_t size)
+			      u16 *blue, uint32_t size,
+			      struct drm_modeset_acquire_ctx *ctx)
 {
 	struct ast_crtc *ast_crtc = to_ast_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/ast/ast_ttm.c b/drivers/gpu/drm/ast/ast_ttm.c
index 50c910efa13d..e879496b8a42 100644
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver ast_bo_driver = {
 	.verify_access = ast_bo_verify_access,
 	.io_mem_reserve = &ast_ttm_io_mem_reserve,
 	.io_mem_free = &ast_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int ast_mm_init(struct ast_private *ast)
diff --git a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
index e7799b6ee829..65a3bd7a0c00 100644
--- a/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
+++ b/drivers/gpu/drm/atmel-hlcdc/atmel_hlcdc_output.c
@@ -22,7 +22,7 @@
 #include <linux/of_graph.h>
 
 #include <drm/drmP.h>
-#include <drm/drm_panel.h>
+#include <drm/drm_of.h>
 
 #include "atmel_hlcdc_dc.h"
 
@@ -152,29 +152,11 @@ static const struct drm_connector_funcs atmel_hlcdc_panel_connector_funcs = {
 	.atomic_destroy_state = drm_atomic_helper_connector_destroy_state,
 };
 
-static int atmel_hlcdc_check_endpoint(struct drm_device *dev,
-				      const struct of_endpoint *ep)
-{
-	struct device_node *np;
-	void *obj;
-
-	np = of_graph_get_remote_port_parent(ep->local_node);
-
-	obj = of_drm_find_panel(np);
-	if (!obj)
-		obj = of_drm_find_bridge(np);
-
-	of_node_put(np);
-
-	return obj ? 0 : -EPROBE_DEFER;
-}
-
 static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
-				       const struct of_endpoint *ep)
+				       const struct device_node *np)
 {
 	struct atmel_hlcdc_dc *dc = dev->dev_private;
 	struct atmel_hlcdc_rgb_output *output;
-	struct device_node *np;
 	struct drm_panel *panel;
 	struct drm_bridge *bridge;
 	int ret;
@@ -195,13 +177,11 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 
 	output->encoder.possible_crtcs = 0x1;
 
-	np = of_graph_get_remote_port_parent(ep->local_node);
-
-	ret = -EPROBE_DEFER;
+	ret = drm_of_find_panel_or_bridge(np, 0, 0, &panel, &bridge);
+	if (ret)
+		return ret;
 
-	panel = of_drm_find_panel(np);
 	if (panel) {
-		of_node_put(np);
 		output->connector.dpms = DRM_MODE_DPMS_OFF;
 		output->connector.polled = DRM_CONNECTOR_POLL_CONNECT;
 		drm_connector_helper_add(&output->connector,
@@ -226,9 +206,6 @@ static int atmel_hlcdc_attach_endpoint(struct drm_device *dev,
 		return 0;
 	}
 
-	bridge = of_drm_find_bridge(np);
-	of_node_put(np);
-
 	if (bridge) {
 		ret = drm_bridge_attach(&output->encoder, bridge, NULL);
 		if (!ret)
@@ -243,31 +220,22 @@ err_encoder_cleanup:
 
 int atmel_hlcdc_create_outputs(struct drm_device *dev)
 {
-	struct device_node *ep_np = NULL;
-	struct of_endpoint ep;
-	int ret;
-
-	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
-		ret = of_graph_parse_endpoint(ep_np, &ep);
-		if (!ret)
-			ret = atmel_hlcdc_check_endpoint(dev, &ep);
-
-		if (ret) {
-			of_node_put(ep_np);
-			return ret;
-		}
-	}
-
-	for_each_endpoint_of_node(dev->dev->of_node, ep_np) {
-		ret = of_graph_parse_endpoint(ep_np, &ep);
-		if (!ret)
-			ret = atmel_hlcdc_attach_endpoint(dev, &ep);
-
-		if (ret) {
-			of_node_put(ep_np);
+	struct device_node *remote;
+	int ret = -ENODEV;
+	int endpoint = 0;
+
+	while (true) {
+		/* Loop thru possible multiple connections to the output */
+		remote = of_graph_get_remote_node(dev->dev->of_node, 0,
+						  endpoint++);
+		if (!remote)
+			break;
+
+		ret = atmel_hlcdc_attach_endpoint(dev, remote);
+		of_node_put(remote);
+		if (ret)
 			return ret;
-		}
 	}
 
-	return 0;
+	return ret;
 }
diff --git a/drivers/gpu/drm/bochs/bochs_mm.c b/drivers/gpu/drm/bochs/bochs_mm.c
index 857755ac2d70..c4cadb638460 100644
--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@@ -205,6 +205,7 @@ struct ttm_bo_driver bochs_bo_driver = {
 	.verify_access = bochs_bo_verify_access,
 	.io_mem_reserve = &bochs_ttm_io_mem_reserve,
 	.io_mem_free = &bochs_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int bochs_mm_init(struct bochs_device *bochs)
diff --git a/drivers/gpu/drm/bridge/adv7511/adv7533.c b/drivers/gpu/drm/bridge/adv7511/adv7533.c
index 8b210373cfa2..ac804f81e2f6 100644
--- a/drivers/gpu/drm/bridge/adv7511/adv7533.c
+++ b/drivers/gpu/drm/bridge/adv7511/adv7533.c
@@ -232,7 +232,6 @@ void adv7533_detach_dsi(struct adv7511 *adv)
 int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
 {
 	u32 num_lanes;
-	struct device_node *endpoint;
 
 	of_property_read_u32(np, "adi,dsi-lanes", &num_lanes);
 
@@ -241,17 +240,10 @@ int adv7533_parse_dt(struct device_node *np, struct adv7511 *adv)
 
 	adv->num_dsi_lanes = num_lanes;
 
-	endpoint = of_graph_get_next_endpoint(np, NULL);
-	if (!endpoint)
+	adv->host_node = of_graph_get_remote_node(np, 0, 0);
+	if (!adv->host_node)
 		return -ENODEV;
 
-	adv->host_node = of_graph_get_remote_port_parent(endpoint);
-	if (!adv->host_node) {
-		of_node_put(endpoint);
-		return -ENODEV;
-	}
-
-	of_node_put(endpoint);
 	of_node_put(adv->host_node);
 
 	adv->use_timing_gen = !of_property_read_bool(np,
diff --git a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
index c26997afd3cf..4c758ed51939 100644
--- a/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
+++ b/drivers/gpu/drm/bridge/analogix/analogix_dp_core.c
@@ -1439,13 +1439,19 @@ void analogix_dp_unbind(struct device *dev, struct device *master,
 	struct analogix_dp_device *dp = dev_get_drvdata(dev);
 
 	analogix_dp_bridge_disable(dp->bridge);
+	dp->connector.funcs->destroy(&dp->connector);
+	dp->encoder->funcs->destroy(dp->encoder);
 
 	if (dp->plat_data->panel) {
 		if (drm_panel_unprepare(dp->plat_data->panel))
 			DRM_ERROR("failed to turnoff the panel\n");
+		if (drm_panel_detach(dp->plat_data->panel))
+			DRM_ERROR("failed to detach the panel\n");
 	}
 
+	drm_dp_aux_unregister(&dp->aux);
 	pm_runtime_disable(dev);
+	clk_disable_unprepare(dp->clock);
 }
 EXPORT_SYMBOL_GPL(analogix_dp_unbind);
 
diff --git a/drivers/gpu/drm/bridge/dumb-vga-dac.c b/drivers/gpu/drm/bridge/dumb-vga-dac.c
index 63e113bd21d2..831a606c4706 100644
--- a/drivers/gpu/drm/bridge/dumb-vga-dac.c
+++ b/drivers/gpu/drm/bridge/dumb-vga-dac.c
@@ -154,21 +154,12 @@ static const struct drm_bridge_funcs dumb_vga_bridge_funcs = {
 
 static struct i2c_adapter *dumb_vga_retrieve_ddc(struct device *dev)
 {
-	struct device_node *end_node, *phandle, *remote;
+	struct device_node *phandle, *remote;
 	struct i2c_adapter *ddc;
 
-	end_node = of_graph_get_endpoint_by_regs(dev->of_node, 1, -1);
-	if (!end_node) {
-		dev_err(dev, "Missing connector endpoint\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	remote = of_graph_get_remote_port_parent(end_node);
-	of_node_put(end_node);
-	if (!remote) {
-		dev_err(dev, "Enable to parse remote node\n");
+	remote = of_graph_get_remote_node(dev->of_node, 1, -1);
+	if (!remote)
 		return ERR_PTR(-EINVAL);
-	}
 
 	phandle = of_parse_phandle(remote, "ddc-i2c-bus", 0);
 	of_node_put(remote);
diff --git a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
index cfc606a13a6d..11f11086a68f 100644
--- a/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
+++ b/drivers/gpu/drm/bridge/megachips-stdpxxxx-ge-b850v3-fw.c
@@ -279,10 +279,6 @@ static int ge_b850v3_lvds_init(struct device *dev)
 		return -ENOMEM;
 	}
 
-	ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs;
-	ge_b850v3_lvds_ptr->bridge.of_node = dev->of_node;
-	drm_bridge_add(&ge_b850v3_lvds_ptr->bridge);
-
 success:
 	mutex_unlock(&ge_b850v3_lvds_dev_mutex);
 	return 0;
@@ -317,6 +313,11 @@ static int stdp4028_ge_b850v3_fw_probe(struct i2c_client *stdp4028_i2c,
 	ge_b850v3_lvds_ptr->stdp4028_i2c = stdp4028_i2c;
 	i2c_set_clientdata(stdp4028_i2c, ge_b850v3_lvds_ptr);
 
+	/* drm bridge initialization */
+	ge_b850v3_lvds_ptr->bridge.funcs = &ge_b850v3_lvds_funcs;
+	ge_b850v3_lvds_ptr->bridge.of_node = dev->of_node;
+	drm_bridge_add(&ge_b850v3_lvds_ptr->bridge);
+
 	/* Clear pending interrupts since power up. */
 	i2c_smbus_write_word_data(stdp4028_i2c,
 				  STDP4028_DPTX_IRQ_STS_REG,
diff --git a/drivers/gpu/drm/bridge/nxp-ptn3460.c b/drivers/gpu/drm/bridge/nxp-ptn3460.c
index 27f98c518dde..351704390d02 100644
--- a/drivers/gpu/drm/bridge/nxp-ptn3460.c
+++ b/drivers/gpu/drm/bridge/nxp-ptn3460.c
@@ -20,8 +20,8 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
-#include <linux/of_graph.h>
 
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
 #include "drm_crtc.h"
@@ -292,7 +292,6 @@ static int ptn3460_probe(struct i2c_client *client,
 {
 	struct device *dev = &client->dev;
 	struct ptn3460_bridge *ptn_bridge;
-	struct device_node *endpoint, *panel_node;
 	int ret;
 
 	ptn_bridge = devm_kzalloc(dev, sizeof(*ptn_bridge), GFP_KERNEL);
@@ -300,16 +299,9 @@ static int ptn3460_probe(struct i2c_client *client,
 		return -ENOMEM;
 	}
 
-	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (endpoint) {
-		panel_node = of_graph_get_remote_port_parent(endpoint);
-		if (panel_node) {
-			ptn_bridge->panel = of_drm_find_panel(panel_node);
-			of_node_put(panel_node);
-			if (!ptn_bridge->panel)
-				return -EPROBE_DEFER;
-		}
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, &ptn_bridge->panel, NULL);
+	if (ret)
+		return ret;
 
 	ptn_bridge->client = client;
 
diff --git a/drivers/gpu/drm/bridge/parade-ps8622.c b/drivers/gpu/drm/bridge/parade-ps8622.c
index ac8cc5b50d9f..1dcec3b97e67 100644
--- a/drivers/gpu/drm/bridge/parade-ps8622.c
+++ b/drivers/gpu/drm/bridge/parade-ps8622.c
@@ -22,10 +22,10 @@
 #include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
-#include <linux/of_graph.h>
 #include <linux/pm.h>
 #include <linux/regulator/consumer.h>
 
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
 #include "drmP.h"
@@ -536,7 +536,6 @@ static int ps8622_probe(struct i2c_client *client,
 					const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
-	struct device_node *endpoint, *panel_node;
 	struct ps8622_bridge *ps8622;
 	int ret;
 
@@ -544,16 +543,9 @@ static int ps8622_probe(struct i2c_client *client,
 	if (!ps8622)
 		return -ENOMEM;
 
-	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (endpoint) {
-		panel_node = of_graph_get_remote_port_parent(endpoint);
-		if (panel_node) {
-			ps8622->panel = of_drm_find_panel(panel_node);
-			of_node_put(panel_node);
-			if (!ps8622->panel)
-				return -EPROBE_DEFER;
-		}
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, &ps8622->panel, NULL);
+	if (ret)
+		return ret;
 
 	ps8622->client = client;
 
diff --git a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
index 32f02e92e0b9..4e1f54a675d8 100644
--- a/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
+++ b/drivers/gpu/drm/bridge/synopsys/dw-hdmi.c
@@ -30,18 +30,15 @@
 #include <drm/drm_encoder_slave.h>
 #include <drm/bridge/dw_hdmi.h>
 
+#include <uapi/linux/media-bus-format.h>
+#include <uapi/linux/videodev2.h>
+
 #include "dw-hdmi.h"
 #include "dw-hdmi-audio.h"
 
 #define DDC_SEGMENT_ADDR	0x30
 #define HDMI_EDID_LEN		512
 
-#define RGB			0
-#define YCBCR444		1
-#define YCBCR422_16BITS		2
-#define YCBCR422_8BITS		3
-#define XVYCC444		4
-
 enum hdmi_datamap {
 	RGB444_8B = 0x01,
 	RGB444_10B = 0x03,
@@ -95,10 +92,10 @@ struct hdmi_vmode {
 };
 
 struct hdmi_data_info {
-	unsigned int enc_in_format;
-	unsigned int enc_out_format;
-	unsigned int enc_color_depth;
-	unsigned int colorimetry;
+	unsigned int enc_in_bus_format;
+	unsigned int enc_out_bus_format;
+	unsigned int enc_in_encoding;
+	unsigned int enc_out_encoding;
 	unsigned int pix_repet_factor;
 	unsigned int hdcp_enable;
 	struct hdmi_vmode video_mode;
@@ -567,6 +564,78 @@ void dw_hdmi_audio_disable(struct dw_hdmi *hdmi)
 }
 EXPORT_SYMBOL_GPL(dw_hdmi_audio_disable);
 
+static bool hdmi_bus_fmt_is_rgb(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv444(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool hdmi_bus_fmt_is_yuv422(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static int hdmi_bus_fmt_color_depth(unsigned int bus_format)
+{
+	switch (bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+		return 8;
+
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		return 10;
+
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+		return 12;
+
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+	case MEDIA_BUS_FMT_YUV16_1X48:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		return 16;
+
+	default:
+		return 0;
+	}
+}
+
 /*
  * this submodule is responsible for the video data synchronization.
  * for example, for RGB 4:4:4 input, the data map is defined as
@@ -579,37 +648,49 @@ static void hdmi_video_sample(struct dw_hdmi *hdmi)
 	int color_format = 0;
 	u8 val;
 
-	if (hdmi->hdmi_data.enc_in_format == RGB) {
-		if (hdmi->hdmi_data.enc_color_depth == 8)
-			color_format = 0x01;
-		else if (hdmi->hdmi_data.enc_color_depth == 10)
-			color_format = 0x03;
-		else if (hdmi->hdmi_data.enc_color_depth == 12)
-			color_format = 0x05;
-		else if (hdmi->hdmi_data.enc_color_depth == 16)
-			color_format = 0x07;
-		else
-			return;
-	} else if (hdmi->hdmi_data.enc_in_format == YCBCR444) {
-		if (hdmi->hdmi_data.enc_color_depth == 8)
-			color_format = 0x09;
-		else if (hdmi->hdmi_data.enc_color_depth == 10)
-			color_format = 0x0B;
-		else if (hdmi->hdmi_data.enc_color_depth == 12)
-			color_format = 0x0D;
-		else if (hdmi->hdmi_data.enc_color_depth == 16)
-			color_format = 0x0F;
-		else
-			return;
-	} else if (hdmi->hdmi_data.enc_in_format == YCBCR422_8BITS) {
-		if (hdmi->hdmi_data.enc_color_depth == 8)
-			color_format = 0x16;
-		else if (hdmi->hdmi_data.enc_color_depth == 10)
-			color_format = 0x14;
-		else if (hdmi->hdmi_data.enc_color_depth == 12)
-			color_format = 0x12;
-		else
-			return;
+	switch (hdmi->hdmi_data.enc_in_bus_format) {
+	case MEDIA_BUS_FMT_RGB888_1X24:
+		color_format = 0x01;
+		break;
+	case MEDIA_BUS_FMT_RGB101010_1X30:
+		color_format = 0x03;
+		break;
+	case MEDIA_BUS_FMT_RGB121212_1X36:
+		color_format = 0x05;
+		break;
+	case MEDIA_BUS_FMT_RGB161616_1X48:
+		color_format = 0x07;
+		break;
+
+	case MEDIA_BUS_FMT_YUV8_1X24:
+	case MEDIA_BUS_FMT_UYYVYY8_0_5X24:
+		color_format = 0x09;
+		break;
+	case MEDIA_BUS_FMT_YUV10_1X30:
+	case MEDIA_BUS_FMT_UYYVYY10_0_5X30:
+		color_format = 0x0B;
+		break;
+	case MEDIA_BUS_FMT_YUV12_1X36:
+	case MEDIA_BUS_FMT_UYYVYY12_0_5X36:
+		color_format = 0x0D;
+		break;
+	case MEDIA_BUS_FMT_YUV16_1X48:
+	case MEDIA_BUS_FMT_UYYVYY16_0_5X48:
+		color_format = 0x0F;
+		break;
+
+	case MEDIA_BUS_FMT_UYVY8_1X16:
+		color_format = 0x16;
+		break;
+	case MEDIA_BUS_FMT_UYVY10_1X20:
+		color_format = 0x14;
+		break;
+	case MEDIA_BUS_FMT_UYVY12_1X24:
+		color_format = 0x12;
+		break;
+
+	default:
+		return;
 	}
 
 	val = HDMI_TX_INVID0_INTERNAL_DE_GENERATOR_DISABLE |
@@ -632,26 +713,30 @@ static void hdmi_video_sample(struct dw_hdmi *hdmi)
 
 static int is_color_space_conversion(struct dw_hdmi *hdmi)
 {
-	return hdmi->hdmi_data.enc_in_format != hdmi->hdmi_data.enc_out_format;
+	return hdmi->hdmi_data.enc_in_bus_format != hdmi->hdmi_data.enc_out_bus_format;
 }
 
 static int is_color_space_decimation(struct dw_hdmi *hdmi)
 {
-	if (hdmi->hdmi_data.enc_out_format != YCBCR422_8BITS)
+	if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format))
 		return 0;
-	if (hdmi->hdmi_data.enc_in_format == RGB ||
-	    hdmi->hdmi_data.enc_in_format == YCBCR444)
+
+	if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_in_bus_format) ||
+	    hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_in_bus_format))
 		return 1;
+
 	return 0;
 }
 
 static int is_color_space_interpolation(struct dw_hdmi *hdmi)
 {
-	if (hdmi->hdmi_data.enc_in_format != YCBCR422_8BITS)
+	if (!hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_in_bus_format))
 		return 0;
-	if (hdmi->hdmi_data.enc_out_format == RGB ||
-	    hdmi->hdmi_data.enc_out_format == YCBCR444)
+
+	if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format) ||
+	    hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format))
 		return 1;
+
 	return 0;
 }
 
@@ -662,15 +747,16 @@ static void dw_hdmi_update_csc_coeffs(struct dw_hdmi *hdmi)
 	u32 csc_scale = 1;
 
 	if (is_color_space_conversion(hdmi)) {
-		if (hdmi->hdmi_data.enc_out_format == RGB) {
-			if (hdmi->hdmi_data.colorimetry ==
-					HDMI_COLORIMETRY_ITU_601)
+		if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format)) {
+			if (hdmi->hdmi_data.enc_out_encoding ==
+						V4L2_YCBCR_ENC_601)
 				csc_coeff = &csc_coeff_rgb_out_eitu601;
 			else
 				csc_coeff = &csc_coeff_rgb_out_eitu709;
-		} else if (hdmi->hdmi_data.enc_in_format == RGB) {
-			if (hdmi->hdmi_data.colorimetry ==
-					HDMI_COLORIMETRY_ITU_601)
+		} else if (hdmi_bus_fmt_is_rgb(
+					hdmi->hdmi_data.enc_in_bus_format)) {
+			if (hdmi->hdmi_data.enc_out_encoding ==
+						V4L2_YCBCR_ENC_601)
 				csc_coeff = &csc_coeff_rgb_in_eitu601;
 			else
 				csc_coeff = &csc_coeff_rgb_in_eitu709;
@@ -708,16 +794,23 @@ static void hdmi_video_csc(struct dw_hdmi *hdmi)
 	else if (is_color_space_decimation(hdmi))
 		decimation = HDMI_CSC_CFG_DECMODE_CHROMA_INT_FORMULA3;
 
-	if (hdmi->hdmi_data.enc_color_depth == 8)
+	switch (hdmi_bus_fmt_color_depth(hdmi->hdmi_data.enc_out_bus_format)) {
+	case 8:
 		color_depth = HDMI_CSC_SCALE_CSC_COLORDE_PTH_24BPP;
-	else if (hdmi->hdmi_data.enc_color_depth == 10)
+		break;
+	case 10:
 		color_depth = HDMI_CSC_SCALE_CSC_COLORDE_PTH_30BPP;
-	else if (hdmi->hdmi_data.enc_color_depth == 12)
+		break;
+	case 12:
 		color_depth = HDMI_CSC_SCALE_CSC_COLORDE_PTH_36BPP;
-	else if (hdmi->hdmi_data.enc_color_depth == 16)
+		break;
+	case 16:
 		color_depth = HDMI_CSC_SCALE_CSC_COLORDE_PTH_48BPP;
-	else
+		break;
+
+	default:
 		return;
+	}
 
 	/* Configure the CSC registers */
 	hdmi_writeb(hdmi, interpolation | decimation, HDMI_CSC_CFG);
@@ -740,32 +833,43 @@ static void hdmi_video_packetize(struct dw_hdmi *hdmi)
 	struct hdmi_data_info *hdmi_data = &hdmi->hdmi_data;
 	u8 val, vp_conf;
 
-	if (hdmi_data->enc_out_format == RGB ||
-	    hdmi_data->enc_out_format == YCBCR444) {
-		if (!hdmi_data->enc_color_depth) {
-			output_select = HDMI_VP_CONF_OUTPUT_SELECTOR_BYPASS;
-		} else if (hdmi_data->enc_color_depth == 8) {
+	if (hdmi_bus_fmt_is_rgb(hdmi->hdmi_data.enc_out_bus_format) ||
+	    hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format)) {
+		switch (hdmi_bus_fmt_color_depth(
+					hdmi->hdmi_data.enc_out_bus_format)) {
+		case 8:
 			color_depth = 4;
 			output_select = HDMI_VP_CONF_OUTPUT_SELECTOR_BYPASS;
-		} else if (hdmi_data->enc_color_depth == 10) {
+			break;
+		case 10:
 			color_depth = 5;
-		} else if (hdmi_data->enc_color_depth == 12) {
+			break;
+		case 12:
 			color_depth = 6;
-		} else if (hdmi_data->enc_color_depth == 16) {
+			break;
+		case 16:
 			color_depth = 7;
-		} else {
-			return;
+			break;
+		default:
+			output_select = HDMI_VP_CONF_OUTPUT_SELECTOR_BYPASS;
 		}
-	} else if (hdmi_data->enc_out_format == YCBCR422_8BITS) {
-		if (!hdmi_data->enc_color_depth ||
-		    hdmi_data->enc_color_depth == 8)
+	} else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format)) {
+		switch (hdmi_bus_fmt_color_depth(
+					hdmi->hdmi_data.enc_out_bus_format)) {
+		case 0:
+		case 8:
 			remap_size = HDMI_VP_REMAP_YCC422_16bit;
-		else if (hdmi_data->enc_color_depth == 10)
+			break;
+		case 10:
 			remap_size = HDMI_VP_REMAP_YCC422_20bit;
-		else if (hdmi_data->enc_color_depth == 12)
+			break;
+		case 12:
 			remap_size = HDMI_VP_REMAP_YCC422_24bit;
-		else
+			break;
+
+		default:
 			return;
+		}
 		output_select = HDMI_VP_CONF_OUTPUT_SELECTOR_YCC422;
 	} else {
 		return;
@@ -1111,10 +1215,46 @@ static enum drm_connector_status dw_hdmi_phy_read_hpd(struct dw_hdmi *hdmi,
 		connector_status_connected : connector_status_disconnected;
 }
 
+static void dw_hdmi_phy_update_hpd(struct dw_hdmi *hdmi, void *data,
+				   bool force, bool disabled, bool rxsense)
+{
+	u8 old_mask = hdmi->phy_mask;
+
+	if (force || disabled || !rxsense)
+		hdmi->phy_mask |= HDMI_PHY_RX_SENSE;
+	else
+		hdmi->phy_mask &= ~HDMI_PHY_RX_SENSE;
+
+	if (old_mask != hdmi->phy_mask)
+		hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0);
+}
+
+static void dw_hdmi_phy_setup_hpd(struct dw_hdmi *hdmi, void *data)
+{
+	/*
+	 * Configure the PHY RX SENSE and HPD interrupts polarities and clear
+	 * any pending interrupt.
+	 */
+	hdmi_writeb(hdmi, HDMI_PHY_HPD | HDMI_PHY_RX_SENSE, HDMI_PHY_POL0);
+	hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE,
+		    HDMI_IH_PHY_STAT0);
+
+	/* Enable cable hot plug irq. */
+	hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0);
+
+	/* Clear and unmute interrupts. */
+	hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE,
+		    HDMI_IH_PHY_STAT0);
+	hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE),
+		    HDMI_IH_MUTE_PHY_STAT0);
+}
+
 static const struct dw_hdmi_phy_ops dw_hdmi_synopsys_phy_ops = {
 	.init = dw_hdmi_phy_init,
 	.disable = dw_hdmi_phy_disable,
 	.read_hpd = dw_hdmi_phy_read_hpd,
+	.update_hpd = dw_hdmi_phy_update_hpd,
+	.setup_hpd = dw_hdmi_phy_setup_hpd,
 };
 
 /* -----------------------------------------------------------------------------
@@ -1148,28 +1288,36 @@ static void hdmi_config_AVI(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 	/* Initialise info frame from DRM mode */
 	drm_hdmi_avi_infoframe_from_display_mode(&frame, mode);
 
-	if (hdmi->hdmi_data.enc_out_format == YCBCR444)
+	if (hdmi_bus_fmt_is_yuv444(hdmi->hdmi_data.enc_out_bus_format))
 		frame.colorspace = HDMI_COLORSPACE_YUV444;
-	else if (hdmi->hdmi_data.enc_out_format == YCBCR422_8BITS)
+	else if (hdmi_bus_fmt_is_yuv422(hdmi->hdmi_data.enc_out_bus_format))
 		frame.colorspace = HDMI_COLORSPACE_YUV422;
 	else
 		frame.colorspace = HDMI_COLORSPACE_RGB;
 
 	/* Set up colorimetry */
-	if (hdmi->hdmi_data.enc_out_format == XVYCC444) {
-		frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
-		if (hdmi->hdmi_data.colorimetry == HDMI_COLORIMETRY_ITU_601)
-			frame.extended_colorimetry =
+	switch (hdmi->hdmi_data.enc_out_encoding) {
+	case V4L2_YCBCR_ENC_601:
+		if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV601)
+			frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+		else
+			frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+		frame.extended_colorimetry =
 				HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
-		else /*hdmi->hdmi_data.colorimetry == HDMI_COLORIMETRY_ITU_709*/
-			frame.extended_colorimetry =
+		break;
+	case V4L2_YCBCR_ENC_709:
+		if (hdmi->hdmi_data.enc_in_encoding == V4L2_YCBCR_ENC_XV709)
+			frame.colorimetry = HDMI_COLORIMETRY_EXTENDED;
+		else
+			frame.colorimetry = HDMI_COLORIMETRY_ITU_709;
+		frame.extended_colorimetry =
 				HDMI_EXTENDED_COLORIMETRY_XV_YCC_709;
-	} else if (hdmi->hdmi_data.enc_out_format != RGB) {
-		frame.colorimetry = hdmi->hdmi_data.colorimetry;
-		frame.extended_colorimetry = HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
-	} else { /* Carries no data */
-		frame.colorimetry = HDMI_COLORIMETRY_NONE;
-		frame.extended_colorimetry = HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+		break;
+	default: /* Carries no data */
+		frame.colorimetry = HDMI_COLORIMETRY_ITU_601;
+		frame.extended_colorimetry =
+				HDMI_EXTENDED_COLORIMETRY_XV_YCC_601;
+		break;
 	}
 
 	frame.scan_mode = HDMI_SCAN_MODE_NONE;
@@ -1498,19 +1646,30 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 	    (hdmi->vic == 21) || (hdmi->vic == 22) ||
 	    (hdmi->vic == 2) || (hdmi->vic == 3) ||
 	    (hdmi->vic == 17) || (hdmi->vic == 18))
-		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_601;
+		hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_601;
 	else
-		hdmi->hdmi_data.colorimetry = HDMI_COLORIMETRY_ITU_709;
+		hdmi->hdmi_data.enc_out_encoding = V4L2_YCBCR_ENC_709;
 
 	hdmi->hdmi_data.video_mode.mpixelrepetitionoutput = 0;
 	hdmi->hdmi_data.video_mode.mpixelrepetitioninput = 0;
 
-	/* TODO: Get input format from IPU (via FB driver interface) */
-	hdmi->hdmi_data.enc_in_format = RGB;
+	/* TOFIX: Get input format from plat data or fallback to RGB888 */
+	if (hdmi->plat_data->input_bus_format)
+		hdmi->hdmi_data.enc_in_bus_format =
+			hdmi->plat_data->input_bus_format;
+	else
+		hdmi->hdmi_data.enc_in_bus_format = MEDIA_BUS_FMT_RGB888_1X24;
+
+	/* TOFIX: Get input encoding from plat data or fallback to none */
+	if (hdmi->plat_data->input_bus_encoding)
+		hdmi->hdmi_data.enc_in_encoding =
+			hdmi->plat_data->input_bus_encoding;
+	else
+		hdmi->hdmi_data.enc_in_encoding = V4L2_YCBCR_ENC_DEFAULT;
 
-	hdmi->hdmi_data.enc_out_format = RGB;
+	/* TOFIX: Default to RGB888 output format */
+	hdmi->hdmi_data.enc_out_bus_format = MEDIA_BUS_FMT_RGB888_1X24;
 
-	hdmi->hdmi_data.enc_color_depth = 8;
 	hdmi->hdmi_data.pix_repet_factor = 0;
 	hdmi->hdmi_data.hdcp_enable = 0;
 	hdmi->hdmi_data.video_mode.mdataenablepolarity = true;
@@ -1558,8 +1717,7 @@ static int dw_hdmi_setup(struct dw_hdmi *hdmi, struct drm_display_mode *mode)
 	return 0;
 }
 
-/* Wait until we are registered to enable interrupts */
-static int dw_hdmi_fb_registered(struct dw_hdmi *hdmi)
+static void dw_hdmi_setup_i2c(struct dw_hdmi *hdmi)
 {
 	hdmi_writeb(hdmi, HDMI_PHY_I2CM_INT_ADDR_DONE_POL,
 		    HDMI_PHY_I2CM_INT_ADDR);
@@ -1567,15 +1725,6 @@ static int dw_hdmi_fb_registered(struct dw_hdmi *hdmi)
 	hdmi_writeb(hdmi, HDMI_PHY_I2CM_CTLINT_ADDR_NAC_POL |
 		    HDMI_PHY_I2CM_CTLINT_ADDR_ARBITRATION_POL,
 		    HDMI_PHY_I2CM_CTLINT_ADDR);
-
-	/* enable cable hot plug irq */
-	hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0);
-
-	/* Clear Hotplug interrupts */
-	hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE,
-		    HDMI_IH_PHY_STAT0);
-
-	return 0;
 }
 
 static void initialize_hdmi_ih_mutes(struct dw_hdmi *hdmi)
@@ -1682,15 +1831,10 @@ static void dw_hdmi_update_power(struct dw_hdmi *hdmi)
  */
 static void dw_hdmi_update_phy_mask(struct dw_hdmi *hdmi)
 {
-	u8 old_mask = hdmi->phy_mask;
-
-	if (hdmi->force || hdmi->disabled || !hdmi->rxsense)
-		hdmi->phy_mask |= HDMI_PHY_RX_SENSE;
-	else
-		hdmi->phy_mask &= ~HDMI_PHY_RX_SENSE;
-
-	if (old_mask != hdmi->phy_mask)
-		hdmi_writeb(hdmi, hdmi->phy_mask, HDMI_PHY_MASK0);
+	if (hdmi->phy.ops->update_hpd)
+		hdmi->phy.ops->update_hpd(hdmi, hdmi->phy.data,
+					  hdmi->force, hdmi->disabled,
+					  hdmi->rxsense);
 }
 
 static enum drm_connector_status
@@ -1803,6 +1947,20 @@ static int dw_hdmi_bridge_attach(struct drm_bridge *bridge)
 	return 0;
 }
 
+static bool dw_hdmi_bridge_mode_fixup(struct drm_bridge *bridge,
+				      const struct drm_display_mode *orig_mode,
+				      struct drm_display_mode *mode)
+{
+	struct dw_hdmi *hdmi = bridge->driver_private;
+	struct drm_connector *connector = &hdmi->connector;
+	enum drm_mode_status status;
+
+	status = dw_hdmi_connector_mode_valid(connector, mode);
+	if (status != MODE_OK)
+		return false;
+	return true;
+}
+
 static void dw_hdmi_bridge_mode_set(struct drm_bridge *bridge,
 				    struct drm_display_mode *orig_mode,
 				    struct drm_display_mode *mode)
@@ -1844,6 +2002,7 @@ static const struct drm_bridge_funcs dw_hdmi_bridge_funcs = {
 	.enable = dw_hdmi_bridge_enable,
 	.disable = dw_hdmi_bridge_disable,
 	.mode_set = dw_hdmi_bridge_mode_set,
+	.mode_fixup = dw_hdmi_bridge_mode_fixup,
 };
 
 static irqreturn_t dw_hdmi_i2c_irq(struct dw_hdmi *hdmi)
@@ -1882,6 +2041,41 @@ static irqreturn_t dw_hdmi_hardirq(int irq, void *dev_id)
 	return ret;
 }
 
+void __dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense)
+{
+	mutex_lock(&hdmi->mutex);
+
+	if (!hdmi->force) {
+		/*
+		 * If the RX sense status indicates we're disconnected,
+		 * clear the software rxsense status.
+		 */
+		if (!rx_sense)
+			hdmi->rxsense = false;
+
+		/*
+		 * Only set the software rxsense status when both
+		 * rxsense and hpd indicates we're connected.
+		 * This avoids what seems to be bad behaviour in
+		 * at least iMX6S versions of the phy.
+		 */
+		if (hpd)
+			hdmi->rxsense = true;
+
+		dw_hdmi_update_power(hdmi);
+		dw_hdmi_update_phy_mask(hdmi);
+	}
+	mutex_unlock(&hdmi->mutex);
+}
+
+void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense)
+{
+	struct dw_hdmi *hdmi = dev_get_drvdata(dev);
+
+	__dw_hdmi_setup_rx_sense(hdmi, hpd, rx_sense);
+}
+EXPORT_SYMBOL_GPL(dw_hdmi_setup_rx_sense);
+
 static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
 {
 	struct dw_hdmi *hdmi = dev_id;
@@ -1914,30 +2108,10 @@ static irqreturn_t dw_hdmi_irq(int irq, void *dev_id)
 	 * ask the source to re-read the EDID.
 	 */
 	if (intr_stat &
-	    (HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD)) {
-		mutex_lock(&hdmi->mutex);
-		if (!hdmi->force) {
-			/*
-			 * If the RX sense status indicates we're disconnected,
-			 * clear the software rxsense status.
-			 */
-			if (!(phy_stat & HDMI_PHY_RX_SENSE))
-				hdmi->rxsense = false;
-
-			/*
-			 * Only set the software rxsense status when both
-			 * rxsense and hpd indicates we're connected.
-			 * This avoids what seems to be bad behaviour in
-			 * at least iMX6S versions of the phy.
-			 */
-			if (phy_stat & HDMI_PHY_HPD)
-				hdmi->rxsense = true;
-
-			dw_hdmi_update_power(hdmi);
-			dw_hdmi_update_phy_mask(hdmi);
-		}
-		mutex_unlock(&hdmi->mutex);
-	}
+	    (HDMI_IH_PHY_STAT0_RX_SENSE | HDMI_IH_PHY_STAT0_HPD))
+		__dw_hdmi_setup_rx_sense(hdmi,
+					 phy_stat & HDMI_PHY_HPD,
+					 phy_stat & HDMI_PHY_RX_SENSE);
 
 	if (intr_stat & HDMI_IH_PHY_STAT0_HPD) {
 		dev_dbg(hdmi->dev, "EVENT=%s\n",
@@ -2204,29 +2378,15 @@ __dw_hdmi_probe(struct platform_device *pdev,
 			hdmi->ddc = NULL;
 	}
 
-	/*
-	 * Configure registers related to HDMI interrupt
-	 * generation before registering IRQ.
-	 */
-	hdmi_writeb(hdmi, HDMI_PHY_HPD | HDMI_PHY_RX_SENSE, HDMI_PHY_POL0);
-
-	/* Clear Hotplug interrupts */
-	hdmi_writeb(hdmi, HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE,
-		    HDMI_IH_PHY_STAT0);
-
 	hdmi->bridge.driver_private = hdmi;
 	hdmi->bridge.funcs = &dw_hdmi_bridge_funcs;
 #ifdef CONFIG_OF
 	hdmi->bridge.of_node = pdev->dev.of_node;
 #endif
 
-	ret = dw_hdmi_fb_registered(hdmi);
-	if (ret)
-		goto err_iahb;
-
-	/* Unmute interrupts */
-	hdmi_writeb(hdmi, ~(HDMI_IH_PHY_STAT0_HPD | HDMI_IH_PHY_STAT0_RX_SENSE),
-		    HDMI_IH_MUTE_PHY_STAT0);
+	dw_hdmi_setup_i2c(hdmi);
+	if (hdmi->phy.ops->setup_hpd)
+		hdmi->phy.ops->setup_hpd(hdmi, hdmi->phy.data);
 
 	memset(&pdevinfo, 0, sizeof(pdevinfo));
 	pdevinfo.parent = dev;
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index de9ffb49e9f6..5c26488e7a2d 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -1244,7 +1244,6 @@ static const struct regmap_config tc_regmap_config = {
 static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 {
 	struct device *dev = &client->dev;
-	struct device_node *ep;
 	struct tc_data *tc;
 	int ret;
 
@@ -1255,29 +1254,9 @@ static int tc_probe(struct i2c_client *client, const struct i2c_device_id *id)
 	tc->dev = dev;
 
 	/* port@2 is the output port */
-	ep = of_graph_get_endpoint_by_regs(dev->of_node, 2, -1);
-	if (ep) {
-		struct device_node *remote;
-
-		remote = of_graph_get_remote_port_parent(ep);
-		if (!remote) {
-			dev_warn(dev, "endpoint %s not connected\n",
-				 ep->full_name);
-			of_node_put(ep);
-			return -ENODEV;
-		}
-		of_node_put(ep);
-		tc->panel = of_drm_find_panel(remote);
-		if (tc->panel) {
-			dev_dbg(dev, "found panel %s\n", remote->full_name);
-		} else {
-			dev_dbg(dev, "waiting for panel %s\n",
-				remote->full_name);
-			of_node_put(remote);
-			return -EPROBE_DEFER;
-		}
-		of_node_put(remote);
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 2, 0, &tc->panel, NULL);
+	if (ret)
+		return ret;
 
 	/* Shut down GPIO is optional */
 	tc->sd_gpio = devm_gpiod_get_optional(dev, "shutdown", GPIOD_OUT_HIGH);
diff --git a/drivers/gpu/drm/bridge/ti-tfp410.c b/drivers/gpu/drm/bridge/ti-tfp410.c
index 7d519b46aee4..eee4efda829e 100644
--- a/drivers/gpu/drm/bridge/ti-tfp410.c
+++ b/drivers/gpu/drm/bridge/ti-tfp410.c
@@ -165,18 +165,13 @@ static irqreturn_t tfp410_hpd_irq_thread(int irq, void *arg)
 
 static int tfp410_get_connector_properties(struct tfp410 *dvi)
 {
-	struct device_node *ep = NULL, *connector_node = NULL;
-	struct device_node *ddc_phandle = NULL;
+	struct device_node *connector_node, *ddc_phandle;
 	int ret = 0;
 
 	/* port@1 is the connector node */
-	ep = of_graph_get_endpoint_by_regs(dvi->dev->of_node, 1, -1);
-	if (!ep)
-		goto fail;
-
-	connector_node = of_graph_get_remote_port_parent(ep);
+	connector_node = of_graph_get_remote_node(dvi->dev->of_node, 1, -1);
 	if (!connector_node)
-		goto fail;
+		return -ENODEV;
 
 	dvi->hpd = fwnode_get_named_gpiod(&connector_node->fwnode,
 					"hpd-gpios", 0, GPIOD_IN, "hpd");
@@ -199,10 +194,10 @@ static int tfp410_get_connector_properties(struct tfp410 *dvi)
 	else
 		ret = -EPROBE_DEFER;
 
+	of_node_put(ddc_phandle);
+
 fail:
-	of_node_put(ep);
 	of_node_put(connector_node);
-	of_node_put(ddc_phandle);
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/cirrus/cirrus_mode.c b/drivers/gpu/drm/cirrus/cirrus_mode.c
index ed43ab10ac99..53f6f0f84206 100644
--- a/drivers/gpu/drm/cirrus/cirrus_mode.c
+++ b/drivers/gpu/drm/cirrus/cirrus_mode.c
@@ -327,7 +327,8 @@ static void cirrus_crtc_commit(struct drm_crtc *crtc)
  * but it's a requirement that we provide the function
  */
 static int cirrus_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				 u16 *blue, uint32_t size)
+				 u16 *blue, uint32_t size,
+				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct cirrus_crtc *cirrus_crtc = to_cirrus_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/cirrus/cirrus_ttm.c b/drivers/gpu/drm/cirrus/cirrus_ttm.c
index f53aa8f4a143..93dbcd38355d 100644
--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver cirrus_bo_driver = {
 	.verify_access = cirrus_bo_verify_access,
 	.io_mem_reserve = &cirrus_ttm_io_mem_reserve,
 	.io_mem_free = &cirrus_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int cirrus_mm_init(struct cirrus_device *cirrus)
diff --git a/drivers/gpu/drm/drm_atomic.c b/drivers/gpu/drm/drm_atomic.c
index 9b892af7811a..f32506a7c1d6 100644
--- a/drivers/gpu/drm/drm_atomic.c
+++ b/drivers/gpu/drm/drm_atomic.c
@@ -1516,19 +1516,9 @@ EXPORT_SYMBOL(drm_atomic_add_affected_planes);
 void drm_atomic_legacy_backoff(struct drm_atomic_state *state)
 {
 	struct drm_device *dev = state->dev;
-	unsigned crtc_mask = 0;
-	struct drm_crtc *crtc;
 	int ret;
 	bool global = false;
 
-	drm_for_each_crtc(crtc, dev) {
-		if (crtc->acquire_ctx != state->acquire_ctx)
-			continue;
-
-		crtc_mask |= drm_crtc_mask(crtc);
-		crtc->acquire_ctx = NULL;
-	}
-
 	if (WARN_ON(dev->mode_config.acquire_ctx == state->acquire_ctx)) {
 		global = true;
 
@@ -1542,10 +1532,6 @@ retry:
 	if (ret)
 		goto retry;
 
-	drm_for_each_crtc(crtc, dev)
-		if (drm_crtc_mask(crtc) & crtc_mask)
-			crtc->acquire_ctx = state->acquire_ctx;
-
 	if (global)
 		dev->mode_config.acquire_ctx = state->acquire_ctx;
 }
@@ -1690,22 +1676,8 @@ static void drm_atomic_print_state(const struct drm_atomic_state *state)
 		drm_atomic_connector_print_state(&p, connector_state);
 }
 
-/**
- * drm_state_dump - dump entire device atomic state
- * @dev: the drm device
- * @p: where to print the state to
- *
- * Just for debugging.  Drivers might want an option to dump state
- * to dmesg in case of error irq's.  (Hint, you probably want to
- * ratelimit this!)
- *
- * The caller must drm_modeset_lock_all(), or if this is called
- * from error irq handler, it should not be enabled by default.
- * (Ie. if you are debugging errors you might not care that this
- * is racey.  But calling this without all modeset locks held is
- * not inherently safe.)
- */
-void drm_state_dump(struct drm_device *dev, struct drm_printer *p)
+static void __drm_state_dump(struct drm_device *dev, struct drm_printer *p,
+			     bool take_locks)
 {
 	struct drm_mode_config *config = &dev->mode_config;
 	struct drm_plane *plane;
@@ -1716,17 +1688,51 @@ void drm_state_dump(struct drm_device *dev, struct drm_printer *p)
 	if (!drm_core_check_feature(dev, DRIVER_ATOMIC))
 		return;
 
-	list_for_each_entry(plane, &config->plane_list, head)
+	list_for_each_entry(plane, &config->plane_list, head) {
+		if (take_locks)
+			drm_modeset_lock(&plane->mutex, NULL);
 		drm_atomic_plane_print_state(p, plane->state);
+		if (take_locks)
+			drm_modeset_unlock(&plane->mutex);
+	}
 
-	list_for_each_entry(crtc, &config->crtc_list, head)
+	list_for_each_entry(crtc, &config->crtc_list, head) {
+		if (take_locks)
+			drm_modeset_lock(&crtc->mutex, NULL);
 		drm_atomic_crtc_print_state(p, crtc->state);
+		if (take_locks)
+			drm_modeset_unlock(&crtc->mutex);
+	}
 
 	drm_connector_list_iter_begin(dev, &conn_iter);
+	if (take_locks)
+		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 	drm_for_each_connector_iter(connector, &conn_iter)
 		drm_atomic_connector_print_state(p, connector->state);
+	if (take_locks)
+		drm_modeset_unlock(&dev->mode_config.connection_mutex);
 	drm_connector_list_iter_end(&conn_iter);
 }
+
+/**
+ * drm_state_dump - dump entire device atomic state
+ * @dev: the drm device
+ * @p: where to print the state to
+ *
+ * Just for debugging.  Drivers might want an option to dump state
+ * to dmesg in case of error irq's.  (Hint, you probably want to
+ * ratelimit this!)
+ *
+ * The caller must drm_modeset_lock_all(), or if this is called
+ * from error irq handler, it should not be enabled by default.
+ * (Ie. if you are debugging errors you might not care that this
+ * is racey.  But calling this without all modeset locks held is
+ * not inherently safe.)
+ */
+void drm_state_dump(struct drm_device *dev, struct drm_printer *p)
+{
+	__drm_state_dump(dev, p, false);
+}
 EXPORT_SYMBOL(drm_state_dump);
 
 #ifdef CONFIG_DEBUG_FS
@@ -1736,9 +1742,7 @@ static int drm_state_info(struct seq_file *m, void *data)
 	struct drm_device *dev = node->minor->dev;
 	struct drm_printer p = drm_seq_file_printer(m);
 
-	drm_modeset_lock_all(dev);
-	drm_state_dump(dev, &p);
-	drm_modeset_unlock_all(dev);
+	__drm_state_dump(dev, &p, true);
 
 	return 0;
 }
@@ -2077,94 +2081,6 @@ static void complete_crtc_signaling(struct drm_device *dev,
 	kfree(fence_state);
 }
 
-int drm_atomic_remove_fb(struct drm_framebuffer *fb)
-{
-	struct drm_modeset_acquire_ctx ctx;
-	struct drm_device *dev = fb->dev;
-	struct drm_atomic_state *state;
-	struct drm_plane *plane;
-	struct drm_connector *conn;
-	struct drm_connector_state *conn_state;
-	int i, ret = 0;
-	unsigned plane_mask;
-
-	state = drm_atomic_state_alloc(dev);
-	if (!state)
-		return -ENOMEM;
-
-	drm_modeset_acquire_init(&ctx, 0);
-	state->acquire_ctx = &ctx;
-
-retry:
-	plane_mask = 0;
-	ret = drm_modeset_lock_all_ctx(dev, &ctx);
-	if (ret)
-		goto unlock;
-
-	drm_for_each_plane(plane, dev) {
-		struct drm_plane_state *plane_state;
-
-		if (plane->state->fb != fb)
-			continue;
-
-		plane_state = drm_atomic_get_plane_state(state, plane);
-		if (IS_ERR(plane_state)) {
-			ret = PTR_ERR(plane_state);
-			goto unlock;
-		}
-
-		if (plane_state->crtc->primary == plane) {
-			struct drm_crtc_state *crtc_state;
-
-			crtc_state = drm_atomic_get_existing_crtc_state(state, plane_state->crtc);
-
-			ret = drm_atomic_add_affected_connectors(state, plane_state->crtc);
-			if (ret)
-				goto unlock;
-
-			crtc_state->active = false;
-			ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL);
-			if (ret)
-				goto unlock;
-		}
-
-		drm_atomic_set_fb_for_plane(plane_state, NULL);
-		ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
-		if (ret)
-			goto unlock;
-
-		plane_mask |= BIT(drm_plane_index(plane));
-
-		plane->old_fb = plane->fb;
-	}
-
-	for_each_connector_in_state(state, conn, conn_state, i) {
-		ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
-
-		if (ret)
-			goto unlock;
-	}
-
-	if (plane_mask)
-		ret = drm_atomic_commit(state);
-
-unlock:
-	if (plane_mask)
-		drm_atomic_clean_old_fb(dev, plane_mask, ret);
-
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(&ctx);
-		goto retry;
-	}
-
-	drm_atomic_state_put(state);
-
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
-
-	return ret;
-}
-
 int drm_mode_atomic_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv)
 {
diff --git a/drivers/gpu/drm/drm_atomic_helper.c b/drivers/gpu/drm/drm_atomic_helper.c
index c3994b4d5f32..8be9719284b0 100644
--- a/drivers/gpu/drm/drm_atomic_helper.c
+++ b/drivers/gpu/drm/drm_atomic_helper.c
@@ -459,10 +459,20 @@ mode_fixup(struct drm_atomic_state *state)
  *
  * Check the state object to see if the requested state is physically possible.
  * This does all the crtc and connector related computations for an atomic
- * update and adds any additional connectors needed for full modesets and calls
- * down into &drm_crtc_helper_funcs.mode_fixup and
- * &drm_encoder_helper_funcs.mode_fixup or
- * &drm_encoder_helper_funcs.atomic_check functions of the driver backend.
+ * update and adds any additional connectors needed for full modesets. It calls
+ * the various per-object callbacks in the follow order:
+ *
+ * 1. &drm_connector_helper_funcs.atomic_best_encoder for determining the new encoder.
+ * 2. &drm_connector_helper_funcs.atomic_check to validate the connector state.
+ * 3. If it's determined a modeset is needed then all connectors on the affected crtc
+ *    crtc are added and &drm_connector_helper_funcs.atomic_check is run on them.
+ * 4. &drm_bridge_funcs.mode_fixup is called on all encoder bridges.
+ * 5. &drm_encoder_helper_funcs.atomic_check is called to validate any encoder state.
+ *    This function is only called when the encoder will be part of a configured crtc,
+ *    it must not be used for implementing connector property validation.
+ *    If this function is NULL, &drm_atomic_encoder_helper_funcs.mode_fixup is called
+ *    instead.
+ * 6. &drm_crtc_helper_funcs.mode_fixup is called last, to fix up the mode with crtc constraints.
  *
  * &drm_crtc_state.mode_changed is set when the input mode is changed.
  * &drm_crtc_state.connectors_changed is set when a connector is added or
@@ -492,8 +502,12 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 	struct drm_connector *connector;
 	struct drm_connector_state *old_connector_state, *new_connector_state;
 	int i, ret;
+	unsigned connectors_mask = 0;
 
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
+		bool has_connectors =
+			!!new_crtc_state->connector_mask;
+
 		if (!drm_mode_equal(&old_crtc_state->mode, &new_crtc_state->mode)) {
 			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] mode changed\n",
 					 crtc->base.id, crtc->name);
@@ -515,13 +529,28 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 			new_crtc_state->mode_changed = true;
 			new_crtc_state->connectors_changed = true;
 		}
+
+		if (old_crtc_state->active != new_crtc_state->active) {
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active changed\n",
+					 crtc->base.id, crtc->name);
+			new_crtc_state->active_changed = true;
+		}
+
+		if (new_crtc_state->enable != has_connectors) {
+			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled/connectors mismatch\n",
+					 crtc->base.id, crtc->name);
+
+			return -EINVAL;
+		}
 	}
 
-	ret = handle_conflicting_encoders(state, state->legacy_set_config);
+	ret = handle_conflicting_encoders(state, false);
 	if (ret)
 		return ret;
 
 	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
+		const struct drm_connector_helper_funcs *funcs = connector->helper_private;
+
 		/*
 		 * This only sets crtc->connectors_changed for routing changes,
 		 * drivers must set crtc->connectors_changed themselves when
@@ -539,6 +568,13 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 			    new_connector_state->link_status)
 				new_crtc_state->connectors_changed = true;
 		}
+
+		if (funcs->atomic_check)
+			ret = funcs->atomic_check(connector, new_connector_state);
+		if (ret)
+			return ret;
+
+		connectors_mask += BIT(i);
 	}
 
 	/*
@@ -548,20 +584,6 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 	 * crtc only changed its mode but has the same set of connectors.
 	 */
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
-		bool has_connectors =
-			!!new_crtc_state->connector_mask;
-
-		/*
-		 * We must set ->active_changed after walking connectors for
-		 * otherwise an update that only changes active would result in
-		 * a full modeset because update_connector_routing force that.
-		 */
-		if (old_crtc_state->active != new_crtc_state->active) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] active changed\n",
-					 crtc->base.id, crtc->name);
-			new_crtc_state->active_changed = true;
-		}
-
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state))
 			continue;
 
@@ -577,13 +599,22 @@ drm_atomic_helper_check_modeset(struct drm_device *dev,
 		ret = drm_atomic_add_affected_planes(state, crtc);
 		if (ret != 0)
 			return ret;
+	}
 
-		if (new_crtc_state->enable != has_connectors) {
-			DRM_DEBUG_ATOMIC("[CRTC:%d:%s] enabled/connectors mismatch\n",
-					 crtc->base.id, crtc->name);
+	/*
+	 * Iterate over all connectors again, to make sure atomic_check()
+	 * has been called on them when a modeset is forced.
+	 */
+	for_each_oldnew_connector_in_state(state, connector, old_connector_state, new_connector_state, i) {
+		const struct drm_connector_helper_funcs *funcs = connector->helper_private;
 
-			return -EINVAL;
-		}
+		if (connectors_mask & BIT(i))
+			continue;
+
+		if (funcs->atomic_check)
+			ret = funcs->atomic_check(connector, new_connector_state);
+		if (ret)
+			return ret;
 	}
 
 	return mode_fixup(state);
@@ -2289,12 +2320,15 @@ int drm_atomic_helper_set_config(struct drm_mode_set *set,
 	if (!state)
 		return -ENOMEM;
 
-	state->legacy_set_config = true;
 	state->acquire_ctx = ctx;
 	ret = __drm_atomic_helper_set_config(set, state);
 	if (ret != 0)
 		goto fail;
 
+	ret = handle_conflicting_encoders(state, true);
+	if (ret)
+		return ret;
+
 	ret = drm_atomic_commit(state);
 
 fail:
@@ -2622,14 +2656,22 @@ EXPORT_SYMBOL(drm_atomic_helper_commit_duplicated_state);
 int drm_atomic_helper_resume(struct drm_device *dev,
 			     struct drm_atomic_state *state)
 {
-	struct drm_mode_config *config = &dev->mode_config;
+	struct drm_modeset_acquire_ctx ctx;
 	int err;
 
 	drm_mode_config_reset(dev);
 
-	drm_modeset_lock_all(dev);
-	err = drm_atomic_helper_commit_duplicated_state(state, config->acquire_ctx);
-	drm_modeset_unlock_all(dev);
+	drm_modeset_acquire_init(&ctx, 0);
+	while (1) {
+		err = drm_atomic_helper_commit_duplicated_state(state, &ctx);
+		if (err != -EDEADLK)
+			break;
+
+		drm_modeset_backoff(&ctx);
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
 
 	return err;
 }
@@ -2975,7 +3017,7 @@ int drm_atomic_helper_connector_dpms(struct drm_connector *connector,
 	if (!state)
 		return -ENOMEM;
 
-	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
+	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 retry:
 	crtc_state = drm_atomic_get_crtc_state(state, crtc);
 	if (IS_ERR(crtc_state)) {
@@ -3471,6 +3513,7 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state);
  * @green: green correction table
  * @blue: green correction table
  * @size: size of the tables
+ * @ctx: lock acquire context
  *
  * Implements support for legacy gamma correction table for drivers
  * that support color management through the DEGAMMA_LUT/GAMMA_LUT
@@ -3478,7 +3521,8 @@ EXPORT_SYMBOL(drm_atomic_helper_connector_destroy_state);
  */
 int drm_atomic_helper_legacy_gamma_set(struct drm_crtc *crtc,
 				       u16 *red, u16 *green, u16 *blue,
-				       uint32_t size)
+				       uint32_t size,
+				       struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_mode_config *config = &dev->mode_config;
@@ -3509,8 +3553,7 @@ int drm_atomic_helper_legacy_gamma_set(struct drm_crtc *crtc,
 		blob_data[i].blue = blue[i];
 	}
 
-	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
-retry:
+	state->acquire_ctx = ctx;
 	crtc_state = drm_atomic_get_crtc_state(state, crtc);
 	if (IS_ERR(crtc_state)) {
 		ret = PTR_ERR(crtc_state);
@@ -3534,18 +3577,10 @@ retry:
 		goto fail;
 
 	ret = drm_atomic_commit(state);
-fail:
-	if (ret == -EDEADLK)
-		goto backoff;
 
+fail:
 	drm_atomic_state_put(state);
 	drm_property_blob_put(blob);
 	return ret;
-
-backoff:
-	drm_atomic_state_clear(state);
-	drm_atomic_legacy_backoff(state);
-
-	goto retry;
 }
 EXPORT_SYMBOL(drm_atomic_helper_legacy_gamma_set);
diff --git a/drivers/gpu/drm/drm_color_mgmt.c b/drivers/gpu/drm/drm_color_mgmt.c
index cc23b9a505c0..533f3a3e6877 100644
--- a/drivers/gpu/drm/drm_color_mgmt.c
+++ b/drivers/gpu/drm/drm_color_mgmt.c
@@ -218,28 +218,28 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	void *r_base, *g_base, *b_base;
 	int size;
+	struct drm_modeset_acquire_ctx ctx;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
-	drm_modeset_lock_all(dev);
 	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
-	if (!crtc) {
-		ret = -ENOENT;
-		goto out;
-	}
+	if (!crtc)
+		return -ENOENT;
 
-	if (crtc->funcs->gamma_set == NULL) {
-		ret = -ENOSYS;
-		goto out;
-	}
+	if (crtc->funcs->gamma_set == NULL)
+		return -ENOSYS;
 
 	/* memcpy into gamma store */
-	if (crtc_lut->gamma_size != crtc->gamma_size) {
-		ret = -EINVAL;
+	if (crtc_lut->gamma_size != crtc->gamma_size)
+		return -EINVAL;
+
+	drm_modeset_acquire_init(&ctx, 0);
+retry:
+	ret = drm_modeset_lock_all_ctx(dev, &ctx);
+	if (ret)
 		goto out;
-	}
 
 	size = crtc_lut->gamma_size * (sizeof(uint16_t));
 	r_base = crtc->gamma_store;
@@ -260,10 +260,17 @@ int drm_mode_gamma_set_ioctl(struct drm_device *dev,
 		goto out;
 	}
 
-	ret = crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size);
+	ret = crtc->funcs->gamma_set(crtc, r_base, g_base, b_base,
+				     crtc->gamma_size, &ctx);
 
 out:
-	drm_modeset_unlock_all(dev);
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
 	return ret;
 
 }
@@ -295,19 +302,15 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
-	drm_modeset_lock_all(dev);
 	crtc = drm_crtc_find(dev, crtc_lut->crtc_id);
-	if (!crtc) {
-		ret = -ENOENT;
-		goto out;
-	}
+	if (!crtc)
+		return -ENOENT;
 
 	/* memcpy into gamma store */
-	if (crtc_lut->gamma_size != crtc->gamma_size) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (crtc_lut->gamma_size != crtc->gamma_size)
+		return -EINVAL;
 
+	drm_modeset_lock(&crtc->mutex, NULL);
 	size = crtc_lut->gamma_size * (sizeof(uint16_t));
 	r_base = crtc->gamma_store;
 	if (copy_to_user((void __user *)(unsigned long)crtc_lut->red, r_base, size)) {
@@ -327,6 +330,6 @@ int drm_mode_gamma_get_ioctl(struct drm_device *dev,
 		goto out;
 	}
 out:
-	drm_modeset_unlock_all(dev);
+	drm_modeset_unlock(&crtc->mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c
index d69e180fc563..5af25ce5bf7c 100644
--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
@@ -576,6 +576,7 @@ int drm_mode_setcrtc(struct drm_device *dev, void *data,
 	}
 	DRM_DEBUG_KMS("[CRTC:%d:%s]\n", crtc->base.id, crtc->name);
 
+	mutex_lock(&crtc->dev->mode_config.mutex);
 	drm_modeset_acquire_init(&ctx, 0);
 retry:
 	ret = drm_modeset_lock_all_ctx(crtc->dev, &ctx);
@@ -721,6 +722,7 @@ out:
 	}
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
+	mutex_unlock(&crtc->dev->mode_config.mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h
index 8c04275cf226..d077c5490041 100644
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@@ -182,7 +182,6 @@ int drm_atomic_get_property(struct drm_mode_object *obj,
 			    struct drm_property *property, uint64_t *val);
 int drm_mode_atomic_ioctl(struct drm_device *dev,
 			  void *data, struct drm_file *file_priv);
-int drm_atomic_remove_fb(struct drm_framebuffer *fb);
 
 
 /* drm_plane.c */
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index e65becd964a1..a0ea3241c651 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -109,6 +109,42 @@ static DEFINE_MUTEX(kernel_fb_helper_lock);
 	for (({ lockdep_assert_held(&(fbh)->dev->mode_config.mutex); }), \
 	     i__ = 0; i__ < (fbh)->connector_count; i__++)
 
+int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper,
+				    struct drm_connector *connector)
+{
+	struct drm_fb_helper_connector *fb_conn;
+	struct drm_fb_helper_connector **temp;
+	unsigned int count;
+
+	if (!drm_fbdev_emulation)
+		return 0;
+
+	WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex));
+
+	count = fb_helper->connector_count + 1;
+
+	if (count > fb_helper->connector_info_alloc_count) {
+		size_t size = count * sizeof(fb_conn);
+
+		temp = krealloc(fb_helper->connector_info, size, GFP_KERNEL);
+		if (!temp)
+			return -ENOMEM;
+
+		fb_helper->connector_info_alloc_count = count;
+		fb_helper->connector_info = temp;
+	}
+
+	fb_conn = kzalloc(sizeof(*fb_conn), GFP_KERNEL);
+	if (!fb_conn)
+		return -ENOMEM;
+
+	drm_connector_get(connector);
+	fb_conn->connector = connector;
+	fb_helper->connector_info[fb_helper->connector_count++] = fb_conn;
+	return 0;
+}
+EXPORT_SYMBOL(drm_fb_helper_add_one_connector);
+
 /**
  * drm_fb_helper_single_add_all_connectors() - add all connectors to fbdev
  * 					       emulation helper
@@ -162,36 +198,6 @@ out:
 }
 EXPORT_SYMBOL(drm_fb_helper_single_add_all_connectors);
 
-int drm_fb_helper_add_one_connector(struct drm_fb_helper *fb_helper, struct drm_connector *connector)
-{
-	struct drm_fb_helper_connector **temp;
-	struct drm_fb_helper_connector *fb_helper_connector;
-
-	if (!drm_fbdev_emulation)
-		return 0;
-
-	WARN_ON(!mutex_is_locked(&fb_helper->dev->mode_config.mutex));
-	if (fb_helper->connector_count + 1 > fb_helper->connector_info_alloc_count) {
-		temp = krealloc(fb_helper->connector_info, sizeof(struct drm_fb_helper_connector *) * (fb_helper->connector_count + 1), GFP_KERNEL);
-		if (!temp)
-			return -ENOMEM;
-
-		fb_helper->connector_info_alloc_count = fb_helper->connector_count + 1;
-		fb_helper->connector_info = temp;
-	}
-
-
-	fb_helper_connector = kzalloc(sizeof(struct drm_fb_helper_connector), GFP_KERNEL);
-	if (!fb_helper_connector)
-		return -ENOMEM;
-
-	drm_connector_get(connector);
-	fb_helper_connector->connector = connector;
-	fb_helper->connector_info[fb_helper->connector_count++] = fb_helper_connector;
-	return 0;
-}
-EXPORT_SYMBOL(drm_fb_helper_add_one_connector);
-
 int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 				       struct drm_connector *connector)
 {
@@ -213,9 +219,9 @@ int drm_fb_helper_remove_one_connector(struct drm_fb_helper *fb_helper,
 	fb_helper_connector = fb_helper->connector_info[i];
 	drm_connector_put(fb_helper_connector->connector);
 
-	for (j = i + 1; j < fb_helper->connector_count; j++) {
+	for (j = i + 1; j < fb_helper->connector_count; j++)
 		fb_helper->connector_info[j - 1] = fb_helper->connector_info[j];
-	}
+
 	fb_helper->connector_count--;
 	kfree(fb_helper_connector);
 
@@ -250,7 +256,8 @@ static void drm_fb_helper_restore_lut_atomic(struct drm_crtc *crtc)
 	g_base = r_base + crtc->gamma_size;
 	b_base = g_base + crtc->gamma_size;
 
-	crtc->funcs->gamma_set(crtc, r_base, g_base, b_base, crtc->gamma_size);
+	crtc->funcs->gamma_set(crtc, r_base, g_base, b_base,
+			       crtc->gamma_size, NULL);
 }
 
 /**
@@ -275,6 +282,9 @@ int drm_fb_helper_debug_enter(struct fb_info *info)
 			if (funcs->mode_set_base_atomic == NULL)
 				continue;
 
+			if (drm_drv_uses_atomic_modeset(mode_set->crtc->dev))
+				continue;
+
 			drm_fb_helper_save_lut_atomic(mode_set->crtc, helper);
 			funcs->mode_set_base_atomic(mode_set->crtc,
 						    mode_set->fb,
@@ -316,6 +326,7 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 
 	for (i = 0; i < helper->crtc_count; i++) {
 		struct drm_mode_set *mode_set = &helper->crtc_info[i].mode_set;
+
 		crtc = mode_set->crtc;
 		funcs = crtc->helper_private;
 		fb = drm_mode_config_fb(crtc);
@@ -331,6 +342,9 @@ int drm_fb_helper_debug_leave(struct fb_info *info)
 		if (funcs->mode_set_base_atomic == NULL)
 			continue;
 
+		if (drm_drv_uses_atomic_modeset(crtc->dev))
+			continue;
+
 		drm_fb_helper_restore_lut_atomic(mode_set->crtc);
 		funcs->mode_set_base_atomic(mode_set->crtc, fb, crtc->x,
 					    crtc->y, LEAVE_ATOMIC_MODE_SET);
@@ -346,7 +360,7 @@ static int restore_fbdev_mode_atomic(struct drm_fb_helper *fb_helper)
 	struct drm_plane *plane;
 	struct drm_atomic_state *state;
 	int i, ret;
-	unsigned plane_mask;
+	unsigned int plane_mask;
 
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
@@ -378,7 +392,7 @@ retry:
 			goto fail;
 	}
 
-	for(i = 0; i < fb_helper->crtc_count; i++) {
+	for (i = 0; i < fb_helper->crtc_count; i++) {
 		struct drm_mode_set *mode_set = &fb_helper->crtc_info[i].mode_set;
 
 		ret = __drm_atomic_helper_set_config(mode_set, state);
@@ -404,17 +418,12 @@ backoff:
 	goto retry;
 }
 
-static int restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+static int restore_fbdev_mode_legacy(struct drm_fb_helper *fb_helper)
 {
 	struct drm_device *dev = fb_helper->dev;
 	struct drm_plane *plane;
 	int i;
 
-	drm_warn_on_modeset_not_all_locked(dev);
-
-	if (drm_drv_uses_atomic_modeset(dev))
-		return restore_fbdev_mode_atomic(fb_helper);
-
 	drm_for_each_plane(plane, dev) {
 		if (plane->type != DRM_PLANE_TYPE_PRIMARY)
 			drm_plane_force_disable(plane);
@@ -448,6 +457,18 @@ static int restore_fbdev_mode(struct drm_fb_helper *fb_helper)
 	return 0;
 }
 
+static int restore_fbdev_mode(struct drm_fb_helper *fb_helper)
+{
+	struct drm_device *dev = fb_helper->dev;
+
+	drm_warn_on_modeset_not_all_locked(dev);
+
+	if (drm_drv_uses_atomic_modeset(dev))
+		return restore_fbdev_mode_atomic(fb_helper);
+	else
+		return restore_fbdev_mode_legacy(fb_helper);
+}
+
 /**
  * drm_fb_helper_restore_fbdev_mode_unlocked - restore fbdev configuration
  * @fb_helper: fbcon to restore
@@ -488,8 +509,10 @@ static bool drm_fb_helper_is_bound(struct drm_fb_helper *fb_helper)
 	struct drm_crtc *crtc;
 	int bound = 0, crtcs_bound = 0;
 
-	/* Sometimes user space wants everything disabled, so don't steal the
-	 * display if there's a master. */
+	/*
+	 * Sometimes user space wants everything disabled, so don't steal the
+	 * display if there's a master.
+	 */
 	if (READ_ONCE(dev->master))
 		return false;
 
@@ -537,6 +560,7 @@ static bool drm_fb_helper_force_kernel_mode(void)
 static void drm_fb_helper_restore_work_fn(struct work_struct *ignored)
 {
 	bool ret;
+
 	ret = drm_fb_helper_force_kernel_mode();
 	if (ret == true)
 		DRM_ERROR("Failed to restore crtc configuration\n");
@@ -870,9 +894,8 @@ void drm_fb_helper_fini(struct drm_fb_helper *fb_helper)
 	mutex_lock(&kernel_fb_helper_lock);
 	if (!list_empty(&fb_helper->kernel_fb_list)) {
 		list_del(&fb_helper->kernel_fb_list);
-		if (list_empty(&kernel_fb_helper_list)) {
+		if (list_empty(&kernel_fb_helper_list))
 			unregister_sysrq_key('v', &sysrq_drm_fb_helper_restore_op);
-		}
 	}
 	mutex_unlock(&kernel_fb_helper_lock);
 
@@ -1165,6 +1188,7 @@ static int setcolreg(struct drm_crtc *crtc, u16 red, u16 green,
 			(blue << info->var.blue.offset);
 		if (info->var.transp.length > 0) {
 			u32 mask = (1 << info->var.transp.length) - 1;
+
 			mask <<= info->var.transp.offset;
 			value |= mask;
 		}
@@ -1447,7 +1471,7 @@ static int pan_display_atomic(struct fb_var_screeninfo *var,
 	struct drm_atomic_state *state;
 	struct drm_plane *plane;
 	int i, ret;
-	unsigned plane_mask;
+	unsigned int plane_mask;
 
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
@@ -1456,7 +1480,7 @@ static int pan_display_atomic(struct fb_var_screeninfo *var,
 	state->acquire_ctx = dev->mode_config.acquire_ctx;
 retry:
 	plane_mask = 0;
-	for(i = 0; i < fb_helper->crtc_count; i++) {
+	for (i = 0; i < fb_helper->crtc_count; i++) {
 		struct drm_mode_set *mode_set;
 
 		mode_set = &fb_helper->crtc_info[i].mode_set;
@@ -1496,34 +1520,14 @@ backoff:
 	goto retry;
 }
 
-/**
- * drm_fb_helper_pan_display - implementation for &fb_ops.fb_pan_display
- * @var: updated screen information
- * @info: fbdev registered by the helper
- */
-int drm_fb_helper_pan_display(struct fb_var_screeninfo *var,
+static int pan_display_legacy(struct fb_var_screeninfo *var,
 			      struct fb_info *info)
 {
 	struct drm_fb_helper *fb_helper = info->par;
-	struct drm_device *dev = fb_helper->dev;
 	struct drm_mode_set *modeset;
 	int ret = 0;
 	int i;
 
-	if (oops_in_progress)
-		return -EBUSY;
-
-	drm_modeset_lock_all(dev);
-	if (!drm_fb_helper_is_bound(fb_helper)) {
-		drm_modeset_unlock_all(dev);
-		return -EBUSY;
-	}
-
-	if (drm_drv_uses_atomic_modeset(dev)) {
-		ret = pan_display_atomic(var, info);
-		goto unlock;
-	}
-
 	for (i = 0; i < fb_helper->crtc_count; i++) {
 		modeset = &fb_helper->crtc_info[i].mode_set;
 
@@ -1538,8 +1542,37 @@ int drm_fb_helper_pan_display(struct fb_var_screeninfo *var,
 			}
 		}
 	}
-unlock:
+
+	return ret;
+}
+
+/**
+ * drm_fb_helper_pan_display - implementation for &fb_ops.fb_pan_display
+ * @var: updated screen information
+ * @info: fbdev registered by the helper
+ */
+int drm_fb_helper_pan_display(struct fb_var_screeninfo *var,
+			      struct fb_info *info)
+{
+	struct drm_fb_helper *fb_helper = info->par;
+	struct drm_device *dev = fb_helper->dev;
+	int ret;
+
+	if (oops_in_progress)
+		return -EBUSY;
+
+	drm_modeset_lock_all(dev);
+	if (!drm_fb_helper_is_bound(fb_helper)) {
+		drm_modeset_unlock_all(dev);
+		return -EBUSY;
+	}
+
+	if (drm_drv_uses_atomic_modeset(dev))
+		ret = pan_display_atomic(var, info);
+	else
+		ret = pan_display_legacy(var, info);
 	drm_modeset_unlock_all(dev);
+
 	return ret;
 }
 EXPORT_SYMBOL(drm_fb_helper_pan_display);
@@ -1561,11 +1594,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	memset(&sizes, 0, sizeof(struct drm_fb_helper_surface_size));
 	sizes.surface_depth = 24;
 	sizes.surface_bpp = 32;
-	sizes.fb_width = (unsigned)-1;
-	sizes.fb_height = (unsigned)-1;
+	sizes.fb_width = (u32)-1;
+	sizes.fb_height = (u32)-1;
 
-	/* if driver picks 8 or 16 by default use that
-	   for both depth/bpp */
+	/* if driver picks 8 or 16 by default use that for both depth/bpp */
 	if (preferred_bpp != sizes.surface_bpp)
 		sizes.surface_depth = sizes.surface_bpp = preferred_bpp;
 
@@ -1630,6 +1662,7 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 
 		for (j = 0; j < mode_set->num_connectors; j++) {
 			struct drm_connector *connector = mode_set->connectors[j];
+
 			if (connector->has_tile) {
 				lasth = (connector->tile_h_loc == (connector->num_h_tile - 1));
 				lastv = (connector->tile_v_loc == (connector->num_v_tile - 1));
@@ -1645,8 +1678,10 @@ static int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	}
 
 	if (crtc_count == 0 || sizes.fb_width == -1 || sizes.fb_height == -1) {
-		/* hmm everyone went away - assume VGA cable just fell out
-		   and will come back later. */
+		/*
+		 * hmm everyone went away - assume VGA cable just fell out
+		 * and will come back later.
+		 */
 		DRM_INFO("Cannot find any crtc or sizes - going 1024x768\n");
 		sizes.fb_width = sizes.surface_width = 1024;
 		sizes.fb_height = sizes.surface_height = 768;
@@ -1703,7 +1738,6 @@ void drm_fb_helper_fill_fix(struct fb_info *info, uint32_t pitch,
 	info->fix.accel = FB_ACCEL_NONE;
 
 	info->fix.line_length = pitch;
-	return;
 }
 EXPORT_SYMBOL(drm_fb_helper_fill_fix);
 
@@ -1725,6 +1759,7 @@ void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helpe
 			    uint32_t fb_width, uint32_t fb_height)
 {
 	struct drm_framebuffer *fb = fb_helper->fb;
+
 	info->pseudo_palette = fb_helper->pseudo_palette;
 	info->var.xres_virtual = fb->width;
 	info->var.yres_virtual = fb->height;
@@ -2057,13 +2092,15 @@ retry:
 				continue;
 
 		} else {
-			if (fb_helper_conn->connector->tile_h_loc != tile_pass -1 &&
+			if (fb_helper_conn->connector->tile_h_loc != tile_pass - 1 &&
 			    fb_helper_conn->connector->tile_v_loc != tile_pass - 1)
 			/* if this tile_pass doesn't cover any of the tiles - keep going */
 				continue;
 
-			/* find the tile offsets for this pass - need
-			   to find all tiles left and above */
+			/*
+			 * find the tile offsets for this pass - need to find
+			 * all tiles left and above
+			 */
 			drm_get_tile_offsets(fb_helper, modes, offsets,
 					     i, fb_helper_conn->connector->tile_h_loc, fb_helper_conn->connector->tile_v_loc);
 		}
@@ -2147,8 +2184,10 @@ static int drm_pick_crtcs(struct drm_fb_helper *fb_helper,
 	if (!encoder)
 		goto out;
 
-	/* select a crtc for this connector and then attempt to configure
-	   remaining connectors */
+	/*
+	 * select a crtc for this connector and then attempt to configure
+	 * remaining connectors
+	 */
 	for (c = 0; c < fb_helper->crtc_count; c++) {
 		crtc = &fb_helper->crtc_info[c];
 
diff --git a/drivers/gpu/drm/drm_framebuffer.c b/drivers/gpu/drm/drm_framebuffer.c
index e8f9c13a0afd..fc8ef42203ec 100644
--- a/drivers/gpu/drm/drm_framebuffer.c
+++ b/drivers/gpu/drm/drm_framebuffer.c
@@ -24,6 +24,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_auth.h>
 #include <drm/drm_framebuffer.h>
+#include <drm/drm_atomic.h>
 
 #include "drm_crtc_internal.h"
 
@@ -755,6 +756,117 @@ void drm_framebuffer_cleanup(struct drm_framebuffer *fb)
 }
 EXPORT_SYMBOL(drm_framebuffer_cleanup);
 
+static int atomic_remove_fb(struct drm_framebuffer *fb)
+{
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_device *dev = fb->dev;
+	struct drm_atomic_state *state;
+	struct drm_plane *plane;
+	struct drm_connector *conn;
+	struct drm_connector_state *conn_state;
+	int i, ret = 0;
+	unsigned plane_mask;
+
+	state = drm_atomic_state_alloc(dev);
+	if (!state)
+		return -ENOMEM;
+
+	drm_modeset_acquire_init(&ctx, 0);
+	state->acquire_ctx = &ctx;
+
+retry:
+	plane_mask = 0;
+	ret = drm_modeset_lock_all_ctx(dev, &ctx);
+	if (ret)
+		goto unlock;
+
+	drm_for_each_plane(plane, dev) {
+		struct drm_plane_state *plane_state;
+
+		if (plane->state->fb != fb)
+			continue;
+
+		plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state)) {
+			ret = PTR_ERR(plane_state);
+			goto unlock;
+		}
+
+		if (plane_state->crtc->primary == plane) {
+			struct drm_crtc_state *crtc_state;
+
+			crtc_state = drm_atomic_get_existing_crtc_state(state, plane_state->crtc);
+
+			ret = drm_atomic_add_affected_connectors(state, plane_state->crtc);
+			if (ret)
+				goto unlock;
+
+			crtc_state->active = false;
+			ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL);
+			if (ret)
+				goto unlock;
+		}
+
+		drm_atomic_set_fb_for_plane(plane_state, NULL);
+		ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
+		if (ret)
+			goto unlock;
+
+		plane_mask |= BIT(drm_plane_index(plane));
+
+		plane->old_fb = plane->fb;
+	}
+
+	for_each_connector_in_state(state, conn, conn_state, i) {
+		ret = drm_atomic_set_crtc_for_connector(conn_state, NULL);
+
+		if (ret)
+			goto unlock;
+	}
+
+	if (plane_mask)
+		ret = drm_atomic_commit(state);
+
+unlock:
+	if (plane_mask)
+		drm_atomic_clean_old_fb(dev, plane_mask, ret);
+
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
+	drm_atomic_state_put(state);
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
+	return ret;
+}
+
+static void legacy_remove_fb(struct drm_framebuffer *fb)
+{
+	struct drm_device *dev = fb->dev;
+	struct drm_crtc *crtc;
+	struct drm_plane *plane;
+
+	drm_modeset_lock_all(dev);
+	/* remove from any CRTC */
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->primary->fb == fb) {
+			/* should turn off the crtc */
+			if (drm_crtc_force_disable(crtc))
+				DRM_ERROR("failed to reset crtc %p when fb was deleted\n", crtc);
+		}
+	}
+
+	drm_for_each_plane(plane, dev) {
+		if (plane->fb == fb)
+			drm_plane_force_disable(plane);
+	}
+	drm_modeset_unlock_all(dev);
+}
+
 /**
  * drm_framebuffer_remove - remove and unreference a framebuffer object
  * @fb: framebuffer to remove
@@ -770,8 +882,6 @@ EXPORT_SYMBOL(drm_framebuffer_cleanup);
 void drm_framebuffer_remove(struct drm_framebuffer *fb)
 {
 	struct drm_device *dev;
-	struct drm_crtc *crtc;
-	struct drm_plane *plane;
 
 	if (!fb)
 		return;
@@ -797,29 +907,12 @@ void drm_framebuffer_remove(struct drm_framebuffer *fb)
 	 */
 	if (drm_framebuffer_read_refcount(fb) > 1) {
 		if (drm_drv_uses_atomic_modeset(dev)) {
-			int ret = drm_atomic_remove_fb(fb);
+			int ret = atomic_remove_fb(fb);
 			WARN(ret, "atomic remove_fb failed with %i\n", ret);
-			goto out;
-		}
-
-		drm_modeset_lock_all(dev);
-		/* remove from any CRTC */
-		drm_for_each_crtc(crtc, dev) {
-			if (crtc->primary->fb == fb) {
-				/* should turn off the crtc */
-				if (drm_crtc_force_disable(crtc))
-					DRM_ERROR("failed to reset crtc %p when fb was deleted\n", crtc);
-			}
-		}
-
-		drm_for_each_plane(plane, dev) {
-			if (plane->fb == fb)
-				drm_plane_force_disable(plane);
-		}
-		drm_modeset_unlock_all(dev);
+		} else
+			legacy_remove_fb(fb);
 	}
 
-out:
 	drm_framebuffer_put(fb);
 }
 EXPORT_SYMBOL(drm_framebuffer_remove);
diff --git a/drivers/gpu/drm/drm_ioc32.c b/drivers/gpu/drm/drm_ioc32.c
index b134482f4022..ae386783e3ea 100644
--- a/drivers/gpu/drm/drm_ioc32.c
+++ b/drivers/gpu/drm/drm_ioc32.c
@@ -1,4 +1,4 @@
-/**
+/*
  * \file drm_ioc32.c
  *
  * 32-bit ioctl compatibility routines for the DRM.
@@ -72,15 +72,15 @@
 #define DRM_IOCTL_MODE_ADDFB232		DRM_IOWR(0xb8, drm_mode_fb_cmd232_t)
 
 typedef struct drm_version_32 {
-	int version_major;	  /**< Major version */
-	int version_minor;	  /**< Minor version */
-	int version_patchlevel;	   /**< Patch level */
-	u32 name_len;		  /**< Length of name buffer */
-	u32 name;		  /**< Name of driver */
-	u32 date_len;		  /**< Length of date buffer */
-	u32 date;		  /**< User-space buffer to hold date */
-	u32 desc_len;		  /**< Length of desc buffer */
-	u32 desc;		  /**< User-space buffer to hold desc */
+	int version_major;	  /* Major version */
+	int version_minor;	  /* Minor version */
+	int version_patchlevel;	   /* Patch level */
+	u32 name_len;		  /* Length of name buffer */
+	u32 name;		  /* Name of driver */
+	u32 date_len;		  /* Length of date buffer */
+	u32 date;		  /* User-space buffer to hold date */
+	u32 desc_len;		  /* Length of desc buffer */
+	u32 desc;		  /* User-space buffer to hold desc */
 } drm_version32_t;
 
 static int compat_drm_version(struct file *file, unsigned int cmd,
@@ -126,8 +126,8 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
 }
 
 typedef struct drm_unique32 {
-	u32 unique_len;	/**< Length of unique */
-	u32 unique;	/**< Unique name for driver instantiation */
+	u32 unique_len;	/* Length of unique */
+	u32 unique;	/* Unique name for driver instantiation */
 } drm_unique32_t;
 
 static int compat_drm_getunique(struct file *file, unsigned int cmd,
@@ -180,12 +180,12 @@ static int compat_drm_setunique(struct file *file, unsigned int cmd,
 }
 
 typedef struct drm_map32 {
-	u32 offset;		/**< Requested physical address (0 for SAREA)*/
-	u32 size;		/**< Requested physical size (bytes) */
-	enum drm_map_type type;	/**< Type of memory to map */
-	enum drm_map_flags flags;	/**< Flags */
-	u32 handle;		/**< User-space: "Handle" to pass to mmap() */
-	int mtrr;		/**< MTRR slot used */
+	u32 offset;		/* Requested physical address (0 for SAREA) */
+	u32 size;		/* Requested physical size (bytes) */
+	enum drm_map_type type;	/* Type of memory to map */
+	enum drm_map_flags flags;	/* Flags */
+	u32 handle;		/* User-space: "Handle" to pass to mmap() */
+	int mtrr;		/* MTRR slot used */
 } drm_map32_t;
 
 static int compat_drm_getmap(struct file *file, unsigned int cmd,
@@ -286,12 +286,12 @@ static int compat_drm_rmmap(struct file *file, unsigned int cmd,
 }
 
 typedef struct drm_client32 {
-	int idx;	/**< Which client desired? */
-	int auth;	/**< Is client authenticated? */
-	u32 pid;	/**< Process ID */
-	u32 uid;	/**< User ID */
-	u32 magic;	/**< Magic */
-	u32 iocs;	/**< Ioctl count */
+	int idx;	/* Which client desired? */
+	int auth;	/* Is client authenticated? */
+	u32 pid;	/* Process ID */
+	u32 uid;	/* User ID */
+	u32 magic;	/* Magic */
+	u32 iocs;	/* Ioctl count */
 } drm_client32_t;
 
 static int compat_drm_getclient(struct file *file, unsigned int cmd,
@@ -366,12 +366,12 @@ static int compat_drm_getstats(struct file *file, unsigned int cmd,
 }
 
 typedef struct drm_buf_desc32 {
-	int count;		 /**< Number of buffers of this size */
-	int size;		 /**< Size in bytes */
-	int low_mark;		 /**< Low water mark */
-	int high_mark;		 /**< High water mark */
+	int count;		 /* Number of buffers of this size */
+	int size;		 /* Size in bytes */
+	int low_mark;		 /* Low water mark */
+	int high_mark;		 /* High water mark */
 	int flags;
-	u32 agp_start;		 /**< Start address in the AGP aperture */
+	u32 agp_start;		 /* Start address in the AGP aperture */
 } drm_buf_desc32_t;
 
 static int compat_drm_addbufs(struct file *file, unsigned int cmd,
@@ -1111,13 +1111,18 @@ static drm_ioctl_compat_t *drm_compat_ioctls[] = {
 };
 
 /**
- * Called whenever a 32-bit process running under a 64-bit kernel
- * performs an ioctl on /dev/drm.
+ * drm_compat_ioctl - 32bit IOCTL compatibility handler for DRM drivers
+ * @filp: file this ioctl is called on
+ * @cmd: ioctl cmd number
+ * @arg: user argument
+ *
+ * Compatibility handler for 32 bit userspace running on 64 kernels. All actual
+ * IOCTL handling is forwarded to drm_ioctl(), while marshalling structures as
+ * appropriate. Note that this only handles DRM core IOCTLs, if the driver has
+ * botched IOCTL itself, it must handle those by wrapping this function.
  *
- * \param file_priv DRM file private.
- * \param cmd command.
- * \param arg user argument.
- * \return zero on success or negative number on failure.
+ * Returns:
+ * Zero on success, negative error code on failure.
  */
 long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 {
@@ -1141,5 +1146,4 @@ long drm_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
 
 	return ret;
 }
-
 EXPORT_SYMBOL(drm_compat_ioctl);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 7d6deaa91281..865e3ee4d743 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -286,6 +286,9 @@ static int drm_getcap(struct drm_device *dev, void *data, struct drm_file *file_
 	case DRM_CAP_ADDFB2_MODIFIERS:
 		req->value = dev->mode_config.allow_fb_modifiers;
 		break;
+	case DRM_CAP_CRTC_IN_VBLANK_EVENT:
+		req->value = 1;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -647,13 +650,59 @@ static const struct drm_ioctl_desc drm_ioctls[] = {
 #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls )
 
 /**
+ * DOC: driver specific ioctls
+ *
+ * First things first, driver private IOCTLs should only be needed for drivers
+ * supporting rendering. Kernel modesetting is all standardized, and extended
+ * through properties. There are a few exceptions in some existing drivers,
+ * which define IOCTL for use by the display DRM master, but they all predate
+ * properties.
+ *
+ * Now if you do have a render driver you always have to support it through
+ * driver private properties. There's a few steps needed to wire all the things
+ * up.
+ *
+ * First you need to define the structure for your IOCTL in your driver private
+ * UAPI header in ``include/uapi/drm/my_driver_drm.h``::
+ *
+ *     struct my_driver_operation {
+ *             u32 some_thing;
+ *             u32 another_thing;
+ *     };
+ *
+ * Please make sure that you follow all the best practices from
+ * ``Documentation/ioctl/botching-up-ioctls.txt``. Note that drm_ioctl()
+ * automatically zero-extends structures, hence make sure you can add more stuff
+ * at the end, i.e. don't put a variable sized array there.
+ *
+ * Then you need to define your IOCTL number, using one of DRM_IO(), DRM_IOR(),
+ * DRM_IOW() or DRM_IOWR(). It must start with the DRM_IOCTL\_ prefix::
+ *
+ *     ##define DRM_IOCTL_MY_DRIVER_OPERATION \
+ *         DRM_IOW(DRM_COMMAND_BASE, struct my_driver_operation)
+ * 
+ * DRM driver private IOCTL must be in the range from DRM_COMMAND_BASE to
+ * DRM_COMMAND_END. Finally you need an array of &struct drm_ioctl_desc to wire
+ * up the handlers and set the access rights:
+ *
+ *     static const struct drm_ioctl_desc my_driver_ioctls[] = {
+ *         DRM_IOCTL_DEF_DRV(MY_DRIVER_OPERATION, my_driver_operation,
+ *                 DRM_AUTH|DRM_RENDER_ALLOW),
+ *     };
+ *
+ * And then assign this to the &drm_driver.ioctls field in your driver
+ * structure.
+ */
+
+/**
  * drm_ioctl - ioctl callback implementation for DRM drivers
  * @filp: file this ioctl is called on
  * @cmd: ioctl cmd number
  * @arg: user argument
  *
- * Looks up the ioctl function in the ::ioctls table, checking for root
- * previleges if so required, and dispatches to the respective function.
+ * Looks up the ioctl function in the DRM core and the driver dispatch table,
+ * stored in &drm_driver.ioctls. It checks for necessary permission by calling
+ * drm_ioctl_permit(), and dispatches to the respective function.
  *
  * Returns:
  * Zero on success, negative error code on failure.
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index dac1b2593cb1..8c866cac62dd 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -1026,6 +1026,7 @@ void drm_crtc_arm_vblank_event(struct drm_crtc *crtc,
 
 	e->pipe = pipe;
 	e->event.sequence = drm_vblank_count(dev, pipe);
+	e->event.crtc_id = crtc->base.id;
 	list_add_tail(&e->base.link, &dev->vblank_event_list);
 }
 EXPORT_SYMBOL(drm_crtc_arm_vblank_event);
@@ -1056,6 +1057,7 @@ void drm_crtc_send_vblank_event(struct drm_crtc *crtc,
 		now = get_drm_timestamp();
 	}
 	e->pipe = pipe;
+	e->event.crtc_id = crtc->base.id;
 	send_vblank_event(dev, e, seq, &now);
 }
 EXPORT_SYMBOL(drm_crtc_send_vblank_event);
diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c
index bf60f2645e55..64ef09a6cccb 100644
--- a/drivers/gpu/drm/drm_modeset_lock.c
+++ b/drivers/gpu/drm/drm_modeset_lock.c
@@ -149,108 +149,6 @@ void drm_modeset_unlock_all(struct drm_device *dev)
 EXPORT_SYMBOL(drm_modeset_unlock_all);
 
 /**
- * drm_modeset_lock_crtc - lock crtc with hidden acquire ctx for a plane update
- * @crtc: DRM CRTC
- * @plane: DRM plane to be updated on @crtc
- *
- * This function locks the given crtc and plane (which should be either the
- * primary or cursor plane) using a hidden acquire context. This is necessary so
- * that drivers internally using the atomic interfaces can grab further locks
- * with the lock acquire context.
- *
- * Note that @plane can be NULL, e.g. when the cursor support hasn't yet been
- * converted to universal planes yet.
- */
-void drm_modeset_lock_crtc(struct drm_crtc *crtc,
-			   struct drm_plane *plane)
-{
-	struct drm_modeset_acquire_ctx *ctx;
-	int ret;
-
-	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
-	if (WARN_ON(!ctx))
-		return;
-
-	drm_modeset_acquire_init(ctx, 0);
-
-retry:
-	ret = drm_modeset_lock(&crtc->mutex, ctx);
-	if (ret)
-		goto fail;
-
-	if (plane) {
-		ret = drm_modeset_lock(&plane->mutex, ctx);
-		if (ret)
-			goto fail;
-
-		if (plane->crtc) {
-			ret = drm_modeset_lock(&plane->crtc->mutex, ctx);
-			if (ret)
-				goto fail;
-		}
-	}
-
-	WARN_ON(crtc->acquire_ctx);
-
-	/* now we hold the locks, so now that it is safe, stash the
-	 * ctx for drm_modeset_unlock_crtc():
-	 */
-	crtc->acquire_ctx = ctx;
-
-	return;
-
-fail:
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(ctx);
-		goto retry;
-	}
-}
-EXPORT_SYMBOL(drm_modeset_lock_crtc);
-
-/**
- * drm_modeset_legacy_acquire_ctx - find acquire ctx for legacy ioctls
- * @crtc: drm crtc
- *
- * Legacy ioctl operations like cursor updates or page flips only have per-crtc
- * locking, and store the acquire ctx in the corresponding crtc. All other
- * legacy operations take all locks and use a global acquire context. This
- * function grabs the right one.
- */
-struct drm_modeset_acquire_ctx *
-drm_modeset_legacy_acquire_ctx(struct drm_crtc *crtc)
-{
-	if (crtc->acquire_ctx)
-		return crtc->acquire_ctx;
-
-	WARN_ON(!crtc->dev->mode_config.acquire_ctx);
-
-	return crtc->dev->mode_config.acquire_ctx;
-}
-EXPORT_SYMBOL(drm_modeset_legacy_acquire_ctx);
-
-/**
- * drm_modeset_unlock_crtc - drop crtc lock
- * @crtc: drm crtc
- *
- * This drops the crtc lock acquire with drm_modeset_lock_crtc() and all other
- * locks acquired through the hidden context.
- */
-void drm_modeset_unlock_crtc(struct drm_crtc *crtc)
-{
-	struct drm_modeset_acquire_ctx *ctx = crtc->acquire_ctx;
-
-	if (WARN_ON(!ctx))
-		return;
-
-	crtc->acquire_ctx = NULL;
-	drm_modeset_drop_locks(ctx);
-	drm_modeset_acquire_fini(ctx);
-
-	kfree(ctx);
-}
-EXPORT_SYMBOL(drm_modeset_unlock_crtc);
-
-/**
  * drm_warn_on_modeset_not_all_locked - check that all modeset locks are locked
  * @dev: device
  *
diff --git a/drivers/gpu/drm/drm_of.c b/drivers/gpu/drm/drm_of.c
index b5f2f0fece99..2120f33bdf4a 100644
--- a/drivers/gpu/drm/drm_of.c
+++ b/drivers/gpu/drm/drm_of.c
@@ -3,8 +3,10 @@
 #include <linux/list.h>
 #include <linux/of_graph.h>
 #include <drm/drmP.h>
+#include <drm/drm_bridge.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_encoder.h>
+#include <drm/drm_panel.h>
 #include <drm/drm_of.h>
 
 static void drm_release_of(struct device *dev, void *data)
@@ -208,3 +210,53 @@ int drm_of_encoder_active_endpoint(struct device_node *node,
 	return -EINVAL;
 }
 EXPORT_SYMBOL_GPL(drm_of_encoder_active_endpoint);
+
+/*
+ * drm_of_find_panel_or_bridge - return connected panel or bridge device
+ * @np: device tree node containing encoder output ports
+ * @panel: pointer to hold returned drm_panel
+ * @bridge: pointer to hold returned drm_bridge
+ *
+ * Given a DT node's port and endpoint number, find the connected node and
+ * return either the associated struct drm_panel or drm_bridge device. Either
+ * @panel or @bridge must not be NULL.
+ *
+ * Returns zero if successful, or one of the standard error codes if it fails.
+ */
+int drm_of_find_panel_or_bridge(const struct device_node *np,
+				int port, int endpoint,
+				struct drm_panel **panel,
+				struct drm_bridge **bridge)
+{
+	int ret = -EPROBE_DEFER;
+	struct device_node *remote;
+
+	if (!panel && !bridge)
+		return -EINVAL;
+
+	remote = of_graph_get_remote_node(np, port, endpoint);
+	if (!remote)
+		return -ENODEV;
+
+	if (panel) {
+		*panel = of_drm_find_panel(remote);
+		if (*panel)
+			ret = 0;
+	}
+
+	/* No panel found yet, check for a bridge next. */
+	if (bridge) {
+		if (ret) {
+			*bridge = of_drm_find_bridge(remote);
+			if (*bridge)
+				ret = 0;
+		} else {
+			*bridge = NULL;
+		}
+
+	}
+
+	of_node_put(remote);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(drm_of_find_panel_or_bridge);
diff --git a/drivers/gpu/drm/drm_plane.c b/drivers/gpu/drm/drm_plane.c
index bc71aa2b7872..fedd4d60d9cd 100644
--- a/drivers/gpu/drm/drm_plane.c
+++ b/drivers/gpu/drm/drm_plane.c
@@ -620,7 +620,8 @@ int drm_mode_setplane(struct drm_device *dev, void *data,
 
 static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 				     struct drm_mode_cursor2 *req,
-				     struct drm_file *file_priv)
+				     struct drm_file *file_priv,
+				     struct drm_modeset_acquire_ctx *ctx)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_framebuffer *fb = NULL;
@@ -634,21 +635,11 @@ static int drm_mode_cursor_universal(struct drm_crtc *crtc,
 	int32_t crtc_x, crtc_y;
 	uint32_t crtc_w = 0, crtc_h = 0;
 	uint32_t src_w = 0, src_h = 0;
-	struct drm_modeset_acquire_ctx ctx;
 	int ret = 0;
 
 	BUG_ON(!crtc->cursor);
 	WARN_ON(crtc->cursor->crtc != crtc && crtc->cursor->crtc != NULL);
 
-	drm_modeset_acquire_init(&ctx, 0);
-retry:
-	ret = drm_modeset_lock(&crtc->mutex, &ctx);
-	if (ret)
-		goto fail;
-	ret = drm_modeset_lock(&crtc->cursor->mutex, &ctx);
-	if (ret)
-		goto fail;
-
 	/*
 	 * Obtain fb we'll be using (either new or existing) and take an extra
 	 * reference to it if fb != null.  setplane will take care of dropping
@@ -693,7 +684,7 @@ retry:
 	 */
 	ret = __setplane_internal(crtc->cursor, crtc, fb,
 				crtc_x, crtc_y, crtc_w, crtc_h,
-				0, 0, src_w, src_h, &ctx);
+				0, 0, src_w, src_h, ctx);
 
 	/* Update successful; save new cursor position, if necessary */
 	if (ret == 0 && req->flags & DRM_MODE_CURSOR_MOVE) {
@@ -701,15 +692,6 @@ retry:
 		crtc->cursor_y = req->y;
 	}
 
-fail:
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(&ctx);
-		goto retry;
-	}
-
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
-
 	return ret;
 }
 
@@ -718,6 +700,7 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 				  struct drm_file *file_priv)
 {
 	struct drm_crtc *crtc;
+	struct drm_modeset_acquire_ctx ctx;
 	int ret = 0;
 
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
@@ -732,14 +715,24 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 		return -ENOENT;
 	}
 
+	drm_modeset_acquire_init(&ctx, 0);
+retry:
+	ret = drm_modeset_lock(&crtc->mutex, &ctx);
+	if (ret)
+		goto out;
 	/*
 	 * If this crtc has a universal cursor plane, call that plane's update
 	 * handler rather than using legacy cursor handlers.
 	 */
-	if (crtc->cursor)
-		return drm_mode_cursor_universal(crtc, req, file_priv);
+	if (crtc->cursor) {
+		ret = drm_modeset_lock(&crtc->cursor->mutex, &ctx);
+		if (ret)
+			goto out;
+
+		ret = drm_mode_cursor_universal(crtc, req, file_priv, &ctx);
+		goto out;
+	}
 
-	drm_modeset_lock_crtc(crtc, crtc->cursor);
 	if (req->flags & DRM_MODE_CURSOR_BO) {
 		if (!crtc->funcs->cursor_set && !crtc->funcs->cursor_set2) {
 			ret = -ENXIO;
@@ -763,7 +756,13 @@ static int drm_mode_cursor_common(struct drm_device *dev,
 		}
 	}
 out:
-	drm_modeset_unlock_crtc(crtc);
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
 
 	return ret;
 
diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c
index 9fb65b736a90..954eb848b5e2 100644
--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@@ -403,10 +403,10 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
 	.map_dma_buf = drm_gem_map_dma_buf,
 	.unmap_dma_buf = drm_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
-	.kmap = drm_gem_dmabuf_kmap,
-	.kmap_atomic = drm_gem_dmabuf_kmap_atomic,
-	.kunmap = drm_gem_dmabuf_kunmap,
-	.kunmap_atomic = drm_gem_dmabuf_kunmap_atomic,
+	.map = drm_gem_dmabuf_kmap,
+	.map_atomic = drm_gem_dmabuf_kmap_atomic,
+	.unmap = drm_gem_dmabuf_kunmap,
+	.unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
 	.mmap = drm_gem_dmabuf_mmap,
 	.vmap = drm_gem_dmabuf_vmap,
 	.vunmap = drm_gem_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c
index 85005d57bde6..1b0c14ab3fff 100644
--- a/drivers/gpu/drm/drm_probe_helper.c
+++ b/drivers/gpu/drm/drm_probe_helper.c
@@ -44,7 +44,7 @@
  *
  * This library provides some helper code for output probing. It provides an
  * implementation of the core &drm_connector_funcs.fill_modes interface with
- * drm_helper_probe_single_connector_modes.
+ * drm_helper_probe_single_connector_modes().
  *
  * It also provides support for polling connectors with a work item and for
  * generic hotplug interrupt handling where the driver doesn't or cannot keep
@@ -169,12 +169,73 @@ void drm_kms_helper_poll_enable(struct drm_device *dev)
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);
 
 static enum drm_connector_status
-drm_connector_detect(struct drm_connector *connector, bool force)
+drm_helper_probe_detect_ctx(struct drm_connector *connector, bool force)
 {
-	return connector->funcs->detect ?
-		connector->funcs->detect(connector, force) :
-		connector_status_connected;
+	const struct drm_connector_helper_funcs *funcs = connector->helper_private;
+	struct drm_modeset_acquire_ctx ctx;
+	int ret;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+retry:
+	ret = drm_modeset_lock(&connector->dev->mode_config.connection_mutex, &ctx);
+	if (!ret) {
+		if (funcs->detect_ctx)
+			ret = funcs->detect_ctx(connector, &ctx, force);
+		else if (connector->funcs->detect)
+			ret = connector->funcs->detect(connector, force);
+		else
+			ret = connector_status_connected;
+	}
+
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
+	if (WARN_ON(ret < 0))
+		ret = connector_status_unknown;
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
+	return ret;
+}
+
+/**
+ * drm_helper_probe_detect - probe connector status
+ * @connector: connector to probe
+ * @ctx: acquire_ctx, or NULL to let this function handle locking.
+ * @force: Whether destructive probe operations should be performed.
+ *
+ * This function calls the detect callbacks of the connector.
+ * This function returns &drm_connector_status, or
+ * if @ctx is set, it might also return -EDEADLK.
+ */
+int
+drm_helper_probe_detect(struct drm_connector *connector,
+			struct drm_modeset_acquire_ctx *ctx,
+			bool force)
+{
+	const struct drm_connector_helper_funcs *funcs = connector->helper_private;
+	struct drm_device *dev = connector->dev;
+	int ret;
+
+	if (!ctx)
+		return drm_helper_probe_detect_ctx(connector, force);
+
+	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, ctx);
+	if (ret)
+		return ret;
+
+	if (funcs->detect_ctx)
+		return funcs->detect_ctx(connector, ctx, force);
+	else if (connector->funcs->detect)
+		return connector->funcs->detect(connector, force);
+	else
+		return connector_status_connected;
 }
+EXPORT_SYMBOL(drm_helper_probe_detect);
 
 /**
  * drm_helper_probe_single_connector_modes - get complete set of display modes
@@ -239,15 +300,27 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 	struct drm_display_mode *mode;
 	const struct drm_connector_helper_funcs *connector_funcs =
 		connector->helper_private;
-	int count = 0;
+	int count = 0, ret;
 	int mode_flags = 0;
 	bool verbose_prune = true;
 	enum drm_connector_status old_status;
+	struct drm_modeset_acquire_ctx ctx;
 
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 
+	drm_modeset_acquire_init(&ctx, 0);
+
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
 			connector->name);
+
+retry:
+	ret = drm_modeset_lock(&dev->mode_config.connection_mutex, &ctx);
+	if (ret == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	} else
+		WARN_ON(ret < 0);
+
 	/* set all old modes to the stale state */
 	list_for_each_entry(mode, &connector->modes, head)
 		mode->status = MODE_STALE;
@@ -263,7 +336,15 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 		if (connector->funcs->force)
 			connector->funcs->force(connector);
 	} else {
-		connector->status = drm_connector_detect(connector, true);
+		ret = drm_helper_probe_detect(connector, &ctx, true);
+
+		if (ret == -EDEADLK) {
+			drm_modeset_backoff(&ctx);
+			goto retry;
+		} else if (WARN(ret < 0, "Invalid return value %i for connector detection\n", ret))
+			ret = connector_status_unknown;
+
+		connector->status = ret;
 	}
 
 	/*
@@ -355,6 +436,9 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 prune:
 	drm_mode_prune_invalid(dev, &connector->modes, verbose_prune);
 
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+
 	if (list_empty(&connector->modes))
 		return 0;
 
@@ -440,7 +524,7 @@ static void output_poll_execute(struct work_struct *work)
 
 		repoll = true;
 
-		connector->status = drm_connector_detect(connector, false);
+		connector->status = drm_helper_probe_detect(connector, NULL, false);
 		if (old_status != connector->status) {
 			const char *old, *new;
 
@@ -588,7 +672,7 @@ bool drm_helper_hpd_irq_event(struct drm_device *dev)
 
 		old_status = connector->status;
 
-		connector->status = drm_connector_detect(connector, false);
+		connector->status = drm_helper_probe_detect(connector, NULL, false);
 		DRM_DEBUG_KMS("[CONNECTOR:%d:%s] status updated from %s to %s\n",
 			      connector->base.id,
 			      connector->name,
diff --git a/drivers/gpu/drm/drm_property.c b/drivers/gpu/drm/drm_property.c
index b17959c3e099..3e88fa24eab3 100644
--- a/drivers/gpu/drm/drm_property.c
+++ b/drivers/gpu/drm/drm_property.c
@@ -442,8 +442,7 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 	struct drm_property *property;
 	int enum_count = 0;
 	int value_count = 0;
-	int ret = 0, i;
-	int copied;
+	int i, copied;
 	struct drm_property_enum *prop_enum;
 	struct drm_mode_property_enum __user *enum_ptr;
 	uint64_t __user *values_ptr;
@@ -451,55 +450,43 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 	if (!drm_core_check_feature(dev, DRIVER_MODESET))
 		return -EINVAL;
 
-	drm_modeset_lock_all(dev);
 	property = drm_property_find(dev, out_resp->prop_id);
-	if (!property) {
-		ret = -ENOENT;
-		goto done;
-	}
-
-	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
-			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
-		list_for_each_entry(prop_enum, &property->enum_list, head)
-			enum_count++;
-	}
-
-	value_count = property->num_values;
+	if (!property)
+		return -ENOENT;
 
 	strncpy(out_resp->name, property->name, DRM_PROP_NAME_LEN);
 	out_resp->name[DRM_PROP_NAME_LEN-1] = 0;
 	out_resp->flags = property->flags;
 
-	if ((out_resp->count_values >= value_count) && value_count) {
-		values_ptr = (uint64_t __user *)(unsigned long)out_resp->values_ptr;
-		for (i = 0; i < value_count; i++) {
-			if (copy_to_user(values_ptr + i, &property->values[i], sizeof(uint64_t))) {
-				ret = -EFAULT;
-				goto done;
-			}
+	value_count = property->num_values;
+	values_ptr = u64_to_user_ptr(out_resp->values_ptr);
+
+	for (i = 0; i < value_count; i++) {
+		if (i < out_resp->count_values &&
+		    put_user(property->values[i], values_ptr + i)) {
+			return -EFAULT;
 		}
 	}
 	out_resp->count_values = value_count;
 
+	copied = 0;
+	enum_ptr = u64_to_user_ptr(out_resp->enum_blob_ptr);
+
 	if (drm_property_type_is(property, DRM_MODE_PROP_ENUM) ||
-			drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
-		if ((out_resp->count_enum_blobs >= enum_count) && enum_count) {
-			copied = 0;
-			enum_ptr = (struct drm_mode_property_enum __user *)(unsigned long)out_resp->enum_blob_ptr;
-			list_for_each_entry(prop_enum, &property->enum_list, head) {
-
-				if (copy_to_user(&enum_ptr[copied].value, &prop_enum->value, sizeof(uint64_t))) {
-					ret = -EFAULT;
-					goto done;
-				}
-
-				if (copy_to_user(&enum_ptr[copied].name,
-						 &prop_enum->name, DRM_PROP_NAME_LEN)) {
-					ret = -EFAULT;
-					goto done;
-				}
-				copied++;
-			}
+	    drm_property_type_is(property, DRM_MODE_PROP_BITMASK)) {
+		list_for_each_entry(prop_enum, &property->enum_list, head) {
+			enum_count++;
+			if (out_resp->count_enum_blobs < enum_count)
+				continue;
+
+			if (copy_to_user(&enum_ptr[copied].value,
+					 &prop_enum->value, sizeof(uint64_t)))
+				return -EFAULT;
+
+			if (copy_to_user(&enum_ptr[copied].name,
+					 &prop_enum->name, DRM_PROP_NAME_LEN))
+				return -EFAULT;
+			copied++;
 		}
 		out_resp->count_enum_blobs = enum_count;
 	}
@@ -514,9 +501,8 @@ int drm_mode_getproperty_ioctl(struct drm_device *dev,
 	 */
 	if (drm_property_type_is(property, DRM_MODE_PROP_BLOB))
 		out_resp->count_enum_blobs = 0;
-done:
-	drm_modeset_unlock_all(dev);
-	return ret;
+
+	return 0;
 }
 
 static void drm_property_free_blob(struct kref *kref)
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index 513288b5c2f6..1c5b5ce1fd7f 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -25,6 +25,20 @@
 #define to_drm_minor(d) dev_get_drvdata(d)
 #define to_drm_connector(d) dev_get_drvdata(d)
 
+/**
+ * DOC: overview
+ *
+ * DRM provides very little additional support to drivers for sysfs
+ * interactions, beyond just all the standard stuff. Drivers who want to expose
+ * additional sysfs properties and property groups can attach them at either
+ * &drm_device.dev or &drm_connector.kdev.
+ *
+ * Registration is automatically handled when calling drm_dev_register(), or
+ * drm_connector_register() in case of hot-plugged connectors. Unregistration is
+ * also automatically handled by drm_dev_unregister() and
+ * drm_connector_unregister().
+ */
+
 static struct device_type drm_sysfs_device_minor = {
 	.name = "drm_minor"
 };
@@ -250,15 +264,6 @@ static const struct attribute_group *connector_dev_groups[] = {
 	NULL
 };
 
-/**
- * drm_sysfs_connector_add - add a connector to sysfs
- * @connector: connector to add
- *
- * Create a connector device in sysfs, along with its associated connector
- * properties (so far, connection status, dpms, mode list and edid) and
- * generate a hotplug event so userspace knows there's a new connector
- * available.
- */
 int drm_sysfs_connector_add(struct drm_connector *connector)
 {
 	struct drm_device *dev = connector->dev;
@@ -285,19 +290,6 @@ int drm_sysfs_connector_add(struct drm_connector *connector)
 	return 0;
 }
 
-/**
- * drm_sysfs_connector_remove - remove an connector device from sysfs
- * @connector: connector to remove
- *
- * Remove @connector and its associated attributes from sysfs.  Note that
- * the device model core will take care of sending the "remove" uevent
- * at this time, so we don't need to do it.
- *
- * Note:
- * This routine should only be called if the connector was previously
- * successfully registered.  If @connector hasn't been registered yet,
- * you'll likely see a panic somewhere deep in sysfs code when called.
- */
 void drm_sysfs_connector_remove(struct drm_connector *connector)
 {
 	if (!connector->kdev)
@@ -333,20 +325,6 @@ static void drm_sysfs_release(struct device *dev)
 	kfree(dev);
 }
 
-/**
- * drm_sysfs_minor_alloc() - Allocate sysfs device for given minor
- * @minor: minor to allocate sysfs device for
- *
- * This allocates a new sysfs device for @minor and returns it. The device is
- * not registered nor linked. The caller has to use device_add() and
- * device_del() to register and unregister it.
- *
- * Note that dev_get_drvdata() on the new device will return the minor.
- * However, the device does not hold a ref-count to the minor nor to the
- * underlying drm_device. This is unproblematic as long as you access the
- * private data only in sysfs callbacks. device_del() disables those
- * synchronously, so they cannot be called after you cleanup a minor.
- */
 struct device *drm_sysfs_minor_alloc(struct drm_minor *minor)
 {
 	const char *minor_str;
@@ -384,15 +362,13 @@ err_free:
 }
 
 /**
- * drm_class_device_register - Register a struct device in the drm class.
+ * drm_class_device_register - register new device with the DRM sysfs class
+ * @dev: device to register
  *
- * @dev: pointer to struct device to register.
- *
- * @dev should have all relevant members pre-filled with the exception
- * of the class member. In particular, the device_type member must
- * be set.
+ * Registers a new &struct device within the DRM sysfs class. Essentially only
+ * used by ttm to have a place for its global settings. Drivers should never use
+ * this.
  */
-
 int drm_class_device_register(struct device *dev)
 {
 	if (!drm_class || IS_ERR(drm_class))
@@ -403,6 +379,14 @@ int drm_class_device_register(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(drm_class_device_register);
 
+/**
+ * drm_class_device_unregister - unregister device with the DRM sysfs class
+ * @dev: device to unregister
+ *
+ * Unregisters a &struct device from the DRM sysfs class. Essentially only used
+ * by ttm to have a place for its global settings. Drivers should never use
+ * this.
+ */
 void drm_class_device_unregister(struct device *dev)
 {
 	return device_unregister(dev);
diff --git a/drivers/gpu/drm/etnaviv/Kconfig b/drivers/gpu/drm/etnaviv/Kconfig
index cc1731c5289c..71cee4e9fefb 100644
--- a/drivers/gpu/drm/etnaviv/Kconfig
+++ b/drivers/gpu/drm/etnaviv/Kconfig
@@ -5,6 +5,7 @@ config DRM_ETNAVIV
 	depends on ARCH_MXC || ARCH_DOVE || (ARM && COMPILE_TEST)
 	depends on MMU
 	select SHMEM
+	select SYNC_FILE
 	select TMPFS
 	select IOMMU_API
 	select IOMMU_SUPPORT
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 587e45043542..5255278dde56 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -111,7 +111,7 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 	return 0;
 }
 
-static void etnaviv_preclose(struct drm_device *dev, struct drm_file *file)
+static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct etnaviv_file_private *ctx = file->driver_priv;
@@ -488,7 +488,7 @@ static struct drm_driver etnaviv_drm_driver = {
 				DRIVER_PRIME |
 				DRIVER_RENDER,
 	.open               = etnaviv_open,
-	.preclose           = etnaviv_preclose,
+	.postclose           = etnaviv_postclose,
 	.gem_free_object_unlocked = etnaviv_gem_free_object,
 	.gem_vm_ops         = &vm_ops,
 	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
@@ -512,7 +512,7 @@ static struct drm_driver etnaviv_drm_driver = {
 	.desc               = "etnaviv DRM",
 	.date               = "20151214",
 	.major              = 1,
-	.minor              = 0,
+	.minor              = 1,
 };
 
 /*
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem.h b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
index e63ff116a3b3..c4a091e87426 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@@ -20,6 +20,7 @@
 #include <linux/reservation.h>
 #include "etnaviv_drv.h"
 
+struct dma_fence;
 struct etnaviv_gem_ops;
 struct etnaviv_gem_object;
 
@@ -104,9 +105,10 @@ struct etnaviv_gem_submit {
 	struct drm_device *dev;
 	struct etnaviv_gpu *gpu;
 	struct ww_acquire_ctx ticket;
-	u32 fence;
+	struct dma_fence *fence;
 	unsigned int nr_bos;
 	struct etnaviv_gem_submit_bo bos[0];
+	u32 flags;
 };
 
 int etnaviv_gem_wait_bo(struct etnaviv_gpu *gpu, struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
index 726090d7a6ac..e1909429837e 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@@ -14,7 +14,9 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/dma-fence-array.h>
 #include <linux/reservation.h>
+#include <linux/sync_file.h>
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
@@ -169,8 +171,10 @@ static int submit_fence_sync(const struct etnaviv_gem_submit *submit)
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;
 		bool write = submit->bos[i].flags & ETNA_SUBMIT_BO_WRITE;
+		bool explicit = !(submit->flags & ETNA_SUBMIT_NO_IMPLICIT);
 
-		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write);
+		ret = etnaviv_gpu_fence_sync_obj(etnaviv_obj, context, write,
+						 explicit);
 		if (ret)
 			break;
 	}
@@ -290,6 +294,7 @@ static void submit_cleanup(struct etnaviv_gem_submit *submit)
 	}
 
 	ww_acquire_fini(&submit->ticket);
+	dma_fence_put(submit->fence);
 	kfree(submit);
 }
 
@@ -303,6 +308,9 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct etnaviv_gem_submit *submit;
 	struct etnaviv_cmdbuf *cmdbuf;
 	struct etnaviv_gpu *gpu;
+	struct dma_fence *in_fence = NULL;
+	struct sync_file *sync_file = NULL;
+	int out_fence_fd = -1;
 	void *stream;
 	int ret;
 
@@ -326,6 +334,11 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
+	if (args->flags & ~ETNA_SUBMIT_FLAGS) {
+		DRM_ERROR("invalid flags: 0x%x\n", args->flags);
+		return -EINVAL;
+	}
+
 	/*
 	 * Copy the command submission and bo array to kernel space in
 	 * one go, and do this outside of any locks.
@@ -365,12 +378,22 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_cmds;
 	}
 
+	if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
+		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
+		if (out_fence_fd < 0) {
+			ret = out_fence_fd;
+			goto err_submit_cmds;
+		}
+	}
+
 	submit = submit_create(dev, gpu, args->nr_bos);
 	if (!submit) {
 		ret = -ENOMEM;
 		goto err_submit_cmds;
 	}
 
+	submit->flags = args->flags;
+
 	ret = submit_lookup_objects(submit, file, bos, args->nr_bos);
 	if (ret)
 		goto err_submit_objects;
@@ -385,6 +408,24 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_objects;
 	}
 
+	if (args->flags & ETNA_SUBMIT_FENCE_FD_IN) {
+		in_fence = sync_file_get_fence(args->fence_fd);
+		if (!in_fence) {
+			ret = -EINVAL;
+			goto err_submit_objects;
+		}
+
+		/*
+		 * Wait if the fence is from a foreign context, or if the fence
+		 * array contains any fence from a foreign context.
+		 */
+		if (!dma_fence_match_context(in_fence, gpu->fence_context)) {
+			ret = dma_fence_wait(in_fence, true);
+			if (ret)
+				goto err_submit_objects;
+		}
+	}
+
 	ret = submit_fence_sync(submit);
 	if (ret)
 		goto err_submit_objects;
@@ -405,7 +446,23 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret == 0)
 		cmdbuf = NULL;
 
-	args->fence = submit->fence;
+	if (args->flags & ETNA_SUBMIT_FENCE_FD_OUT) {
+		/*
+		 * This can be improved: ideally we want to allocate the sync
+		 * file before kicking off the GPU job and just attach the
+		 * fence to the sync file here, eliminating the ENOMEM
+		 * possibility at this stage.
+		 */
+		sync_file = sync_file_create(submit->fence);
+		if (!sync_file) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		fd_install(out_fence_fd, sync_file->file);
+	}
+
+	args->fence_fd = out_fence_fd;
+	args->fence = submit->fence->seqno;
 
 out:
 	submit_unpin_objects(submit);
@@ -419,9 +476,13 @@ out:
 		flush_workqueue(priv->wq);
 
 err_submit_objects:
+	if (in_fence)
+		dma_fence_put(in_fence);
 	submit_cleanup(submit);
 
 err_submit_cmds:
+	if (ret && (out_fence_fd >= 0))
+		put_unused_fd(out_fence_fd);
 	/* if we still own the cmdbuf */
 	if (cmdbuf)
 		etnaviv_cmdbuf_free(cmdbuf);
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index 130d7d517a19..9a9c40717801 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -18,6 +18,7 @@
 #include <linux/dma-fence.h>
 #include <linux/moduleparam.h>
 #include <linux/of_device.h>
+#include <linux/thermal.h>
 
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
@@ -409,6 +410,17 @@ static void etnaviv_gpu_load_clock(struct etnaviv_gpu *gpu, u32 clock)
 	gpu_write(gpu, VIVS_HI_CLOCK_CONTROL, clock);
 }
 
+static void etnaviv_gpu_update_clock(struct etnaviv_gpu *gpu)
+{
+	unsigned int fscale = 1 << (6 - gpu->freq_scale);
+	u32 clock;
+
+	clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
+		VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(fscale);
+
+	etnaviv_gpu_load_clock(gpu, clock);
+}
+
 static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 {
 	u32 control, idle;
@@ -426,11 +438,10 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	timeout = jiffies + msecs_to_jiffies(1000);
 
 	while (time_is_after_jiffies(timeout)) {
-		control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
-			  VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
-
 		/* enable clock */
-		etnaviv_gpu_load_clock(gpu, control);
+		etnaviv_gpu_update_clock(gpu);
+
+		control = gpu_read(gpu, VIVS_HI_CLOCK_CONTROL);
 
 		/* Wait for stable clock.  Vivante's code waited for 1ms */
 		usleep_range(1000, 10000);
@@ -490,11 +501,7 @@ static int etnaviv_hw_reset(struct etnaviv_gpu *gpu)
 	}
 
 	/* We rely on the GPU running, so program the clock */
-	control = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
-		  VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
-
-	/* enable clock */
-	etnaviv_gpu_load_clock(gpu, control);
+	etnaviv_gpu_update_clock(gpu);
 
 	return 0;
 }
@@ -1051,6 +1058,12 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 {
 	struct etnaviv_fence *f;
 
+	/*
+	 * GPU lock must already be held, otherwise fence completion order might
+	 * not match the seqno order assigned here.
+	 */
+	lockdep_assert_held(&gpu->lock);
+
 	f = kzalloc(sizeof(*f), GFP_KERNEL);
 	if (!f)
 		return NULL;
@@ -1064,7 +1077,7 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 }
 
 int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
-	unsigned int context, bool exclusive)
+	unsigned int context, bool exclusive, bool explicit)
 {
 	struct reservation_object *robj = etnaviv_obj->resv;
 	struct reservation_object_list *fobj;
@@ -1077,6 +1090,9 @@ int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
 			return ret;
 	}
 
+	if (explicit)
+		return 0;
+
 	/*
 	 * If we have any shared fences, then the exclusive fence
 	 * should be ignored as it will already have been signalled.
@@ -1311,18 +1327,18 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 		goto out_pm_put;
 	}
 
+	mutex_lock(&gpu->lock);
+
 	fence = etnaviv_gpu_fence_alloc(gpu);
 	if (!fence) {
 		event_free(gpu, event);
 		ret = -ENOMEM;
-		goto out_pm_put;
+		goto out_unlock;
 	}
 
-	mutex_lock(&gpu->lock);
-
 	gpu->event[event].fence = fence;
-	submit->fence = fence->seqno;
-	gpu->active_fence = submit->fence;
+	submit->fence = dma_fence_get(fence);
+	gpu->active_fence = submit->fence->seqno;
 
 	if (gpu->lastctx != cmdbuf->ctx) {
 		gpu->mmu->need_flush = true;
@@ -1357,6 +1373,7 @@ int etnaviv_gpu_submit(struct etnaviv_gpu *gpu,
 	hangcheck_timer_reset(gpu);
 	ret = 0;
 
+out_unlock:
 	mutex_unlock(&gpu->lock);
 
 out_pm_put:
@@ -1526,17 +1543,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 #ifdef CONFIG_PM
 static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 {
-	u32 clock;
 	int ret;
 
 	ret = mutex_lock_killable(&gpu->lock);
 	if (ret)
 		return ret;
 
-	clock = VIVS_HI_CLOCK_CONTROL_DISABLE_DEBUG_REGISTERS |
-		VIVS_HI_CLOCK_CONTROL_FSCALE_VAL(0x40);
-
-	etnaviv_gpu_load_clock(gpu, clock);
+	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);
 
 	gpu->switch_context = true;
@@ -1548,6 +1561,47 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 }
 #endif
 
+static int
+etnaviv_gpu_cooling_get_max_state(struct thermal_cooling_device *cdev,
+				  unsigned long *state)
+{
+	*state = 6;
+
+	return 0;
+}
+
+static int
+etnaviv_gpu_cooling_get_cur_state(struct thermal_cooling_device *cdev,
+				  unsigned long *state)
+{
+	struct etnaviv_gpu *gpu = cdev->devdata;
+
+	*state = gpu->freq_scale;
+
+	return 0;
+}
+
+static int
+etnaviv_gpu_cooling_set_cur_state(struct thermal_cooling_device *cdev,
+				  unsigned long state)
+{
+	struct etnaviv_gpu *gpu = cdev->devdata;
+
+	mutex_lock(&gpu->lock);
+	gpu->freq_scale = state;
+	if (!pm_runtime_suspended(gpu->dev))
+		etnaviv_gpu_update_clock(gpu);
+	mutex_unlock(&gpu->lock);
+
+	return 0;
+}
+
+static struct thermal_cooling_device_ops cooling_ops = {
+	.get_max_state = etnaviv_gpu_cooling_get_max_state,
+	.get_cur_state = etnaviv_gpu_cooling_get_cur_state,
+	.set_cur_state = etnaviv_gpu_cooling_set_cur_state,
+};
+
 static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	void *data)
 {
@@ -1556,13 +1610,20 @@ static int etnaviv_gpu_bind(struct device *dev, struct device *master,
 	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
 	int ret;
 
+	gpu->cooling = thermal_of_cooling_device_register(dev->of_node,
+				(char *)dev_name(dev), gpu, &cooling_ops);
+	if (IS_ERR(gpu->cooling))
+		return PTR_ERR(gpu->cooling);
+
 #ifdef CONFIG_PM
 	ret = pm_runtime_get_sync(gpu->dev);
 #else
 	ret = etnaviv_gpu_clk_enable(gpu);
 #endif
-	if (ret < 0)
+	if (ret < 0) {
+		thermal_cooling_device_unregister(gpu->cooling);
 		return ret;
+	}
 
 	gpu->drm = drm;
 	gpu->fence_context = dma_fence_context_alloc(1);
@@ -1616,6 +1677,9 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	}
 
 	gpu->drm = NULL;
+
+	thermal_cooling_device_unregister(gpu->cooling);
+	gpu->cooling = NULL;
 }
 
 static const struct component_ops gpu_ops = {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 1c0606ea7d5e..9227a9740447 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -97,6 +97,7 @@ struct etnaviv_cmdbuf;
 
 struct etnaviv_gpu {
 	struct drm_device *drm;
+	struct thermal_cooling_device *cooling;
 	struct device *dev;
 	struct mutex lock;
 	struct etnaviv_chip_identity identity;
@@ -150,6 +151,7 @@ struct etnaviv_gpu {
 	u32 hangcheck_fence;
 	u32 hangcheck_dma_addr;
 	struct work_struct recover_work;
+	unsigned int freq_scale;
 };
 
 static inline void gpu_write(struct etnaviv_gpu *gpu, u32 reg, u32 data)
@@ -181,7 +183,7 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m);
 #endif
 
 int etnaviv_gpu_fence_sync_obj(struct etnaviv_gem_object *etnaviv_obj,
-	unsigned int context, bool exclusive);
+	unsigned int context, bool exclusive, bool implicit);
 
 void etnaviv_gpu_retire(struct etnaviv_gpu *gpu);
 int etnaviv_gpu_wait_fence_interruptible(struct etnaviv_gpu *gpu,
diff --git a/drivers/gpu/drm/exynos/exynos_dp.c b/drivers/gpu/drm/exynos/exynos_dp.c
index b445b50a5dc4..385537b726a6 100644
--- a/drivers/gpu/drm/exynos/exynos_dp.c
+++ b/drivers/gpu/drm/exynos/exynos_dp.c
@@ -23,6 +23,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
 #include <drm/bridge/analogix_dp.h>
@@ -211,8 +212,11 @@ static const struct component_ops exynos_dp_ops = {
 static int exynos_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *np = NULL, *endpoint = NULL;
+	struct device_node *np;
 	struct exynos_dp_device *dp;
+	struct drm_panel *panel;
+	struct drm_bridge *bridge;
+	int ret;
 
 	dp = devm_kzalloc(&pdev->dev, sizeof(struct exynos_dp_device),
 			  GFP_KERNEL);
@@ -236,28 +240,13 @@ static int exynos_dp_probe(struct platform_device *pdev)
 		goto out;
 	}
 
-	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (endpoint) {
-		np = of_graph_get_remote_port_parent(endpoint);
-		if (np) {
-			/* The remote port can be either a panel or a bridge */
-			dp->plat_data.panel = of_drm_find_panel(np);
-			if (!dp->plat_data.panel) {
-				dp->ptn_bridge = of_drm_find_bridge(np);
-				if (!dp->ptn_bridge) {
-					of_node_put(np);
-					return -EPROBE_DEFER;
-				}
-			}
-			of_node_put(np);
-		} else {
-			DRM_ERROR("no remote endpoint device node found.\n");
-			return -EINVAL;
-		}
-	} else {
-		DRM_ERROR("no port endpoint subnode found.\n");
-		return -EINVAL;
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0, &panel, &bridge);
+	if (ret)
+		return ret;
+
+	/* The remote port can be either a panel or a bridge */
+	dp->plat_data.panel = panel;
+	dp->ptn_bridge = bridge;
 
 out:
 	return component_add(&pdev->dev, &exynos_dp_ops);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dpi.c b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
index 3aab71a485ba..63abcd280fa0 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dpi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dpi.c
@@ -163,27 +163,13 @@ enum {
 	FIMD_PORT_WRB,
 };
 
-static struct device_node *exynos_dpi_of_find_panel_node(struct device *dev)
-{
-	struct device_node *np, *ep;
-
-	ep = of_graph_get_endpoint_by_regs(dev->of_node, FIMD_PORT_RGB, 0);
-	if (!ep)
-		return NULL;
-
-	np = of_graph_get_remote_port_parent(ep);
-	of_node_put(ep);
-
-	return np;
-}
-
 static int exynos_dpi_parse_dt(struct exynos_dpi *ctx)
 {
 	struct device *dev = ctx->dev;
 	struct device_node *dn = dev->of_node;
 	struct device_node *np;
 
-	ctx->panel_node = exynos_dpi_of_find_panel_node(dev);
+	ctx->panel_node = of_graph_get_remote_node(dn, FIMD_PORT_RGB, 0);
 
 	np = of_get_child_by_name(dn, "display-timings");
 	if (np) {
diff --git a/drivers/gpu/drm/exynos/exynos_drm_dsi.c b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
index 6d4da2f0932d..fc4fda738906 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_dsi.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_dsi.c
@@ -1659,17 +1659,10 @@ static int exynos_dsi_parse_dt(struct exynos_dsi *dsi)
 
 	of_node_put(ep);
 
-	ep = of_graph_get_next_endpoint(node, NULL);
-	if (!ep) {
-		ret = -EINVAL;
-		goto end;
-	}
+	dsi->bridge_node = of_graph_get_remote_node(node, DSI_PORT_OUT, 0);
+	if (!dsi->bridge_node)
+		return -EINVAL;
 
-	dsi->bridge_node = of_graph_get_remote_port_parent(ep);
-	if (!dsi->bridge_node) {
-		ret = -EINVAL;
-		goto end;
-	}
 end:
 	of_node_put(ep);
 
diff --git a/drivers/gpu/drm/exynos/exynos_drm_mic.c b/drivers/gpu/drm/exynos/exynos_drm_mic.c
index 2ef43d403eaa..e45720543a45 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_mic.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_mic.c
@@ -229,29 +229,6 @@ static void mic_set_reg_on(struct exynos_mic *mic, bool enable)
 	writel(reg, mic->reg + MIC_OP);
 }
 
-static struct device_node *get_remote_node(struct device_node *from, int reg)
-{
-	struct device_node *endpoint = NULL, *remote_node = NULL;
-
-	endpoint = of_graph_get_endpoint_by_regs(from, reg, -1);
-	if (!endpoint) {
-		DRM_ERROR("mic: Failed to find remote port from %s",
-				from->full_name);
-		goto exit;
-	}
-
-	remote_node = of_graph_get_remote_port_parent(endpoint);
-	if (!remote_node) {
-		DRM_ERROR("mic: Failed to find remote port parent from %s",
-							from->full_name);
-		goto exit;
-	}
-
-exit:
-	of_node_put(endpoint);
-	return remote_node;
-}
-
 static int parse_dt(struct exynos_mic *mic)
 {
 	int ret = 0, i, j;
@@ -263,7 +240,7 @@ static int parse_dt(struct exynos_mic *mic)
 	 * The first node must be for decon and the second one must be for dsi.
 	 */
 	for (i = 0, j = 0; i < NUM_ENDPOINTS; i++) {
-		remote_node = get_remote_node(mic->dev->of_node, i);
+		remote_node = of_graph_get_remote_node(mic->dev->of_node, i, 0);
 		if (!remote_node) {
 			ret = -EPIPE;
 			goto exit;
diff --git a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
index c3651456c963..dcbf3c06e1d8 100644
--- a/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
+++ b/drivers/gpu/drm/fsl-dcu/fsl_dcu_drm_rgb.c
@@ -15,6 +15,7 @@
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
 #include "fsl_dcu_drm_drv.h"
@@ -141,32 +142,11 @@ err_cleanup:
 	return ret;
 }
 
-static int fsl_dcu_attach_endpoint(struct fsl_dcu_drm_device *fsl_dev,
-				   const struct of_endpoint *ep)
-{
-	struct drm_bridge *bridge;
-	struct device_node *np;
-
-	np = of_graph_get_remote_port_parent(ep->local_node);
-
-	fsl_dev->connector.panel = of_drm_find_panel(np);
-	if (fsl_dev->connector.panel) {
-		of_node_put(np);
-		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
-	}
-
-	bridge = of_drm_find_bridge(np);
-	of_node_put(np);
-	if (!bridge)
-		return -ENODEV;
-
-	return drm_bridge_attach(&fsl_dev->encoder, bridge, NULL);
-}
-
 int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev)
 {
-	struct of_endpoint ep;
-	struct device_node *ep_node, *panel_node;
+	struct device_node *panel_node;
+	struct drm_panel *panel;
+	struct drm_bridge *bridge;
 	int ret;
 
 	/* This is for backward compatibility */
@@ -179,14 +159,14 @@ int fsl_dcu_create_outputs(struct fsl_dcu_drm_device *fsl_dev)
 		return fsl_dcu_attach_panel(fsl_dev, fsl_dev->connector.panel);
 	}
 
-	ep_node = of_graph_get_next_endpoint(fsl_dev->np, NULL);
-	if (!ep_node)
-		return -ENODEV;
-
-	ret = of_graph_parse_endpoint(ep_node, &ep);
-	of_node_put(ep_node);
+	ret = drm_of_find_panel_or_bridge(fsl_dev->np, 0, 0, &panel, &bridge);
 	if (ret)
-		return -ENODEV;
+		return ret;
+
+	if (panel) {
+		fsl_dev->connector.panel = panel;
+		return fsl_dcu_attach_panel(fsl_dev, panel);
+	}
 
-	return fsl_dcu_attach_endpoint(fsl_dev, &ep);
+	return drm_bridge_attach(&fsl_dev->encoder, bridge, NULL);
 }
diff --git a/drivers/gpu/drm/gma500/gma_display.c b/drivers/gpu/drm/gma500/gma_display.c
index 93ff46535c04..e7fd356acf2e 100644
--- a/drivers/gpu/drm/gma500/gma_display.c
+++ b/drivers/gpu/drm/gma500/gma_display.c
@@ -177,7 +177,8 @@ void gma_crtc_load_lut(struct drm_crtc *crtc)
 }
 
 int gma_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green, u16 *blue,
-		       u32 size)
+		       u32 size,
+		       struct drm_modeset_acquire_ctx *ctx)
 {
 	struct gma_crtc *gma_crtc = to_gma_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/gma500/gma_display.h b/drivers/gpu/drm/gma500/gma_display.h
index 166e608923db..239c374b6169 100644
--- a/drivers/gpu/drm/gma500/gma_display.h
+++ b/drivers/gpu/drm/gma500/gma_display.h
@@ -73,7 +73,8 @@ extern int gma_crtc_cursor_set(struct drm_crtc *crtc,
 extern int gma_crtc_cursor_move(struct drm_crtc *crtc, int x, int y);
 extern void gma_crtc_load_lut(struct drm_crtc *crtc);
 extern int gma_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-			      u16 *blue, u32 size);
+			      u16 *blue, u32 size,
+			      struct drm_modeset_acquire_ctx *ctx);
 extern void gma_crtc_dpms(struct drm_crtc *crtc, int mode);
 extern void gma_crtc_prepare(struct drm_crtc *crtc);
 extern void gma_crtc_commit(struct drm_crtc *crtc);
diff --git a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
index 1737e98bc10a..5abc69c9630f 100644
--- a/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
+++ b/drivers/gpu/drm/hisilicon/kirin/dw_drm_dsi.c
@@ -17,7 +17,6 @@
 
 #include <linux/clk.h>
 #include <linux/component.h>
-#include <linux/of_graph.h>
 
 #include <drm/drm_of.h>
 #include <drm/drm_crtc_helper.h>
@@ -754,34 +753,16 @@ static int dsi_parse_dt(struct platform_device *pdev, struct dw_dsi *dsi)
 {
 	struct dsi_hw_ctx *ctx = dsi->ctx;
 	struct device_node *np = pdev->dev.of_node;
-	struct device_node *endpoint, *bridge_node;
-	struct drm_bridge *bridge;
 	struct resource *res;
+	int ret;
 
 	/*
 	 * Get the endpoint node. In our case, dsi has one output port1
 	 * to which the external HDMI bridge is connected.
 	 */
-	endpoint = of_graph_get_endpoint_by_regs(np, 1, -1);
-	if (!endpoint) {
-		DRM_ERROR("no valid endpoint node\n");
-		return -ENODEV;
-	}
-	of_node_put(endpoint);
-
-	bridge_node = of_graph_get_remote_port_parent(endpoint);
-	if (!bridge_node) {
-		DRM_ERROR("no valid bridge node\n");
-		return -ENODEV;
-	}
-	of_node_put(bridge_node);
-
-	bridge = of_drm_find_bridge(bridge_node);
-	if (!bridge) {
-		DRM_INFO("wait for external HDMI bridge driver.\n");
-		return -EPROBE_DEFER;
-	}
-	dsi->bridge = bridge;
+	ret = drm_of_find_panel_or_bridge(np, 0, 0, NULL, &dsi->bridge);
+	if (ret)
+		return ret;
 
 	ctx->pclk = devm_clk_get(&pdev->dev, "pclk");
 	if (IS_ERR(ctx->pclk)) {
diff --git a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
index df4f50713e54..9c903672f582 100644
--- a/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
+++ b/drivers/gpu/drm/hisilicon/kirin/kirin_drm_drv.c
@@ -230,34 +230,6 @@ static const struct component_master_ops kirin_drm_ops = {
 	.unbind = kirin_drm_unbind,
 };
 
-static struct device_node *kirin_get_remote_node(struct device_node *np)
-{
-	struct device_node *endpoint, *remote;
-
-	/* get the first endpoint, in our case only one remote node
-	 * is connected to display controller.
-	 */
-	endpoint = of_graph_get_next_endpoint(np, NULL);
-	if (!endpoint) {
-		DRM_ERROR("no valid endpoint node\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	remote = of_graph_get_remote_port_parent(endpoint);
-	of_node_put(endpoint);
-	if (!remote) {
-		DRM_ERROR("no valid remote node\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	if (!of_device_is_available(remote)) {
-		DRM_ERROR("not available for remote node\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	return remote;
-}
-
 static int kirin_drm_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -271,7 +243,7 @@ static int kirin_drm_platform_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	remote = kirin_get_remote_node(np);
+	remote = of_graph_get_remote_node(np, 0, 0);
 	if (IS_ERR(remote))
 		return PTR_ERR(remote);
 
diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c
index b7d7721e72fa..40af17ec6312 100644
--- a/drivers/gpu/drm/i915/gvt/cfg_space.c
+++ b/drivers/gpu/drm/i915/gvt/cfg_space.c
@@ -285,9 +285,6 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset,
 {
 	int ret;
 
-	if (vgpu->failsafe)
-		return 0;
-
 	if (WARN_ON(bytes > 4))
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 6f972afbdbc3..41b2c3aaa04a 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -616,9 +616,6 @@ static inline u32 get_opcode(u32 cmd, int ring_id)
 {
 	struct decode_info *d_info;
 
-	if (ring_id >= I915_NUM_ENGINES)
-		return INVALID_OP;
-
 	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return INVALID_OP;
@@ -661,9 +658,6 @@ static inline void print_opcode(u32 cmd, int ring_id)
 	struct decode_info *d_info;
 	int i;
 
-	if (ring_id >= I915_NUM_ENGINES)
-		return;
-
 	d_info = ring_decode_info[ring_id][CMD_TYPE(cmd)];
 	if (d_info == NULL)
 		return;
@@ -1215,7 +1209,7 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s,
 	if (!info->async_flip)
 		return 0;
 
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0);
 		tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) &
 				GENMASK(12, 10)) >> 10;
@@ -1243,7 +1237,7 @@ static int gen8_update_plane_mmio_from_mi_display_flip(
 
 	set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12),
 		      info->surf_val << 12);
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0),
 			      info->stride_val);
 		set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10),
@@ -1267,7 +1261,7 @@ static int decode_mi_display_flip(struct parser_exec_state *s,
 
 	if (IS_BROADWELL(dev_priv))
 		return gen8_decode_mi_display_flip(s, info);
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return skl_decode_mi_display_flip(s, info);
 
 	return -ENODEV;
@@ -1278,7 +1272,9 @@ static int check_mi_display_flip(struct parser_exec_state *s,
 {
 	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
 
-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv)
+		|| IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv))
 		return gen8_check_mi_display_flip(s, info);
 	return -ENODEV;
 }
@@ -1289,7 +1285,9 @@ static int update_plane_mmio_from_mi_display_flip(
 {
 	struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv;
 
-	if (IS_BROADWELL(dev_priv) || IS_SKYLAKE(dev_priv))
+	if (IS_BROADWELL(dev_priv)
+		|| IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv))
 		return gen8_update_plane_mmio_from_mi_display_flip(s, info);
 	return -ENODEV;
 }
@@ -1569,7 +1567,8 @@ static int batch_buffer_needs_scan(struct parser_exec_state *s)
 {
 	struct intel_gvt *gvt = s->vgpu->gvt;
 
-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		/* BDW decides privilege based on address space */
 		if (cmd_val(s, 0) & (1 << 8))
 			return 0;
@@ -2478,7 +2477,7 @@ static int cmd_parser_exec(struct parser_exec_state *s)
 
 	t1 = get_cycles();
 
-	memcpy(&s_before_advance_custom, s, sizeof(struct parser_exec_state));
+	s_before_advance_custom = *s;
 
 	if (info->handler) {
 		ret = info->handler(s);
@@ -2604,6 +2603,9 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 	unsigned long gma_head, gma_tail, gma_bottom, ring_size, ring_tail;
 	struct parser_exec_state s;
 	int ret = 0;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+				struct intel_vgpu_workload,
+				wa_ctx);
 
 	/* ring base is page aligned */
 	if (WARN_ON(!IS_ALIGNED(wa_ctx->indirect_ctx.guest_gma, GTT_PAGE_SIZE)))
@@ -2618,14 +2620,14 @@ static int scan_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 
 	s.buf_type = RING_BUFFER_INSTRUCTION;
 	s.buf_addr_type = GTT_BUFFER;
-	s.vgpu = wa_ctx->workload->vgpu;
-	s.ring_id = wa_ctx->workload->ring_id;
+	s.vgpu = workload->vgpu;
+	s.ring_id = workload->ring_id;
 	s.ring_start = wa_ctx->indirect_ctx.guest_gma;
 	s.ring_size = ring_size;
 	s.ring_head = gma_head;
 	s.ring_tail = gma_tail;
 	s.rb_va = wa_ctx->indirect_ctx.shadow_va;
-	s.workload = wa_ctx->workload;
+	s.workload = workload;
 
 	ret = ip_gma_set(&s, gma_head);
 	if (ret)
@@ -2708,12 +2710,15 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ctx_size = wa_ctx->indirect_ctx.size;
 	unsigned long guest_gma = wa_ctx->indirect_ctx.guest_gma;
-	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;
 	struct drm_i915_gem_object *obj;
 	int ret = 0;
 	void *map;
 
-	obj = i915_gem_object_create(wa_ctx->workload->vgpu->gvt->dev_priv,
+	obj = i915_gem_object_create(workload->vgpu->gvt->dev_priv,
 				     roundup(ctx_size + CACHELINE_BYTES,
 					     PAGE_SIZE));
 	if (IS_ERR(obj))
@@ -2733,8 +2738,8 @@ static int shadow_indirect_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 		goto unmap_src;
 	}
 
-	ret = copy_gma_to_hva(wa_ctx->workload->vgpu,
-				wa_ctx->workload->vgpu->gtt.ggtt_mm,
+	ret = copy_gma_to_hva(workload->vgpu,
+				workload->vgpu->gtt.ggtt_mm,
 				guest_gma, guest_gma + ctx_size,
 				map);
 	if (ret < 0) {
@@ -2772,7 +2777,10 @@ static int combine_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
 	int ret;
-	struct intel_vgpu *vgpu = wa_ctx->workload->vgpu;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	struct intel_vgpu *vgpu = workload->vgpu;
 
 	if (wa_ctx->indirect_ctx.size == 0)
 		return 0;
diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c
index 5419ae6ec633..e0261fcc5b50 100644
--- a/drivers/gpu/drm/i915/gvt/display.c
+++ b/drivers/gpu/drm/i915/gvt/display.c
@@ -161,8 +161,9 @@ static unsigned char virtual_dp_monitor_edid[GVT_EDID_NUM][EDID_SIZE] = {
 
 #define DPCD_HEADER_SIZE        0xb
 
+/* let the virtual display supports DP1.2 */
 static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = {
-	0x11, 0x0a, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+	0x12, 0x014, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
 };
 
 static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
@@ -172,26 +173,64 @@ static void emulate_monitor_status_change(struct intel_vgpu *vgpu)
 			SDE_PORTC_HOTPLUG_CPT |
 			SDE_PORTD_HOTPLUG_CPT);
 
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT |
 				SDE_PORTE_HOTPLUG_SPT);
+		vgpu_vreg(vgpu, SKL_FUSE_STATUS) |=
+				SKL_FUSE_DOWNLOAD_STATUS |
+				SKL_FUSE_PG0_DIST_STATUS |
+				SKL_FUSE_PG1_DIST_STATUS |
+				SKL_FUSE_PG2_DIST_STATUS;
+		vgpu_vreg(vgpu, LCPLL1_CTL) |=
+				LCPLL_PLL_ENABLE |
+				LCPLL_PLL_LOCK;
+		vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE;
+
+	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) {
-		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_B << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE;
+		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT;
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_C << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED;
 	}
 
 	if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT;
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &=
+			~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK |
+			TRANS_DDI_PORT_MASK);
+		vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |=
+			(TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST |
+			(PORT_D << TRANS_DDI_PORT_SHIFT) |
+			TRANS_DDI_FUNC_ENABLE);
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE;
+		vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE;
 		vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED;
 	}
 
-	if (IS_SKYLAKE(dev_priv) &&
+	if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) &&
 			intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) {
 		vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT;
 	}
@@ -353,7 +392,7 @@ void intel_vgpu_clean_display(struct intel_vgpu *vgpu)
 {
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		clean_virtual_dp_monitor(vgpu, PORT_D);
 	else
 		clean_virtual_dp_monitor(vgpu, PORT_B);
@@ -375,7 +414,7 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution)
 
 	intel_vgpu_init_i2c_edid(vgpu);
 
-	if (IS_SKYLAKE(dev_priv))
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))
 		return setup_virtual_dp_monitor(vgpu, PORT_D, GVT_DP_D,
 						resolution);
 	else
diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c
index f1648fe5e5ea..42cd09ec63fa 100644
--- a/drivers/gpu/drm/i915/gvt/edid.c
+++ b/drivers/gpu/drm/i915/gvt/edid.c
@@ -495,7 +495,8 @@ void intel_gvt_i2c_handle_aux_ch_write(struct intel_vgpu *vgpu,
 			unsigned char val = edid_get_byte(vgpu);
 
 			aux_data_for_write = (val << 16);
-		}
+		} else
+			aux_data_for_write = (0xff << 16);
 	}
 	/* write the return value in AUX_CH_DATA reg which includes:
 	 * ACK of I2C_WRITE
diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c
index f1f426a97aa9..dca989eb2d42 100644
--- a/drivers/gpu/drm/i915/gvt/execlist.c
+++ b/drivers/gpu/drm/i915/gvt/execlist.c
@@ -56,8 +56,8 @@ static int context_switch_events[] = {
 
 static int ring_id_to_context_switch_event(int ring_id)
 {
-	if (WARN_ON(ring_id < RCS && ring_id >
-				ARRAY_SIZE(context_switch_events)))
+	if (WARN_ON(ring_id < RCS ||
+		    ring_id >= ARRAY_SIZE(context_switch_events)))
 		return -EINVAL;
 
 	return context_switch_events[ring_id];
@@ -394,9 +394,11 @@ static void prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 
 static int update_wa_ctx_2_shadow_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 {
-	int ring_id = wa_ctx->workload->ring_id;
-	struct i915_gem_context *shadow_ctx =
-		wa_ctx->workload->vgpu->shadow_ctx;
+	struct intel_vgpu_workload *workload = container_of(wa_ctx,
+					struct intel_vgpu_workload,
+					wa_ctx);
+	int ring_id = workload->ring_id;
+	struct i915_gem_context *shadow_ctx = workload->vgpu->shadow_ctx;
 	struct drm_i915_gem_object *ctx_obj =
 		shadow_ctx->engine[ring_id].state->obj;
 	struct execlist_ring_context *shadow_ring_context;
@@ -680,15 +682,12 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id,
 			CACHELINE_BYTES;
 		workload->wa_ctx.per_ctx.guest_gma =
 			per_ctx & PER_CTX_ADDR_MASK;
-		workload->wa_ctx.workload = workload;
 
 		WARN_ON(workload->wa_ctx.indirect_ctx.size && !(per_ctx & 0x1));
 	}
 
 	if (emulate_schedule_in)
-		memcpy(&workload->elsp_dwords,
-				&vgpu->execlist[ring_id].elsp_dwords,
-				sizeof(workload->elsp_dwords));
+		workload->elsp_dwords = vgpu->execlist[ring_id].elsp_dwords;
 
 	gvt_dbg_el("workload %p ring id %d head %x tail %x start %x ctl %x\n",
 			workload, ring_id, head, tail, start, ctl);
@@ -775,7 +774,8 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
 			_EL_OFFSET_STATUS_PTR);
 
 	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
-	ctx_status_ptr.read_ptr = ctx_status_ptr.write_ptr = 0x7;
+	ctx_status_ptr.read_ptr = 0;
+	ctx_status_ptr.write_ptr = 0x7;
 	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/firmware.c b/drivers/gpu/drm/i915/gvt/firmware.c
index 933a7c211a1c..dce8d15f706f 100644
--- a/drivers/gpu/drm/i915/gvt/firmware.c
+++ b/drivers/gpu/drm/i915/gvt/firmware.c
@@ -75,11 +75,11 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 	struct gvt_firmware_header *h;
 	void *firmware;
 	void *p;
-	unsigned long size;
+	unsigned long size, crc32_start;
 	int i;
 	int ret;
 
-	size = sizeof(*h) + info->mmio_size + info->cfg_space_size - 1;
+	size = sizeof(*h) + info->mmio_size + info->cfg_space_size;
 	firmware = vzalloc(size);
 	if (!firmware)
 		return -ENOMEM;
@@ -112,6 +112,9 @@ static int expose_firmware_sysfs(struct intel_gvt *gvt)
 
 	memcpy(gvt->firmware.mmio, p, info->mmio_size);
 
+	crc32_start = offsetof(struct gvt_firmware_header, crc32) + 4;
+	h->crc32 = crc32_le(0, firmware + crc32_start, size - crc32_start);
+
 	firmware_attr.size = size;
 	firmware_attr.private = firmware;
 
@@ -234,7 +237,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt)
 
 	firmware->mmio = mem;
 
-	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%04x.golden_hw_state",
+	sprintf(path, "%s/vid_0x%04x_did_0x%04x_rid_0x%02x.golden_hw_state",
 		 GVT_FIRMWARE_PATH, pdev->vendor, pdev->device,
 		 pdev->revision);
 
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index da7312715824..c6f0077f590d 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -1837,11 +1837,15 @@ static int emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off,
 		ret = gtt_entry_p2m(vgpu, &e, &m);
 		if (ret) {
 			gvt_vgpu_err("fail to translate guest gtt entry\n");
-			return ret;
+			/* guest driver may read/write the entry when partial
+			 * update the entry in this situation p2m will fail
+			 * settting the shadow entry to point to a scratch page
+			 */
+			ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn);
 		}
 	} else {
 		m = e;
-		m.val64 = 0;
+		ops->set_pfn(&m, gvt->gtt.scratch_ggtt_mfn);
 	}
 
 	ggtt_set_shadow_entry(ggtt_mm, &m, g_gtt_index);
@@ -2220,7 +2224,8 @@ int intel_gvt_init_gtt(struct intel_gvt *gvt)
 
 	gvt_dbg_core("init gtt\n");
 
-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		gvt->gtt.pte_ops = &gen8_gtt_pte_ops;
 		gvt->gtt.gma_ops = &gen8_gtt_gma_ops;
 		gvt->gtt.mm_alloc_page_table = gen8_mm_alloc_page_table;
@@ -2289,12 +2294,15 @@ void intel_gvt_clean_gtt(struct intel_gvt *gvt)
 void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
+	struct drm_i915_private *dev_priv = gvt->dev_priv;
 	struct intel_gvt_gtt_pte_ops *ops = vgpu->gvt->gtt.pte_ops;
 	u32 index;
 	u32 offset;
 	u32 num_entries;
 	struct intel_gvt_gtt_entry e;
 
+	intel_runtime_pm_get(dev_priv);
+
 	memset(&e, 0, sizeof(struct intel_gvt_gtt_entry));
 	e.type = GTT_TYPE_GGTT_PTE;
 	ops->set_pfn(&e, gvt->gtt.scratch_ggtt_mfn);
@@ -2309,6 +2317,8 @@ void intel_vgpu_reset_ggtt(struct intel_vgpu *vgpu)
 	num_entries = vgpu_hidden_sz(vgpu) >> PAGE_SHIFT;
 	for (offset = 0; offset < num_entries; offset++)
 		ops->set_entry(NULL, &e, index + offset, false, 0, vgpu);
+
+	intel_runtime_pm_put(dev_priv);
 }
 
 /**
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 3b9d59e457ba..7dea5e5d5567 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -52,6 +52,8 @@ static const struct intel_gvt_ops intel_gvt_ops = {
 	.vgpu_create = intel_gvt_create_vgpu,
 	.vgpu_destroy = intel_gvt_destroy_vgpu,
 	.vgpu_reset = intel_gvt_reset_vgpu,
+	.vgpu_activate = intel_gvt_activate_vgpu,
+	.vgpu_deactivate = intel_gvt_deactivate_vgpu,
 };
 
 /**
@@ -106,7 +108,8 @@ static void init_device_info(struct intel_gvt *gvt)
 	struct intel_gvt_device_info *info = &gvt->device_info;
 	struct pci_dev *pdev = gvt->dev_priv->drm.pdev;
 
-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		info->max_support_vgpus = 8;
 		info->cfg_space_size = 256;
 		info->mmio_size = 2 * 1024 * 1024;
@@ -143,6 +146,11 @@ static int gvt_service_thread(void *data)
 			intel_gvt_emulate_vblank(gvt);
 			mutex_unlock(&gvt->lock);
 		}
+
+		if (test_and_clear_bit(INTEL_GVT_REQUEST_SCHED,
+					(void *)&gvt->service_request)) {
+			intel_gvt_schedule(gvt);
+		}
 	}
 
 	return 0;
@@ -196,6 +204,8 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
 
 	idr_destroy(&gvt->vgpu_idr);
 
+	intel_gvt_destroy_idle_vgpu(gvt->idle_vgpu);
+
 	kfree(dev_priv->gvt);
 	dev_priv->gvt = NULL;
 }
@@ -214,6 +224,7 @@ void intel_gvt_clean_device(struct drm_i915_private *dev_priv)
 int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 {
 	struct intel_gvt *gvt;
+	struct intel_vgpu *vgpu;
 	int ret;
 
 	/*
@@ -286,6 +297,14 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 		goto out_clean_types;
 	}
 
+	vgpu = intel_gvt_create_idle_vgpu(gvt);
+	if (IS_ERR(vgpu)) {
+		ret = PTR_ERR(vgpu);
+		gvt_err("failed to create idle vgpu\n");
+		goto out_clean_types;
+	}
+	gvt->idle_vgpu = vgpu;
+
 	gvt_dbg_core("gvt device initialization is done\n");
 	dev_priv->gvt = gvt;
 	return 0;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 6dfc48b63b71..930732e5c780 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -138,6 +138,10 @@ struct intel_vgpu_display {
 	struct intel_vgpu_sbi sbi;
 };
 
+struct vgpu_sched_ctl {
+	int weight;
+};
+
 struct intel_vgpu {
 	struct intel_gvt *gvt;
 	int id;
@@ -147,6 +151,7 @@ struct intel_vgpu {
 	bool failsafe;
 	bool resetting;
 	void *sched_data;
+	struct vgpu_sched_ctl sched_ctl;
 
 	struct intel_vgpu_fence fence;
 	struct intel_vgpu_gm gm;
@@ -160,6 +165,7 @@ struct intel_vgpu {
 	struct list_head workload_q_head[I915_NUM_ENGINES];
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
+	ktime_t last_ctx_submit_time;
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	struct i915_gem_context *shadow_ctx;
 
@@ -215,6 +221,7 @@ struct intel_vgpu_type {
 	unsigned int low_gm_size;
 	unsigned int high_gm_size;
 	unsigned int fence;
+	unsigned int weight;
 	enum intel_vgpu_edid resolution;
 };
 
@@ -236,6 +243,7 @@ struct intel_gvt {
 	DECLARE_HASHTABLE(cmd_table, GVT_CMD_HASH_BITS);
 	struct intel_vgpu_type *types;
 	unsigned int num_types;
+	struct intel_vgpu *idle_vgpu;
 
 	struct task_struct *service_thread;
 	wait_queue_head_t service_thread_wq;
@@ -249,6 +257,7 @@ static inline struct intel_gvt *to_gvt(struct drm_i915_private *i915)
 
 enum {
 	INTEL_GVT_REQUEST_EMULATE_VBLANK = 0,
+	INTEL_GVT_REQUEST_SCHED = 1,
 };
 
 static inline void intel_gvt_request_service(struct intel_gvt *gvt,
@@ -322,6 +331,8 @@ struct intel_vgpu_creation_params {
 	__u64 resolution;
 	__s32 primary;
 	__u64 vgpu_id;
+
+	__u32 weight;
 };
 
 int intel_vgpu_alloc_resource(struct intel_vgpu *vgpu,
@@ -376,13 +387,16 @@ static inline void intel_vgpu_write_pci_bar(struct intel_vgpu *vgpu,
 int intel_gvt_init_vgpu_types(struct intel_gvt *gvt);
 void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
 
+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
 struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 					 struct intel_vgpu_type *type);
 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
 				 unsigned int engine_mask);
 void intel_gvt_reset_vgpu(struct intel_vgpu *vgpu);
-
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu);
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu);
 
 /* validating GM functions */
 #define vgpu_gmadr_is_aperture(vgpu, gmadr) \
@@ -449,6 +463,8 @@ struct intel_gvt_ops {
 				struct intel_vgpu_type *);
 	void (*vgpu_destroy)(struct intel_vgpu *);
 	void (*vgpu_reset)(struct intel_vgpu *);
+	void (*vgpu_activate)(struct intel_vgpu *);
+	void (*vgpu_deactivate)(struct intel_vgpu *);
 };
 
 
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index eaff45d417e8..0ad1a508e2af 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -68,6 +68,8 @@ unsigned long intel_gvt_get_device_type(struct intel_gvt *gvt)
 		return D_BDW;
 	else if (IS_SKYLAKE(gvt->dev_priv))
 		return D_SKL;
+	else if (IS_KABYLAKE(gvt->dev_priv))
+		return D_KBL;
 
 	return 0;
 }
@@ -234,7 +236,8 @@ static int mul_force_wake_write(struct intel_vgpu *vgpu,
 	old = vgpu_vreg(vgpu, offset);
 	new = CALC_MODE_MASK_REG(old, *(u32 *)p_data);
 
-	if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 		switch (offset) {
 		case FORCEWAKE_RENDER_GEN9_REG:
 			ack_reg_offset = FORCEWAKE_ACK_RENDER_GEN9_REG;
@@ -823,8 +826,9 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
 	write_vreg(vgpu, offset, p_data, bytes);
 	data = vgpu_vreg(vgpu, offset);
 
-	if (IS_SKYLAKE(vgpu->gvt->dev_priv) &&
-	    offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) {
+	if ((IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv))
+		&& offset != _REG_SKL_DP_AUX_CH_CTL(port_index)) {
 		/* SKL DPB/C/D aux ctl register changed */
 		return 0;
 	} else if (IS_BROADWELL(vgpu->gvt->dev_priv) &&
@@ -970,6 +974,14 @@ static int dp_aux_ch_ctl_mmio_write(struct intel_vgpu *vgpu,
 	return 0;
 }
 
+static int mbctl_write(struct intel_vgpu *vgpu, unsigned int offset,
+		void *p_data, unsigned int bytes)
+{
+	*(u32 *)p_data &= (~GEN6_MBCTL_ENABLE_BOOT_FETCH);
+	write_vreg(vgpu, offset, p_data, bytes);
+	return 0;
+}
+
 static int vga_control_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 		void *p_data, unsigned int bytes)
 {
@@ -1303,7 +1315,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 
 	switch (cmd) {
 	case GEN9_PCODE_READ_MEM_LATENCY:
-		if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+			 || IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 			/**
 			 * "Read memory latency" command on gen9.
 			 * Below memory latency values are read
@@ -1316,7 +1329,8 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset,
 		}
 		break;
 	case SKL_PCODE_CDCLK_CONTROL:
-		if (IS_SKYLAKE(vgpu->gvt->dev_priv))
+		if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+			 || IS_KABYLAKE(vgpu->gvt->dev_priv))
 			*data0 = SKL_CDCLK_READY_FOR_CHANGE;
 		break;
 	case GEN6_PCODE_READ_RC6VIDS:
@@ -1410,6 +1424,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset,
 
 	execlist->elsp_dwords.data[execlist->elsp_dwords.index] = data;
 	if (execlist->elsp_dwords.index == 3) {
+		vgpu->last_ctx_submit_time = ktime_get();
 		ret = intel_vgpu_submit_execlist(vgpu, ring_id);
 		if(ret)
 			gvt_vgpu_err("fail submit workload on ring %d\n",
@@ -2238,7 +2253,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt)
 	MMIO_D(0x7180, D_ALL);
 	MMIO_D(0x7408, D_ALL);
 	MMIO_D(0x7c00, D_ALL);
-	MMIO_D(GEN6_MBCTL, D_ALL);
+	MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write);
 	MMIO_D(0x911c, D_ALL);
 	MMIO_D(0x9120, D_ALL);
 	MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL);
@@ -2584,219 +2599,232 @@ static int init_skl_mmio_info(struct intel_gvt *gvt)
 	MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write);
 	MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL);
 
-	MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
-	MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL, NULL, dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
+	MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL,
+						dp_aux_ch_ctl_mmio_write);
 
-	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL);
-	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL, NULL, skl_power_well_ctl_write);
+	MMIO_D(HSW_PWR_WELL_BIOS, D_SKL_PLUS);
+	MMIO_DH(HSW_PWR_WELL_DRIVER, D_SKL_PLUS, NULL,
+						skl_power_well_ctl_write);
+	MMIO_DH(GEN6_PCODE_MAILBOX, D_SKL_PLUS, NULL, mailbox_write);
 
 	MMIO_D(0xa210, D_SKL_PLUS);
 	MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS);
 	MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DH(0x4ddc, D_SKL, NULL, skl_misc_ctl_write);
-	MMIO_DH(0x42080, D_SKL, NULL, skl_misc_ctl_write);
-	MMIO_D(0x45504, D_SKL);
-	MMIO_D(0x45520, D_SKL);
-	MMIO_D(0x46000, D_SKL);
-	MMIO_DH(0x46010, D_SKL, NULL, skl_lcpll_write);
-	MMIO_DH(0x46014, D_SKL, NULL, skl_lcpll_write);
-	MMIO_D(0x6C040, D_SKL);
-	MMIO_D(0x6C048, D_SKL);
-	MMIO_D(0x6C050, D_SKL);
-	MMIO_D(0x6C044, D_SKL);
-	MMIO_D(0x6C04C, D_SKL);
-	MMIO_D(0x6C054, D_SKL);
-	MMIO_D(0x6c058, D_SKL);
-	MMIO_D(0x6c05c, D_SKL);
-	MMIO_DH(0X6c060, D_SKL, dpll_status_read, NULL);
-
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL, NULL, pf_write);
-
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL, NULL, pf_write);
-
-	MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL, NULL, pf_write);
-	MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL, NULL, pf_write);
-
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL, NULL, NULL);
-
-	MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-
-	MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-
-	MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-
-	MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL, NULL, NULL);
-
-	MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL, NULL, NULL);
-	MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL, NULL, NULL);
-
-	MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL, NULL, NULL);
-	MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL, NULL, NULL);
-
-	MMIO_D(0x70380, D_SKL);
-	MMIO_D(0x71380, D_SKL);
-	MMIO_D(0x72380, D_SKL);
-	MMIO_D(0x7039c, D_SKL);
-
-	MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL, NULL, NULL);
-	MMIO_D(0x8f074, D_SKL);
-	MMIO_D(0x8f004, D_SKL);
-	MMIO_D(0x8f034, D_SKL);
-
-	MMIO_D(0xb11c, D_SKL);
-
-	MMIO_D(0x51000, D_SKL);
-	MMIO_D(0x6c00c, D_SKL);
-
-	MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
-	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL, NULL, NULL);
-
-	MMIO_D(0xd08, D_SKL);
-	MMIO_DFH(0x20e0, D_SKL, F_MODE_MASK, NULL, NULL);
-	MMIO_DFH(0x20ec, D_SKL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
+	MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, skl_misc_ctl_write);
+	MMIO_DH(0x42080, D_SKL_PLUS, NULL, skl_misc_ctl_write);
+	MMIO_D(0x45504, D_SKL_PLUS);
+	MMIO_D(0x45520, D_SKL_PLUS);
+	MMIO_D(0x46000, D_SKL_PLUS);
+	MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write);
+	MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write);
+	MMIO_D(0x6C040, D_SKL | D_KBL);
+	MMIO_D(0x6C048, D_SKL | D_KBL);
+	MMIO_D(0x6C050, D_SKL | D_KBL);
+	MMIO_D(0x6C044, D_SKL | D_KBL);
+	MMIO_D(0x6C04C, D_SKL | D_KBL);
+	MMIO_D(0x6C054, D_SKL | D_KBL);
+	MMIO_D(0x6c058, D_SKL | D_KBL);
+	MMIO_D(0x6c05c, D_SKL | D_KBL);
+	MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL);
+
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_POS(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_WIN_SZ(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_B, 1), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 0), D_SKL_PLUS, NULL, pf_write);
+	MMIO_DH(SKL_PS_CTRL(PIPE_C, 1), D_SKL_PLUS, NULL, pf_write);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(CUR_BUF_CFG(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_BUF_CFG(PIPE_C), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_F(PLANE_WM(PIPE_A, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_A, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_A, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+
+	MMIO_F(PLANE_WM(PIPE_B, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_B, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_B, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+
+	MMIO_F(PLANE_WM(PIPE_C, 0, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_C, 1, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(PLANE_WM(PIPE_C, 2, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+
+	MMIO_F(CUR_WM(PIPE_A, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(CUR_WM(PIPE_B, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_F(CUR_WM(PIPE_C, 0), 4 * 8, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_WM_TRANS(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(CUR_WM_TRANS(PIPE_A), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_B), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(CUR_WM_TRANS(PIPE_C), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 0), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL);
+	MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL);
+
+	MMIO_D(0x70380, D_SKL_PLUS);
+	MMIO_D(0x71380, D_SKL_PLUS);
+	MMIO_D(0x72380, D_SKL_PLUS);
+	MMIO_D(0x7039c, D_SKL_PLUS);
+
+	MMIO_F(0x80000, 0x3000, 0, 0, 0, D_SKL_PLUS, NULL, NULL);
+	MMIO_D(0x8f074, D_SKL | D_KBL);
+	MMIO_D(0x8f004, D_SKL | D_KBL);
+	MMIO_D(0x8f034, D_SKL | D_KBL);
+
+	MMIO_D(0xb11c, D_SKL | D_KBL);
+
+	MMIO_D(0x51000, D_SKL | D_KBL);
+	MMIO_D(0x6c00c, D_SKL_PLUS);
+
+	MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
+	MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL);
+
+	MMIO_D(0xd08, D_SKL_PLUS);
+	MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL);
+	MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL);
 
 	/* TRTT */
-	MMIO_DFH(0x4de0, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4de4, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4de8, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4dec, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4df0, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(0x4df4, D_SKL, F_CMD_ACCESS, NULL, gen9_trtte_write);
-	MMIO_DH(0x4dfc, D_SKL, NULL, gen9_trtt_chicken_write);
+	MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write);
+	MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write);
 
-	MMIO_D(0x45008, D_SKL);
+	MMIO_D(0x45008, D_SKL | D_KBL);
 
-	MMIO_D(0x46430, D_SKL);
+	MMIO_D(0x46430, D_SKL | D_KBL);
 
-	MMIO_D(0x46520, D_SKL);
+	MMIO_D(0x46520, D_SKL | D_KBL);
 
-	MMIO_D(0xc403c, D_SKL);
-	MMIO_D(0xb004, D_SKL);
+	MMIO_D(0xc403c, D_SKL | D_KBL);
+	MMIO_D(0xb004, D_SKL_PLUS);
 	MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write);
 
-	MMIO_D(0x65900, D_SKL);
-	MMIO_D(0x1082c0, D_SKL);
-	MMIO_D(0x4068, D_SKL);
-	MMIO_D(0x67054, D_SKL);
-	MMIO_D(0x6e560, D_SKL);
-	MMIO_D(0x6e554, D_SKL);
-	MMIO_D(0x2b20, D_SKL);
-	MMIO_D(0x65f00, D_SKL);
-	MMIO_D(0x65f08, D_SKL);
-	MMIO_D(0x320f0, D_SKL);
-
-	MMIO_DFH(_REG_VCS2_EXCC, D_SKL, F_CMD_ACCESS, NULL, NULL);
-	MMIO_D(0x70034, D_SKL);
-	MMIO_D(0x71034, D_SKL);
-	MMIO_D(0x72034, D_SKL);
-
-	MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL);
-	MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL);
-	MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL);
-	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL);
-
-	MMIO_D(0x44500, D_SKL);
+	MMIO_D(0x65900, D_SKL_PLUS);
+	MMIO_D(0x1082c0, D_SKL | D_KBL);
+	MMIO_D(0x4068, D_SKL | D_KBL);
+	MMIO_D(0x67054, D_SKL | D_KBL);
+	MMIO_D(0x6e560, D_SKL | D_KBL);
+	MMIO_D(0x6e554, D_SKL | D_KBL);
+	MMIO_D(0x2b20, D_SKL | D_KBL);
+	MMIO_D(0x65f00, D_SKL | D_KBL);
+	MMIO_D(0x65f08, D_SKL | D_KBL);
+	MMIO_D(0x320f0, D_SKL | D_KBL);
+
+	MMIO_DFH(_REG_VCS2_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_REG_VECS_EXCC, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_D(0x70034, D_SKL_PLUS);
+	MMIO_D(0x71034, D_SKL_PLUS);
+	MMIO_D(0x72034, D_SKL_PLUS);
+
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS);
+	MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS);
+
+	MMIO_D(0x44500, D_SKL_PLUS);
 	MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL);
-	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL, F_MODE_MASK | F_CMD_ACCESS,
+	MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS,
 		NULL, NULL);
+
+	MMIO_D(0x4ab8, D_KBL);
+	MMIO_D(0x940c, D_SKL_PLUS);
+	MMIO_D(0x2248, D_SKL_PLUS | D_KBL);
+	MMIO_D(0x4ab0, D_SKL | D_KBL);
+	MMIO_D(0x20d4, D_SKL | D_KBL);
+
 	return 0;
 }
 
@@ -2873,7 +2901,8 @@ int intel_gvt_setup_mmio_info(struct intel_gvt *gvt)
 		ret = init_broadwell_mmio_info(gvt);
 		if (ret)
 			goto err;
-	} else if (IS_SKYLAKE(dev_priv)) {
+	} else if (IS_SKYLAKE(dev_priv)
+		|| IS_KABYLAKE(dev_priv)) {
 		ret = init_broadwell_mmio_info(gvt);
 		if (ret)
 			goto err;
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
index 92bb247e3478..9d6812f0957f 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -580,7 +580,7 @@ static void gen8_init_irq(
 
 		SET_BIT_INFO(irq, 4, PRIMARY_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
 		SET_BIT_INFO(irq, 5, SPRITE_C_FLIP_DONE, INTEL_GVT_IRQ_INFO_DE_PIPE_C);
-	} else if (IS_SKYLAKE(gvt->dev_priv)) {
+	} else if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) {
 		SET_BIT_INFO(irq, 25, AUX_CHANNEL_B, INTEL_GVT_IRQ_INFO_DE_PORT);
 		SET_BIT_INFO(irq, 26, AUX_CHANNEL_C, INTEL_GVT_IRQ_INFO_DE_PORT);
 		SET_BIT_INFO(irq, 27, AUX_CHANNEL_D, INTEL_GVT_IRQ_INFO_DE_PORT);
@@ -690,7 +690,8 @@ int intel_gvt_init_irq(struct intel_gvt *gvt)
 
 	gvt_dbg_core("init irq framework\n");
 
-	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)) {
+	if (IS_BROADWELL(gvt->dev_priv) || IS_SKYLAKE(gvt->dev_priv)
+		|| IS_KABYLAKE(gvt->dev_priv)) {
 		irq->ops = &gen8_irq_ops;
 		irq->irq_map = gen8_irq_map;
 	} else {
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index 1ea3eb270de8..1ae0b4083ce1 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -295,10 +295,12 @@ static ssize_t description_show(struct kobject *kobj, struct device *dev,
 		return 0;
 
 	return sprintf(buf, "low_gm_size: %dMB\nhigh_gm_size: %dMB\n"
-		       "fence: %d\nresolution: %s\n",
+		       "fence: %d\nresolution: %s\n"
+		       "weight: %d\n",
 		       BYTES_TO_MB(type->low_gm_size),
 		       BYTES_TO_MB(type->high_gm_size),
-		       type->fence, vgpu_edid_str(type->resolution));
+		       type->fence, vgpu_edid_str(type->resolution),
+		       type->weight);
 }
 
 static MDEV_TYPE_ATTR_RO(available_instances);
@@ -544,6 +546,8 @@ static int intel_vgpu_open(struct mdev_device *mdev)
 	if (ret)
 		goto undo_group;
 
+	intel_gvt_ops->vgpu_activate(vgpu);
+
 	atomic_set(&vgpu->vdev.released, 0);
 	return ret;
 
@@ -569,6 +573,8 @@ static void __intel_vgpu_release(struct intel_vgpu *vgpu)
 	if (atomic_cmpxchg(&vgpu->vdev.released, 0, 1))
 		return;
 
+	intel_gvt_ops->vgpu_deactivate(vgpu);
+
 	ret = vfio_unregister_notifier(mdev_dev(vgpu->vdev.mdev), VFIO_IOMMU_NOTIFY,
 					&vgpu->vdev.iommu_notifier);
 	WARN(ret, "vfio_unregister_notifier for iommu failed: %d\n", ret);
@@ -1146,8 +1152,40 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd,
 	return 0;
 }
 
+static ssize_t
+vgpu_id_show(struct device *dev, struct device_attribute *attr,
+	     char *buf)
+{
+	struct mdev_device *mdev = mdev_from_dev(dev);
+
+	if (mdev) {
+		struct intel_vgpu *vgpu = (struct intel_vgpu *)
+			mdev_get_drvdata(mdev);
+		return sprintf(buf, "%d\n", vgpu->id);
+	}
+	return sprintf(buf, "\n");
+}
+
+static DEVICE_ATTR_RO(vgpu_id);
+
+static struct attribute *intel_vgpu_attrs[] = {
+	&dev_attr_vgpu_id.attr,
+	NULL
+};
+
+static const struct attribute_group intel_vgpu_group = {
+	.name = "intel_vgpu",
+	.attrs = intel_vgpu_attrs,
+};
+
+static const struct attribute_group *intel_vgpu_groups[] = {
+	&intel_vgpu_group,
+	NULL,
+};
+
 static const struct mdev_parent_ops intel_vgpu_ops = {
 	.supported_type_groups	= intel_vgpu_type_groups,
+	.mdev_attr_groups       = intel_vgpu_groups,
 	.create			= intel_vgpu_create,
 	.remove			= intel_vgpu_remove,
 
@@ -1326,6 +1364,7 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
 	vgpu->handle = (unsigned long)info;
 	info->vgpu = vgpu;
 	info->kvm = kvm;
+	kvm_get_kvm(info->kvm);
 
 	kvmgt_protect_table_init(info);
 	gvt_cache_init(vgpu);
@@ -1339,14 +1378,8 @@ static int kvmgt_guest_init(struct mdev_device *mdev)
 
 static bool kvmgt_guest_exit(struct kvmgt_guest_info *info)
 {
-	struct intel_vgpu *vgpu = info->vgpu;
-
-	if (!info) {
-		gvt_vgpu_err("kvmgt_guest_info invalid\n");
-		return false;
-	}
-
 	kvm_page_track_unregister_notifier(info->kvm, &info->track_node);
+	kvm_put_kvm(info->kvm);
 	kvmgt_protect_table_destroy(info);
 	gvt_cache_destroy(info->vgpu);
 	vfree(info);
diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h
index a3a027025cd0..7edd66f38ef9 100644
--- a/drivers/gpu/drm/i915/gvt/mmio.h
+++ b/drivers/gpu/drm/i915/gvt/mmio.h
@@ -44,20 +44,21 @@ struct intel_vgpu;
 #define D_HSW   (1 << 2)
 #define D_BDW   (1 << 3)
 #define D_SKL	(1 << 4)
+#define D_KBL	(1 << 5)
 
-#define D_GEN9PLUS	(D_SKL)
-#define D_GEN8PLUS	(D_BDW | D_SKL)
-#define D_GEN75PLUS	(D_HSW | D_BDW | D_SKL)
-#define D_GEN7PLUS	(D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_GEN9PLUS	(D_SKL | D_KBL)
+#define D_GEN8PLUS	(D_BDW | D_SKL | D_KBL)
+#define D_GEN75PLUS	(D_HSW | D_BDW | D_SKL | D_KBL)
+#define D_GEN7PLUS	(D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
 
-#define D_SKL_PLUS	(D_SKL)
-#define D_BDW_PLUS	(D_BDW | D_SKL)
-#define D_HSW_PLUS	(D_HSW | D_BDW | D_SKL)
-#define D_IVB_PLUS	(D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_SKL_PLUS	(D_SKL | D_KBL)
+#define D_BDW_PLUS	(D_BDW | D_SKL | D_KBL)
+#define D_HSW_PLUS	(D_HSW | D_BDW | D_SKL | D_KBL)
+#define D_IVB_PLUS	(D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
 
 #define D_PRE_BDW	(D_SNB | D_IVB | D_HSW)
 #define D_PRE_SKL	(D_SNB | D_IVB | D_HSW | D_BDW)
-#define D_ALL		(D_SNB | D_IVB | D_HSW | D_BDW | D_SKL)
+#define D_ALL		(D_SNB | D_IVB | D_HSW | D_BDW | D_SKL | D_KBL)
 
 struct intel_gvt_mmio_info {
 	u32 offset;
diff --git a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c
index 95ee091ce085..c6e7972ac21d 100644
--- a/drivers/gpu/drm/i915/gvt/render.c
+++ b/drivers/gpu/drm/i915/gvt/render.c
@@ -44,7 +44,7 @@ struct render_mmio {
 	u32 value;
 };
 
-static struct render_mmio gen8_render_mmio_list[] = {
+static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = {
 	{RCS, _MMIO(0x229c), 0xffff, false},
 	{RCS, _MMIO(0x2248), 0x0, false},
 	{RCS, _MMIO(0x2098), 0x0, false},
@@ -75,7 +75,7 @@ static struct render_mmio gen8_render_mmio_list[] = {
 	{BCS, _MMIO(0x22028), 0x0, false},
 };
 
-static struct render_mmio gen9_render_mmio_list[] = {
+static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = {
 	{RCS, _MMIO(0x229c), 0xffff, false},
 	{RCS, _MMIO(0x2248), 0x0, false},
 	{RCS, _MMIO(0x2098), 0x0, false},
@@ -126,6 +126,18 @@ static struct render_mmio gen9_render_mmio_list[] = {
 	{VCS2, _MMIO(0x1c028), 0xffff, false},
 
 	{VECS, _MMIO(0x1a028), 0xffff, false},
+
+	{RCS, _MMIO(0x7304), 0xffff, true},
+	{RCS, _MMIO(0x2248), 0x0, false},
+	{RCS, _MMIO(0x940c), 0x0, false},
+	{RCS, _MMIO(0x4ab8), 0x0, false},
+
+	{RCS, _MMIO(0x4ab0), 0x0, false},
+	{RCS, _MMIO(0x20d4), 0x0, false},
+
+	{RCS, _MMIO(0xb004), 0x0, false},
+	{RCS, _MMIO(0x20a0), 0x0, false},
+	{RCS, _MMIO(0x20e4), 0xffff, false},
 };
 
 static u32 gen9_render_mocs[I915_NUM_ENGINES][64];
@@ -159,7 +171,7 @@ static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id)
 	 */
 	fw = intel_uncore_forcewake_for_reg(dev_priv, reg,
 					    FW_REG_READ | FW_REG_WRITE);
-	if (ring_id == RCS && IS_SKYLAKE(dev_priv))
+	if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)))
 		fw |= FORCEWAKE_RENDER;
 
 	intel_uncore_forcewake_get(dev_priv, fw);
@@ -192,9 +204,6 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
-	if (!IS_SKYLAKE(dev_priv))
-		return;
-
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
 		gen9_render_mocs[ring_id][i] = I915_READ(offset);
@@ -207,7 +216,7 @@ static void load_mocs(struct intel_vgpu *vgpu, int ring_id)
 		l3_offset.reg = 0xb020;
 		for (i = 0; i < 32; i++) {
 			gen9_render_mocs_L3[i] = I915_READ(l3_offset);
-			I915_WRITE(l3_offset, vgpu_vreg(vgpu, offset));
+			I915_WRITE(l3_offset, vgpu_vreg(vgpu, l3_offset));
 			POSTING_READ(l3_offset);
 			l3_offset.reg += 4;
 		}
@@ -230,9 +239,6 @@ static void restore_mocs(struct intel_vgpu *vgpu, int ring_id)
 	if (WARN_ON(ring_id >= ARRAY_SIZE(regs)))
 		return;
 
-	if (!IS_SKYLAKE(dev_priv))
-		return;
-
 	offset.reg = regs[ring_id];
 	for (i = 0; i < 64; i++) {
 		vgpu_vreg(vgpu, offset) = I915_READ(offset);
@@ -265,7 +271,8 @@ void intel_gvt_load_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 	u32 inhibit_mask =
 		_MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT);
 
-	if (IS_SKYLAKE(vgpu->gvt->dev_priv)) {
+	if (IS_SKYLAKE(vgpu->gvt->dev_priv)
+		|| IS_KABYLAKE(vgpu->gvt->dev_priv)) {
 		mmio = gen9_render_mmio_list;
 		array_size = ARRAY_SIZE(gen9_render_mmio_list);
 		load_mocs(vgpu, ring_id);
@@ -312,7 +319,7 @@ void intel_gvt_restore_render_mmio(struct intel_vgpu *vgpu, int ring_id)
 	u32 v;
 	int i, array_size;
 
-	if (IS_SKYLAKE(dev_priv)) {
+	if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) {
 		mmio = gen9_render_mmio_list;
 		array_size = ARRAY_SIZE(gen9_render_mmio_list);
 		restore_mocs(vgpu, ring_id);
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c
index 34b9acdf3479..79ba4b3440aa 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.c
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.c
@@ -47,19 +47,92 @@ static bool vgpu_has_pending_workload(struct intel_vgpu *vgpu)
 	return false;
 }
 
+struct vgpu_sched_data {
+	struct list_head lru_list;
+	struct intel_vgpu *vgpu;
+
+	ktime_t sched_in_time;
+	ktime_t sched_out_time;
+	ktime_t sched_time;
+	ktime_t left_ts;
+	ktime_t allocated_ts;
+
+	struct vgpu_sched_ctl sched_ctl;
+};
+
+struct gvt_sched_data {
+	struct intel_gvt *gvt;
+	struct hrtimer timer;
+	unsigned long period;
+	struct list_head lru_runq_head;
+};
+
+static void vgpu_update_timeslice(struct intel_vgpu *pre_vgpu)
+{
+	ktime_t delta_ts;
+	struct vgpu_sched_data *vgpu_data = pre_vgpu->sched_data;
+
+	delta_ts = vgpu_data->sched_out_time - vgpu_data->sched_in_time;
+
+	vgpu_data->sched_time += delta_ts;
+	vgpu_data->left_ts -= delta_ts;
+}
+
+#define GVT_TS_BALANCE_PERIOD_MS 100
+#define GVT_TS_BALANCE_STAGE_NUM 10
+
+static void gvt_balance_timeslice(struct gvt_sched_data *sched_data)
+{
+	struct vgpu_sched_data *vgpu_data;
+	struct list_head *pos;
+	static uint64_t stage_check;
+	int stage = stage_check++ % GVT_TS_BALANCE_STAGE_NUM;
+
+	/* The timeslice accumulation reset at stage 0, which is
+	 * allocated again without adding previous debt.
+	 */
+	if (stage == 0) {
+		int total_weight = 0;
+		ktime_t fair_timeslice;
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			total_weight += vgpu_data->sched_ctl.weight;
+		}
+
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+			fair_timeslice = ms_to_ktime(GVT_TS_BALANCE_PERIOD_MS) *
+						vgpu_data->sched_ctl.weight /
+						total_weight;
+
+			vgpu_data->allocated_ts = fair_timeslice;
+			vgpu_data->left_ts = vgpu_data->allocated_ts;
+		}
+	} else {
+		list_for_each(pos, &sched_data->lru_runq_head) {
+			vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
+
+			/* timeslice for next 100ms should add the left/debt
+			 * slice of previous stages.
+			 */
+			vgpu_data->left_ts += vgpu_data->allocated_ts;
+		}
+	}
+}
+
 static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
 	enum intel_engine_id i;
 	struct intel_engine_cs *engine;
+	struct vgpu_sched_data *vgpu_data;
+	ktime_t cur_time;
 
 	/* no target to schedule */
 	if (!scheduler->next_vgpu)
 		return;
 
-	gvt_dbg_sched("try to schedule next vgpu %d\n",
-			scheduler->next_vgpu->id);
-
 	/*
 	 * after the flag is set, workload dispatch thread will
 	 * stop dispatching workload for current vgpu
@@ -68,14 +141,18 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 
 	/* still have uncompleted workload? */
 	for_each_engine(engine, gvt->dev_priv, i) {
-		if (scheduler->current_workload[i]) {
-			gvt_dbg_sched("still have running workload\n");
+		if (scheduler->current_workload[i])
 			return;
-		}
 	}
 
-	gvt_dbg_sched("switch to next vgpu %d\n",
-			scheduler->next_vgpu->id);
+	cur_time = ktime_get();
+	if (scheduler->current_vgpu) {
+		vgpu_data = scheduler->current_vgpu->sched_data;
+		vgpu_data->sched_out_time = cur_time;
+		vgpu_update_timeslice(scheduler->current_vgpu);
+	}
+	vgpu_data = scheduler->next_vgpu->sched_data;
+	vgpu_data->sched_in_time = cur_time;
 
 	/* switch current vgpu */
 	scheduler->current_vgpu = scheduler->next_vgpu;
@@ -88,97 +165,106 @@ static void try_to_schedule_next_vgpu(struct intel_gvt *gvt)
 		wake_up(&scheduler->waitq[i]);
 }
 
-struct tbs_vgpu_data {
-	struct list_head list;
-	struct intel_vgpu *vgpu;
-	/* put some per-vgpu sched stats here */
-};
-
-struct tbs_sched_data {
-	struct intel_gvt *gvt;
-	struct delayed_work work;
-	unsigned long period;
-	struct list_head runq_head;
-};
-
-#define GVT_DEFAULT_TIME_SLICE (msecs_to_jiffies(1))
-
-static void tbs_sched_func(struct work_struct *work)
+static struct intel_vgpu *find_busy_vgpu(struct gvt_sched_data *sched_data)
 {
-	struct tbs_sched_data *sched_data = container_of(work,
-			struct tbs_sched_data, work.work);
-	struct tbs_vgpu_data *vgpu_data;
-
-	struct intel_gvt *gvt = sched_data->gvt;
-	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
-
+	struct vgpu_sched_data *vgpu_data;
 	struct intel_vgpu *vgpu = NULL;
-	struct list_head *pos, *head;
-
-	mutex_lock(&gvt->lock);
-
-	/* no vgpu or has already had a target */
-	if (list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-		goto out;
-
-	if (scheduler->current_vgpu) {
-		vgpu_data = scheduler->current_vgpu->sched_data;
-		head = &vgpu_data->list;
-	} else {
-		head = &sched_data->runq_head;
-	}
+	struct list_head *head = &sched_data->lru_runq_head;
+	struct list_head *pos;
 
 	/* search a vgpu with pending workload */
 	list_for_each(pos, head) {
-		if (pos == &sched_data->runq_head)
-			continue;
 
-		vgpu_data = container_of(pos, struct tbs_vgpu_data, list);
+		vgpu_data = container_of(pos, struct vgpu_sched_data, lru_list);
 		if (!vgpu_has_pending_workload(vgpu_data->vgpu))
 			continue;
 
-		vgpu = vgpu_data->vgpu;
-		break;
+		/* Return the vGPU only if it has time slice left */
+		if (vgpu_data->left_ts > 0) {
+			vgpu = vgpu_data->vgpu;
+			break;
+		}
 	}
 
+	return vgpu;
+}
+
+/* in nanosecond */
+#define GVT_DEFAULT_TIME_SLICE 1000000
+
+static void tbs_sched_func(struct gvt_sched_data *sched_data)
+{
+	struct intel_gvt *gvt = sched_data->gvt;
+	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
+	struct vgpu_sched_data *vgpu_data;
+	struct intel_vgpu *vgpu = NULL;
+	static uint64_t timer_check;
+
+	if (!(timer_check++ % GVT_TS_BALANCE_PERIOD_MS))
+		gvt_balance_timeslice(sched_data);
+
+	/* no active vgpu or has already had a target */
+	if (list_empty(&sched_data->lru_runq_head) || scheduler->next_vgpu)
+		goto out;
+
+	vgpu = find_busy_vgpu(sched_data);
 	if (vgpu) {
 		scheduler->next_vgpu = vgpu;
-		gvt_dbg_sched("pick next vgpu %d\n", vgpu->id);
+
+		/* Move the last used vGPU to the tail of lru_list */
+		vgpu_data = vgpu->sched_data;
+		list_del_init(&vgpu_data->lru_list);
+		list_add_tail(&vgpu_data->lru_list,
+				&sched_data->lru_runq_head);
+	} else {
+		scheduler->next_vgpu = gvt->idle_vgpu;
 	}
 out:
-	if (scheduler->next_vgpu) {
-		gvt_dbg_sched("try to schedule next vgpu %d\n",
-				scheduler->next_vgpu->id);
+	if (scheduler->next_vgpu)
 		try_to_schedule_next_vgpu(gvt);
-	}
+}
 
-	/*
-	 * still have vgpu on runq
-	 * or last schedule haven't finished due to running workload
-	 */
-	if (!list_empty(&sched_data->runq_head) || scheduler->next_vgpu)
-		schedule_delayed_work(&sched_data->work, sched_data->period);
+void intel_gvt_schedule(struct intel_gvt *gvt)
+{
+	struct gvt_sched_data *sched_data = gvt->scheduler.sched_data;
 
+	mutex_lock(&gvt->lock);
+	tbs_sched_func(sched_data);
 	mutex_unlock(&gvt->lock);
 }
 
+static enum hrtimer_restart tbs_timer_fn(struct hrtimer *timer_data)
+{
+	struct gvt_sched_data *data;
+
+	data = container_of(timer_data, struct gvt_sched_data, timer);
+
+	intel_gvt_request_service(data->gvt, INTEL_GVT_REQUEST_SCHED);
+
+	hrtimer_add_expires_ns(&data->timer, data->period);
+
+	return HRTIMER_RESTART;
+}
+
 static int tbs_sched_init(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler =
 		&gvt->scheduler;
 
-	struct tbs_sched_data *data;
+	struct gvt_sched_data *data;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&data->runq_head);
-	INIT_DELAYED_WORK(&data->work, tbs_sched_func);
+	INIT_LIST_HEAD(&data->lru_runq_head);
+	hrtimer_init(&data->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	data->timer.function = tbs_timer_fn;
 	data->period = GVT_DEFAULT_TIME_SLICE;
 	data->gvt = gvt;
 
 	scheduler->sched_data = data;
+
 	return 0;
 }
 
@@ -186,25 +272,28 @@ static void tbs_sched_clean(struct intel_gvt *gvt)
 {
 	struct intel_gvt_workload_scheduler *scheduler =
 		&gvt->scheduler;
-	struct tbs_sched_data *data = scheduler->sched_data;
+	struct gvt_sched_data *data = scheduler->sched_data;
+
+	hrtimer_cancel(&data->timer);
 
-	cancel_delayed_work(&data->work);
 	kfree(data);
 	scheduler->sched_data = NULL;
 }
 
 static int tbs_sched_init_vgpu(struct intel_vgpu *vgpu)
 {
-	struct tbs_vgpu_data *data;
+	struct vgpu_sched_data *data;
 
 	data = kzalloc(sizeof(*data), GFP_KERNEL);
 	if (!data)
 		return -ENOMEM;
 
+	data->sched_ctl.weight = vgpu->sched_ctl.weight;
 	data->vgpu = vgpu;
-	INIT_LIST_HEAD(&data->list);
+	INIT_LIST_HEAD(&data->lru_list);
 
 	vgpu->sched_data = data;
+
 	return 0;
 }
 
@@ -216,21 +305,24 @@ static void tbs_sched_clean_vgpu(struct intel_vgpu *vgpu)
 
 static void tbs_sched_start_schedule(struct intel_vgpu *vgpu)
 {
-	struct tbs_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
-	struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+	struct gvt_sched_data *sched_data = vgpu->gvt->scheduler.sched_data;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
 
-	if (!list_empty(&vgpu_data->list))
+	if (!list_empty(&vgpu_data->lru_list))
 		return;
 
-	list_add_tail(&vgpu_data->list, &sched_data->runq_head);
-	schedule_delayed_work(&sched_data->work, 0);
+	list_add_tail(&vgpu_data->lru_list, &sched_data->lru_runq_head);
+
+	if (!hrtimer_active(&sched_data->timer))
+		hrtimer_start(&sched_data->timer, ktime_add_ns(ktime_get(),
+			sched_data->period), HRTIMER_MODE_ABS);
 }
 
 static void tbs_sched_stop_schedule(struct intel_vgpu *vgpu)
 {
-	struct tbs_vgpu_data *vgpu_data = vgpu->sched_data;
+	struct vgpu_sched_data *vgpu_data = vgpu->sched_data;
 
-	list_del_init(&vgpu_data->list);
+	list_del_init(&vgpu_data->lru_list);
 }
 
 static struct intel_gvt_sched_policy_ops tbs_schedule_ops = {
diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h
index bb8b9097e41a..ba00a5f7455f 100644
--- a/drivers/gpu/drm/i915/gvt/sched_policy.h
+++ b/drivers/gpu/drm/i915/gvt/sched_policy.h
@@ -43,6 +43,8 @@ struct intel_gvt_sched_policy_ops {
 	void (*stop_schedule)(struct intel_vgpu *vgpu);
 };
 
+void intel_gvt_schedule(struct intel_gvt *gvt);
+
 int intel_gvt_init_sched_policy(struct intel_gvt *gvt);
 
 void intel_gvt_clean_sched_policy(struct intel_gvt *gvt);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index ad8876bd15b3..bada32b33237 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -127,6 +127,11 @@ static int populate_shadow_context(struct intel_vgpu_workload *workload)
 	return 0;
 }
 
+static inline bool is_gvt_request(struct drm_i915_gem_request *req)
+{
+	return i915_gem_context_force_single_submission(req->ctx);
+}
+
 static int shadow_context_status_change(struct notifier_block *nb,
 		unsigned long action, void *data)
 {
@@ -137,7 +142,7 @@ static int shadow_context_status_change(struct notifier_block *nb,
 	struct intel_vgpu_workload *workload =
 		scheduler->current_workload[req->engine->id];
 
-	if (unlikely(!workload))
+	if (!is_gvt_request(req) || unlikely(!workload))
 		return NOTIFY_OK;
 
 	switch (action) {
@@ -274,11 +279,8 @@ static struct intel_vgpu_workload *pick_next_workload(
 		goto out;
 	}
 
-	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id))) {
-		gvt_dbg_sched("ring id %d stop - no available workload\n",
-				ring_id);
+	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
 		goto out;
-	}
 
 	/*
 	 * still have current workload, maybe the workload disptacher
@@ -448,7 +450,8 @@ static int workload_thread(void *priv)
 	struct intel_vgpu_workload *workload = NULL;
 	struct intel_vgpu *vgpu = NULL;
 	int ret;
-	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv);
+	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
+			|| IS_KABYLAKE(gvt->dev_priv);
 	DEFINE_WAIT_FUNC(wait, woken_wake_function);
 
 	kfree(p);
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index 2833dfa8c9ae..2cd725c0573e 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -67,7 +67,6 @@ struct shadow_per_ctx {
 };
 
 struct intel_shadow_wa_ctx {
-	struct intel_vgpu_workload *workload;
 	struct shadow_indirect_ctx indirect_ctx;
 	struct shadow_per_ctx per_ctx;
 
diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c
index 41cfa5ccae84..6e3cbd8caec2 100644
--- a/drivers/gpu/drm/i915/gvt/vgpu.c
+++ b/drivers/gpu/drm/i915/gvt/vgpu.c
@@ -64,18 +64,28 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu)
 	WARN_ON(sizeof(struct vgt_if) != VGT_PVINFO_SIZE);
 }
 
+#define VGPU_MAX_WEIGHT 16
+#define VGPU_WEIGHT(vgpu_num)	\
+	(VGPU_MAX_WEIGHT / (vgpu_num))
+
 static struct {
 	unsigned int low_mm;
 	unsigned int high_mm;
 	unsigned int fence;
+
+	/* A vGPU with a weight of 8 will get twice as much GPU as a vGPU
+	 * with a weight of 4 on a contended host, different vGPU type has
+	 * different weight set. Legal weights range from 1 to 16.
+	 */
+	unsigned int weight;
 	enum intel_vgpu_edid edid;
 	char *name;
 } vgpu_types[] = {
 /* Fixed vGPU type table */
-	{ MB_TO_BYTES(64), MB_TO_BYTES(512), 4, GVT_EDID_1024_768, "8" },
-	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, GVT_EDID_1920_1200, "4" },
-	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, GVT_EDID_1920_1200, "2" },
-	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, GVT_EDID_1920_1200, "1" },
+	{ MB_TO_BYTES(64), MB_TO_BYTES(384), 4, VGPU_WEIGHT(8), GVT_EDID_1024_768, "8" },
+	{ MB_TO_BYTES(128), MB_TO_BYTES(512), 4, VGPU_WEIGHT(4), GVT_EDID_1920_1200, "4" },
+	{ MB_TO_BYTES(256), MB_TO_BYTES(1024), 4, VGPU_WEIGHT(2), GVT_EDID_1920_1200, "2" },
+	{ MB_TO_BYTES(512), MB_TO_BYTES(2048), 4, VGPU_WEIGHT(1), GVT_EDID_1920_1200, "1" },
 };
 
 /**
@@ -120,6 +130,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 		gvt->types[i].low_gm_size = vgpu_types[i].low_mm;
 		gvt->types[i].high_gm_size = vgpu_types[i].high_mm;
 		gvt->types[i].fence = vgpu_types[i].fence;
+
+		if (vgpu_types[i].weight < 1 ||
+					vgpu_types[i].weight > VGPU_MAX_WEIGHT)
+			return -EINVAL;
+
+		gvt->types[i].weight = vgpu_types[i].weight;
 		gvt->types[i].resolution = vgpu_types[i].edid;
 		gvt->types[i].avail_instance = min(low_avail / vgpu_types[i].low_mm,
 						   high_avail / vgpu_types[i].high_mm);
@@ -131,11 +147,12 @@ int intel_gvt_init_vgpu_types(struct intel_gvt *gvt)
 			sprintf(gvt->types[i].name, "GVTg_V5_%s",
 						vgpu_types[i].name);
 
-		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u res %s\n",
+		gvt_dbg_core("type[%d]: %s avail %u low %u high %u fence %u weight %u res %s\n",
 			     i, gvt->types[i].name,
 			     gvt->types[i].avail_instance,
 			     gvt->types[i].low_gm_size,
 			     gvt->types[i].high_gm_size, gvt->types[i].fence,
+			     gvt->types[i].weight,
 			     vgpu_edid_str(gvt->types[i].resolution));
 	}
 
@@ -179,20 +196,34 @@ static void intel_gvt_update_vgpu_types(struct intel_gvt *gvt)
 }
 
 /**
- * intel_gvt_destroy_vgpu - destroy a virtual GPU
+ * intel_gvt_active_vgpu - activate a virtual GPU
  * @vgpu: virtual GPU
  *
- * This function is called when user wants to destroy a virtual GPU.
+ * This function is called when user wants to activate a virtual GPU.
  *
  */
-void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
+void intel_gvt_activate_vgpu(struct intel_vgpu *vgpu)
+{
+	mutex_lock(&vgpu->gvt->lock);
+	vgpu->active = true;
+	mutex_unlock(&vgpu->gvt->lock);
+}
+
+/**
+ * intel_gvt_deactive_vgpu - deactivate a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to deactivate a virtual GPU.
+ * All virtual GPU runtime information will be destroyed.
+ *
+ */
+void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu)
 {
 	struct intel_gvt *gvt = vgpu->gvt;
 
 	mutex_lock(&gvt->lock);
 
 	vgpu->active = false;
-	idr_remove(&gvt->vgpu_idr, vgpu->id);
 
 	if (atomic_read(&vgpu->running_workload_num)) {
 		mutex_unlock(&gvt->lock);
@@ -201,6 +232,26 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	}
 
 	intel_vgpu_stop_schedule(vgpu);
+
+	mutex_unlock(&gvt->lock);
+}
+
+/**
+ * intel_gvt_destroy_vgpu - destroy a virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy a virtual GPU.
+ *
+ */
+void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
+{
+	struct intel_gvt *gvt = vgpu->gvt;
+
+	mutex_lock(&gvt->lock);
+
+	WARN(vgpu->active, "vGPU is still active!\n");
+
+	idr_remove(&gvt->vgpu_idr, vgpu->id);
 	intel_vgpu_clean_sched_policy(vgpu);
 	intel_vgpu_clean_gvt_context(vgpu);
 	intel_vgpu_clean_execlist(vgpu);
@@ -216,6 +267,59 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu)
 	mutex_unlock(&gvt->lock);
 }
 
+#define IDLE_VGPU_IDR 0
+
+/**
+ * intel_gvt_create_idle_vgpu - create an idle virtual GPU
+ * @gvt: GVT device
+ *
+ * This function is called when user wants to create an idle virtual GPU.
+ *
+ * Returns:
+ * pointer to intel_vgpu, error pointer if failed.
+ */
+struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt)
+{
+	struct intel_vgpu *vgpu;
+	enum intel_engine_id i;
+	int ret;
+
+	vgpu = vzalloc(sizeof(*vgpu));
+	if (!vgpu)
+		return ERR_PTR(-ENOMEM);
+
+	vgpu->id = IDLE_VGPU_IDR;
+	vgpu->gvt = gvt;
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		INIT_LIST_HEAD(&vgpu->workload_q_head[i]);
+
+	ret = intel_vgpu_init_sched_policy(vgpu);
+	if (ret)
+		goto out_free_vgpu;
+
+	vgpu->active = false;
+
+	return vgpu;
+
+out_free_vgpu:
+	vfree(vgpu);
+	return ERR_PTR(ret);
+}
+
+/**
+ * intel_gvt_destroy_vgpu - destroy an idle virtual GPU
+ * @vgpu: virtual GPU
+ *
+ * This function is called when user wants to destroy an idle virtual GPU.
+ *
+ */
+void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu)
+{
+	intel_vgpu_clean_sched_policy(vgpu);
+	vfree(vgpu);
+}
+
 static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 		struct intel_vgpu_creation_params *param)
 {
@@ -232,13 +336,15 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 
 	mutex_lock(&gvt->lock);
 
-	ret = idr_alloc(&gvt->vgpu_idr, vgpu, 1, GVT_MAX_VGPU, GFP_KERNEL);
+	ret = idr_alloc(&gvt->vgpu_idr, vgpu, IDLE_VGPU_IDR + 1, GVT_MAX_VGPU,
+		GFP_KERNEL);
 	if (ret < 0)
 		goto out_free_vgpu;
 
 	vgpu->id = ret;
 	vgpu->handle = param->handle;
 	vgpu->gvt = gvt;
+	vgpu->sched_ctl.weight = param->weight;
 	bitmap_zero(vgpu->tlb_handle_pending, I915_NUM_ENGINES);
 
 	intel_vgpu_init_cfg_space(vgpu, param->primary);
@@ -277,7 +383,6 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	if (ret)
 		goto out_clean_shadow_ctx;
 
-	vgpu->active = true;
 	mutex_unlock(&gvt->lock);
 
 	return vgpu;
@@ -325,6 +430,7 @@ struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
 	param.low_gm_sz = type->low_gm_size;
 	param.high_gm_sz = type->high_gm_size;
 	param.fence_sz = type->fence;
+	param.weight = type->weight;
 	param.resolution = type->resolution;
 
 	/* XXX current param based on MB */
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 47e707d83c4d..d689e511744e 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -1012,9 +1012,12 @@ static int gpu_state_release(struct inode *inode, struct file *file)
 
 static int i915_gpu_info_open(struct inode *inode, struct file *file)
 {
+	struct drm_i915_private *i915 = inode->i_private;
 	struct i915_gpu_state *gpu;
 
-	gpu = i915_capture_gpu_state(inode->i_private);
+	intel_runtime_pm_get(i915);
+	gpu = i915_capture_gpu_state(i915);
+	intel_runtime_pm_put(i915);
 	if (!gpu)
 		return -ENOMEM;
 
@@ -1459,16 +1462,14 @@ static int ironlake_drpc_info(struct seq_file *m)
 
 static int i915_forcewake_domains(struct seq_file *m, void *data)
 {
-	struct drm_i915_private *dev_priv = node_to_i915(m->private);
+	struct drm_i915_private *i915 = node_to_i915(m->private);
 	struct intel_uncore_forcewake_domain *fw_domain;
+	unsigned int tmp;
 
-	spin_lock_irq(&dev_priv->uncore.lock);
-	for_each_fw_domain(fw_domain, dev_priv) {
+	for_each_fw_domain(fw_domain, i915, tmp)
 		seq_printf(m, "%s.wake_count = %u\n",
 			   intel_uncore_forcewake_domain_to_str(fw_domain->id),
-			   fw_domain->wake_count);
-	}
-	spin_unlock_irq(&dev_priv->uncore.lock);
+			   READ_ONCE(fw_domain->wake_count));
 
 	return 0;
 }
@@ -1938,9 +1939,8 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 
 static void describe_ctx_ring(struct seq_file *m, struct intel_ring *ring)
 {
-	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u, last head: %d)",
-		   ring->space, ring->head, ring->tail,
-		   ring->last_retired_head);
+	seq_printf(m, " (ringbuffer, space: %d, head: %u, tail: %u)",
+		   ring->space, ring->head, ring->tail);
 }
 
 static int i915_context_status(struct seq_file *m, void *unused)
@@ -2474,9 +2474,9 @@ static void i915_guc_client_info(struct seq_file *m,
 	enum intel_engine_id id;
 	uint64_t tot = 0;
 
-	seq_printf(m, "\tPriority %d, GuC ctx index: %u, PD offset 0x%x\n",
-		client->priority, client->ctx_index, client->proc_desc_offset);
-	seq_printf(m, "\tDoorbell id %d, offset: 0x%x, cookie 0x%x\n",
+	seq_printf(m, "\tPriority %d, GuC stage index: %u, PD offset 0x%x\n",
+		client->priority, client->stage_id, client->proc_desc_offset);
+	seq_printf(m, "\tDoorbell id %d, offset: 0x%lx, cookie 0x%x\n",
 		client->doorbell_id, client->doorbell_offset, client->doorbell_cookie);
 	seq_printf(m, "\tWQ size %d, offset: 0x%x, tail %d\n",
 		client->wq_size, client->wq_offset, client->wq_tail);
@@ -2511,7 +2511,7 @@ static int i915_guc_info(struct seq_file *m, void *data)
 	}
 
 	seq_printf(m, "Doorbell map:\n");
-	seq_printf(m, "\t%*pb\n", GUC_MAX_DOORBELLS, guc->doorbell_bitmap);
+	seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap);
 	seq_printf(m, "Doorbell next cacheline: 0x%x\n\n", guc->db_cacheline);
 
 	seq_printf(m, "GuC total action count: %llu\n", guc->action_count);
@@ -4129,7 +4129,9 @@ i915_wedged_get(void *data, u64 *val)
 static int
 i915_wedged_set(void *data, u64 val)
 {
-	struct drm_i915_private *dev_priv = data;
+	struct drm_i915_private *i915 = data;
+	struct intel_engine_cs *engine;
+	unsigned int tmp;
 
 	/*
 	 * There is no safeguard against this debugfs entry colliding
@@ -4139,13 +4141,17 @@ i915_wedged_set(void *data, u64 val)
 	 * while it is writing to 'i915_wedged'
 	 */
 
-	if (i915_reset_backoff(&dev_priv->gpu_error))
+	if (i915_reset_backoff(&i915->gpu_error))
 		return -EAGAIN;
 
-	i915_handle_error(dev_priv, val,
-			  "Manually setting wedged to %llu", val);
+	for_each_engine_masked(engine, i915, val, tmp) {
+		engine->hangcheck.seqno = intel_engine_get_seqno(engine);
+		engine->hangcheck.stalled = true;
+	}
+
+	i915_handle_error(i915, val, "Manually setting wedged to %llu", val);
 
-	wait_on_bit(&dev_priv->gpu_error.flags,
+	wait_on_bit(&i915->gpu_error.flags,
 		    I915_RESET_HANDOFF,
 		    TASK_UNINTERRUPTIBLE);
 
@@ -4173,10 +4179,6 @@ fault_irq_set(struct drm_i915_private *i915,
 	if (err)
 		goto err_unlock;
 
-	/* Retire to kick idle work */
-	i915_gem_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests);
-
 	*irq = val;
 	mutex_unlock(&i915->drm.struct_mutex);
 
@@ -4280,7 +4282,7 @@ i915_drop_caches_set(void *data, u64 val)
 			goto unlock;
 	}
 
-	if (val & (DROP_RETIRE | DROP_ACTIVE))
+	if (val & DROP_RETIRE)
 		i915_gem_retire_requests(dev_priv);
 
 	lockdep_set_current_reclaim_state(GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 98b17070a123..3036d4835b0f 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -549,6 +549,7 @@ static const struct vga_switcheroo_client_ops i915_switcheroo_ops = {
 static void i915_gem_fini(struct drm_i915_private *dev_priv)
 {
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	intel_uc_fini_hw(dev_priv);
 	i915_gem_cleanup_engines(dev_priv);
 	i915_gem_context_fini(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
@@ -609,7 +610,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	ret = i915_gem_init(dev_priv);
 	if (ret)
-		goto cleanup_irq;
+		goto cleanup_uc;
 
 	intel_modeset_gem_init(dev);
 
@@ -631,9 +632,9 @@ cleanup_gem:
 	if (i915_gem_suspend(dev_priv))
 		DRM_ERROR("failed to idle hardware; continuing to unload!\n");
 	i915_gem_fini(dev_priv);
+cleanup_uc:
+	intel_uc_fini_fw(dev_priv);
 cleanup_irq:
-	intel_guc_fini(dev_priv);
-	intel_huc_fini(dev_priv);
 	drm_irq_uninstall(dev);
 	intel_teardown_gmbus(dev_priv);
 cleanup_csr:
@@ -1351,9 +1352,8 @@ void i915_driver_unload(struct drm_device *dev)
 	/* Flush any outstanding unpin_work. */
 	drain_workqueue(dev_priv->wq);
 
-	intel_guc_fini(dev_priv);
-	intel_huc_fini(dev_priv);
 	i915_gem_fini(dev_priv);
+	intel_uc_fini_fw(dev_priv);
 	intel_fbc_cleanup_cfb(dev_priv);
 
 	intel_power_domains_fini(dev_priv);
@@ -1469,8 +1469,6 @@ static int i915_drm_suspend(struct drm_device *dev)
 		goto out;
 	}
 
-	intel_guc_suspend(dev_priv);
-
 	intel_display_suspend(dev);
 
 	intel_dp_mst_suspend(dev);
@@ -2177,6 +2175,20 @@ static void vlv_restore_gunit_s0ix_state(struct drm_i915_private *dev_priv)
 	I915_WRITE(VLV_GUNIT_CLOCK_GATE2,	s->clock_gate_dis2);
 }
 
+static int vlv_wait_for_pw_status(struct drm_i915_private *dev_priv,
+				  u32 mask, u32 val)
+{
+	/* The HW does not like us polling for PW_STATUS frequently, so
+	 * use the sleeping loop rather than risk the busy spin within
+	 * intel_wait_for_register().
+	 *
+	 * Transitioning between RC6 states should be at most 2ms (see
+	 * valleyview_enable_rps) so use a 3ms timeout.
+	 */
+	return wait_for((I915_READ_NOTRACE(VLV_GTLC_PW_STATUS) & mask) == val,
+			3);
+}
+
 int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
 {
 	u32 val;
@@ -2205,8 +2217,9 @@ int vlv_force_gfx_clock(struct drm_i915_private *dev_priv, bool force_on)
 
 static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
 {
+	u32 mask;
 	u32 val;
-	int err = 0;
+	int err;
 
 	val = I915_READ(VLV_GTLC_WAKE_CTRL);
 	val &= ~VLV_GTLC_ALLOWWAKEREQ;
@@ -2215,45 +2228,32 @@ static int vlv_allow_gt_wake(struct drm_i915_private *dev_priv, bool allow)
 	I915_WRITE(VLV_GTLC_WAKE_CTRL, val);
 	POSTING_READ(VLV_GTLC_WAKE_CTRL);
 
-	err = intel_wait_for_register(dev_priv,
-				      VLV_GTLC_PW_STATUS,
-				      VLV_GTLC_ALLOWWAKEACK,
-				      allow,
-				      1);
+	mask = VLV_GTLC_ALLOWWAKEACK;
+	val = allow ? mask : 0;
+
+	err = vlv_wait_for_pw_status(dev_priv, mask, val);
 	if (err)
 		DRM_ERROR("timeout disabling GT waking\n");
 
 	return err;
 }
 
-static int vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
-				 bool wait_for_on)
+static void vlv_wait_for_gt_wells(struct drm_i915_private *dev_priv,
+				  bool wait_for_on)
 {
 	u32 mask;
 	u32 val;
-	int err;
 
 	mask = VLV_GTLC_PW_MEDIA_STATUS_MASK | VLV_GTLC_PW_RENDER_STATUS_MASK;
 	val = wait_for_on ? mask : 0;
-	if ((I915_READ(VLV_GTLC_PW_STATUS) & mask) == val)
-		return 0;
-
-	DRM_DEBUG_KMS("waiting for GT wells to go %s (%08x)\n",
-		      onoff(wait_for_on),
-		      I915_READ(VLV_GTLC_PW_STATUS));
 
 	/*
 	 * RC6 transitioning can be delayed up to 2 msec (see
 	 * valleyview_enable_rps), use 3 msec for safety.
 	 */
-	err = intel_wait_for_register(dev_priv,
-				      VLV_GTLC_PW_STATUS, mask, val,
-				      3);
-	if (err)
+	if (vlv_wait_for_pw_status(dev_priv, mask, val))
 		DRM_ERROR("timeout waiting for GT wells to go %s\n",
 			  onoff(wait_for_on));
-
-	return err;
 }
 
 static void vlv_check_no_gt_access(struct drm_i915_private *dev_priv)
@@ -2274,7 +2274,7 @@ static int vlv_suspend_complete(struct drm_i915_private *dev_priv)
 	 * Bspec defines the following GT well on flags as debug only, so
 	 * don't treat them as hard failures.
 	 */
-	(void)vlv_wait_for_gt_wells(dev_priv, false);
+	vlv_wait_for_gt_wells(dev_priv, false);
 
 	mask = VLV_GTLC_RENDER_CTX_EXISTS | VLV_GTLC_MEDIA_CTX_EXISTS;
 	WARN_ON((I915_READ(VLV_GTLC_WAKE_CTRL) & mask) != mask);
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index a5947a496d0a..c9b0949f6c1a 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -79,26 +79,8 @@
 
 #define DRIVER_NAME		"i915"
 #define DRIVER_DESC		"Intel Graphics"
-#define DRIVER_DATE		"20170320"
-#define DRIVER_TIMESTAMP	1489994464
-
-#undef WARN_ON
-/* Many gcc seem to no see through this and fall over :( */
-#if 0
-#define WARN_ON(x) ({ \
-	bool __i915_warn_cond = (x); \
-	if (__builtin_constant_p(__i915_warn_cond)) \
-		BUILD_BUG_ON(__i915_warn_cond); \
-	WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
-#else
-#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
-#endif
-
-#undef WARN_ON_ONCE
-#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
-
-#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
-			     (long) (x), __func__);
+#define DRIVER_DATE		"20170403"
+#define DRIVER_TIMESTAMP	1491198738
 
 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and
  * WARN_ON()) for hw state sanity checks to check for unexpected conditions
@@ -703,9 +685,9 @@ enum forcewake_domain_id {
 };
 
 enum forcewake_domains {
-	FORCEWAKE_RENDER = (1 << FW_DOMAIN_ID_RENDER),
-	FORCEWAKE_BLITTER = (1 << FW_DOMAIN_ID_BLITTER),
-	FORCEWAKE_MEDIA	= (1 << FW_DOMAIN_ID_MEDIA),
+	FORCEWAKE_RENDER = BIT(FW_DOMAIN_ID_RENDER),
+	FORCEWAKE_BLITTER = BIT(FW_DOMAIN_ID_BLITTER),
+	FORCEWAKE_MEDIA	= BIT(FW_DOMAIN_ID_MEDIA),
 	FORCEWAKE_ALL = (FORCEWAKE_RENDER |
 			 FORCEWAKE_BLITTER |
 			 FORCEWAKE_MEDIA)
@@ -732,21 +714,25 @@ intel_uncore_forcewake_for_reg(struct drm_i915_private *dev_priv,
 
 struct intel_uncore_funcs {
 	void (*force_wake_get)(struct drm_i915_private *dev_priv,
-							enum forcewake_domains domains);
+			       enum forcewake_domains domains);
 	void (*force_wake_put)(struct drm_i915_private *dev_priv,
-							enum forcewake_domains domains);
-
-	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv, i915_reg_t r, bool trace);
-
-	void (*mmio_writeb)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint8_t val, bool trace);
-	void (*mmio_writew)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint16_t val, bool trace);
-	void (*mmio_writel)(struct drm_i915_private *dev_priv, i915_reg_t r,
-				uint32_t val, bool trace);
+			       enum forcewake_domains domains);
+
+	uint8_t  (*mmio_readb)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint16_t (*mmio_readw)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint32_t (*mmio_readl)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+	uint64_t (*mmio_readq)(struct drm_i915_private *dev_priv,
+			       i915_reg_t r, bool trace);
+
+	void (*mmio_writeb)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint8_t val, bool trace);
+	void (*mmio_writew)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint16_t val, bool trace);
+	void (*mmio_writel)(struct drm_i915_private *dev_priv,
+			    i915_reg_t r, uint32_t val, bool trace);
 };
 
 struct intel_forcewake_range {
@@ -770,32 +756,35 @@ struct intel_uncore {
 	enum forcewake_domains fw_domains;
 	enum forcewake_domains fw_domains_active;
 
+	u32 fw_set;
+	u32 fw_clear;
+	u32 fw_reset;
+
 	struct intel_uncore_forcewake_domain {
-		struct drm_i915_private *i915;
 		enum forcewake_domain_id id;
 		enum forcewake_domains mask;
 		unsigned wake_count;
 		struct hrtimer timer;
 		i915_reg_t reg_set;
-		u32 val_set;
-		u32 val_clear;
 		i915_reg_t reg_ack;
-		i915_reg_t reg_post;
-		u32 val_reset;
 	} fw_domain[FW_DOMAIN_ID_COUNT];
 
 	int unclaimed_mmio_check;
 };
 
+#define __mask_next_bit(mask) ({					\
+	int __idx = ffs(mask) - 1;					\
+	mask &= ~BIT(__idx);						\
+	__idx;								\
+})
+
 /* Iterate over initialised fw domains */
-#define for_each_fw_domain_masked(domain__, mask__, dev_priv__) \
-	for ((domain__) = &(dev_priv__)->uncore.fw_domain[0]; \
-	     (domain__) < &(dev_priv__)->uncore.fw_domain[FW_DOMAIN_ID_COUNT]; \
-	     (domain__)++) \
-		for_each_if ((mask__) & (domain__)->mask)
+#define for_each_fw_domain_masked(domain__, mask__, dev_priv__, tmp__) \
+	for (tmp__ = (mask__); \
+	     tmp__ ? (domain__ = &(dev_priv__)->uncore.fw_domain[__mask_next_bit(tmp__)]), 1 : 0;)
 
-#define for_each_fw_domain(domain__, dev_priv__) \
-	for_each_fw_domain_masked(domain__, FORCEWAKE_ALL, dev_priv__)
+#define for_each_fw_domain(domain__, dev_priv__, tmp__) \
+	for_each_fw_domain_masked(domain__, (dev_priv__)->uncore.fw_domains, dev_priv__, tmp__)
 
 #define CSR_VERSION(major, minor)	((major) << 16 | (minor))
 #define CSR_VERSION_MAJOR(version)	((version) >> 16)
@@ -846,6 +835,7 @@ struct intel_csr {
 	func(has_resource_streamer); \
 	func(has_runtime_pm); \
 	func(has_snoop); \
+	func(unfenced_needs_alignment); \
 	func(cursor_needs_physical); \
 	func(hws_needs_physical); \
 	func(overlay_needs_physical); \
@@ -2578,12 +2568,6 @@ static inline struct drm_i915_private *huc_to_i915(struct intel_huc *huc)
 	     (id__)++) \
 		for_each_if ((engine__) = (dev_priv__)->engine[(id__)])
 
-#define __mask_next_bit(mask) ({					\
-	int __idx = ffs(mask) - 1;					\
-	mask &= ~BIT(__idx);						\
-	__idx;								\
-})
-
 /* Iterator over subset of engines selected by mask */
 #define for_each_engine_masked(engine__, dev_priv__, mask__, tmp__) \
 	for (tmp__ = mask__ & INTEL_INFO(dev_priv__)->ring_mask;	\
@@ -3956,14 +3940,14 @@ u64 intel_rc6_residency_us(struct drm_i915_private *dev_priv,
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 #define __raw_read(x, s) \
-static inline uint##x##_t __raw_i915_read##x(struct drm_i915_private *dev_priv, \
+static inline uint##x##_t __raw_i915_read##x(const struct drm_i915_private *dev_priv, \
 					     i915_reg_t reg) \
 { \
 	return read##s(dev_priv->regs + i915_mmio_reg_offset(reg)); \
 }
 
 #define __raw_write(x, s) \
-static inline void __raw_i915_write##x(struct drm_i915_private *dev_priv, \
+static inline void __raw_i915_write##x(const struct drm_i915_private *dev_priv, \
 				       i915_reg_t reg, uint##x##_t val) \
 { \
 	write##s(val, dev_priv->regs + i915_mmio_reg_offset(reg)); \
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 58e1db77d70e..532a577ff7a1 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2321,7 +2321,7 @@ rebuild_st:
 	st->nents = 0;
 	for (i = 0; i < page_count; i++) {
 		page = shmem_read_mapping_page_gfp(mapping, i, gfp);
-		if (IS_ERR(page)) {
+		if (unlikely(IS_ERR(page))) {
 			i915_gem_shrink(dev_priv,
 					page_count,
 					I915_SHRINK_BOUND |
@@ -2329,12 +2329,21 @@ rebuild_st:
 					I915_SHRINK_PURGEABLE);
 			page = shmem_read_mapping_page_gfp(mapping, i, gfp);
 		}
-		if (IS_ERR(page)) {
+		if (unlikely(IS_ERR(page))) {
+			gfp_t reclaim;
+
 			/* We've tried hard to allocate the memory by reaping
 			 * our own buffer, now let the real VM do its job and
 			 * go down in flames if truly OOM.
+			 *
+			 * However, since graphics tend to be disposable,
+			 * defer the oom here by reporting the ENOMEM back
+			 * to userspace.
 			 */
-			page = shmem_read_mapping_page(mapping, i);
+			reclaim = mapping_gfp_mask(mapping);
+			reclaim |= __GFP_NORETRY; /* reclaim, but no oom */
+
+			page = shmem_read_mapping_page_gfp(mapping, i, reclaim);
 			if (IS_ERR(page)) {
 				ret = PTR_ERR(page);
 				goto err_sg;
@@ -2989,10 +2998,15 @@ void i915_gem_set_wedged(struct drm_i915_private *dev_priv)
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 	set_bit(I915_WEDGED, &dev_priv->gpu_error.flags);
 
+	/* Retire completed requests first so the list of inflight/incomplete
+	 * requests is accurate and we don't try and mark successful requests
+	 * as in error during __i915_gem_set_wedged_BKL().
+	 */
+	i915_gem_retire_requests(dev_priv);
+
 	stop_machine(__i915_gem_set_wedged_BKL, dev_priv, NULL);
 
 	i915_gem_context_lost(dev_priv);
-	i915_gem_retire_requests(dev_priv);
 
 	mod_delayed_work(dev_priv->wq, &dev_priv->gt.idle_work, 0);
 }
@@ -3098,9 +3112,7 @@ i915_gem_idle_work_handler(struct work_struct *work)
 	 * Wait for last execlists context complete, but bail out in case a
 	 * new request is submitted.
 	 */
-	wait_for(READ_ONCE(dev_priv->gt.active_requests) ||
-		 intel_engines_are_idle(dev_priv),
-		 10);
+	wait_for(intel_engines_are_idle(dev_priv), 10);
 	if (READ_ONCE(dev_priv->gt.active_requests))
 		return;
 
@@ -3259,6 +3271,29 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags)
 	return 0;
 }
 
+static int wait_for_engine(struct intel_engine_cs *engine, int timeout_ms)
+{
+	return wait_for(intel_engine_is_idle(engine), timeout_ms);
+}
+
+static int wait_for_engines(struct drm_i915_private *i915)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+
+	for_each_engine(engine, i915, id) {
+		if (GEM_WARN_ON(wait_for_engine(engine, 50))) {
+			i915_gem_set_wedged(i915);
+			return -EIO;
+		}
+
+		GEM_BUG_ON(intel_engine_get_seqno(engine) !=
+			   intel_engine_last_submit(engine));
+	}
+
+	return 0;
+}
+
 int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 {
 	int ret;
@@ -3273,13 +3308,16 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags)
 			if (ret)
 				return ret;
 		}
+
+		i915_gem_retire_requests(i915);
+		GEM_BUG_ON(i915->gt.active_requests);
+
+		ret = wait_for_engines(i915);
 	} else {
 		ret = wait_for_timeline(&i915->gt.global_timeline, flags);
-		if (ret)
-			return ret;
 	}
 
-	return 0;
+	return ret;
 }
 
 /** Flushes the GTT write domain for the object if it's dirty. */
@@ -3307,8 +3345,14 @@ i915_gem_object_flush_gtt_write_domain(struct drm_i915_gem_object *obj)
 	 * system agents we cannot reproduce this behaviour).
 	 */
 	wmb();
-	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv))
-		POSTING_READ(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+	if (INTEL_GEN(dev_priv) >= 6 && !HAS_LLC(dev_priv)) {
+		if (intel_runtime_pm_get_if_in_use(dev_priv)) {
+			spin_lock_irq(&dev_priv->uncore.lock);
+			POSTING_READ_FW(RING_ACTHD(dev_priv->engine[RCS]->mmio_base));
+			spin_unlock_irq(&dev_priv->uncore.lock);
+			intel_runtime_pm_put(dev_priv);
+		}
+	}
 
 	intel_fb_obj_flush(obj, write_origin(obj, I915_GEM_DOMAIN_GTT));
 
@@ -4408,13 +4452,12 @@ int i915_gem_suspend(struct drm_i915_private *dev_priv)
 	if (ret)
 		goto err_unlock;
 
-	i915_gem_retire_requests(dev_priv);
-	GEM_BUG_ON(dev_priv->gt.active_requests);
-
 	assert_kernel_context_is_current(dev_priv);
 	i915_gem_context_lost(dev_priv);
 	mutex_unlock(&dev->struct_mutex);
 
+	intel_guc_suspend(dev_priv);
+
 	cancel_delayed_work_sync(&dev_priv->gpu_error.hangcheck_work);
 	cancel_delayed_work_sync(&dev_priv->gt.retire_work);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c
index d925fb582ba7..ffd01e02fe94 100644
--- a/drivers/gpu/drm/i915/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/i915_gem_clflush.c
@@ -168,7 +168,7 @@ void i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 
 		i915_sw_fence_await_reservation(&clflush->wait,
 						obj->resv, NULL,
-						false, I915_FENCE_TIMEOUT,
+						true, I915_FENCE_TIMEOUT,
 						GFP_KERNEL);
 
 		reservation_object_lock(obj->resv, NULL);
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 486051ed681d..8bd0c4966913 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -576,25 +576,25 @@ void i915_gem_context_close(struct drm_device *dev, struct drm_file *file)
 }
 
 static inline int
-mi_set_context(struct drm_i915_gem_request *req, u32 hw_flags)
+mi_set_context(struct drm_i915_gem_request *req, u32 flags)
 {
 	struct drm_i915_private *dev_priv = req->i915;
 	struct intel_engine_cs *engine = req->engine;
 	enum intel_engine_id id;
-	u32 *cs, flags = hw_flags | MI_MM_SPACE_GTT;
 	const int num_rings =
-		/* Use an extended w/a on ivb+ if signalling from other rings */
-		i915.semaphores ?
+		/* Use an extended w/a on gen7 if signalling from other rings */
+		(i915.semaphores && INTEL_GEN(dev_priv) == 7) ?
 		INTEL_INFO(dev_priv)->num_rings - 1 :
 		0;
 	int len;
+	u32 *cs;
 
-	/* These flags are for resource streamer on HSW+ */
+	flags |= MI_MM_SPACE_GTT;
 	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
-		flags |= (HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN);
-	else if (INTEL_GEN(dev_priv) < 8)
-		flags |= (MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN);
-
+		/* These flags are for resource streamer on HSW+ */
+		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
+	else
+		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
 
 	len = 4;
 	if (INTEL_GEN(dev_priv) >= 7)
diff --git a/drivers/gpu/drm/i915/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
index 11898cd97596..f225bf680b6d 100644
--- a/drivers/gpu/drm/i915/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/i915_gem_dmabuf.c
@@ -200,10 +200,10 @@ static const struct dma_buf_ops i915_dmabuf_ops =  {
 	.map_dma_buf = i915_gem_map_dma_buf,
 	.unmap_dma_buf = i915_gem_unmap_dma_buf,
 	.release = drm_gem_dmabuf_release,
-	.kmap = i915_gem_dmabuf_kmap,
-	.kmap_atomic = i915_gem_dmabuf_kmap_atomic,
-	.kunmap = i915_gem_dmabuf_kunmap,
-	.kunmap_atomic = i915_gem_dmabuf_kunmap_atomic,
+	.map = i915_gem_dmabuf_kmap,
+	.map_atomic = i915_gem_dmabuf_kmap_atomic,
+	.unmap = i915_gem_dmabuf_kunmap,
+	.unmap_atomic = i915_gem_dmabuf_kunmap_atomic,
 	.mmap = i915_gem_dmabuf_mmap,
 	.vmap = i915_gem_dmabuf_vmap,
 	.vunmap = i915_gem_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/i915/i915_gem_evict.c b/drivers/gpu/drm/i915/i915_gem_evict.c
index 2da3a94fc9f3..51e365f70464 100644
--- a/drivers/gpu/drm/i915/i915_gem_evict.c
+++ b/drivers/gpu/drm/i915/i915_gem_evict.c
@@ -196,7 +196,6 @@ search_again:
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests(dev_priv);
 	goto search_again;
 
 found:
@@ -383,7 +382,6 @@ int i915_gem_evict_vm(struct i915_address_space *vm, bool do_idle)
 		if (ret)
 			return ret;
 
-		i915_gem_retire_requests(dev_priv);
 		WARN_ON(!list_empty(&vm->active_list));
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index dd7181ed5eca..a3e59c8ef27b 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -890,6 +890,7 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 	struct list_head ordered_vmas;
 	struct list_head pinned_vmas;
 	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
+	bool needs_unfenced_map = INTEL_INFO(engine->i915)->unfenced_needs_alignment;
 	int retry;
 
 	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;
@@ -910,7 +911,8 @@ i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
 		if (!has_fenced_gpu_access)
 			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
 		need_fence =
-			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
+			(entry->flags & EXEC_OBJECT_NEEDS_FENCE ||
+			 needs_unfenced_map) &&
 			i915_gem_object_is_tiled(obj);
 		need_mappable = need_fence || need_reloc_mappable(vma);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index cee9c4fec52a..8bab4aea63e6 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2364,7 +2364,7 @@ void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
 	struct i915_ggtt *ggtt = &dev_priv->ggtt;
 
 	if (unlikely(ggtt->do_idle_maps)) {
-		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
+		if (i915_gem_wait_for_idle(dev_priv, 0)) {
 			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
 			/* Wait a bit, in hopes it avoids the hang */
 			udelay(10);
diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c
index 0e8d1010cecb..5ddbc9499775 100644
--- a/drivers/gpu/drm/i915/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/i915_gem_request.c
@@ -37,6 +37,17 @@ static const char *i915_fence_get_driver_name(struct dma_fence *fence)
 
 static const char *i915_fence_get_timeline_name(struct dma_fence *fence)
 {
+	/* The timeline struct (as part of the ppgtt underneath a context)
+	 * may be freed when the request is no longer in use by the GPU.
+	 * We could extend the life of a context to beyond that of all
+	 * fences, possibly keeping the hw resource around indefinitely,
+	 * or we just give them a false name. Since
+	 * dma_fence_ops.get_timeline_name is a debug feature, the occasional
+	 * lie seems justifiable.
+	 */
+	if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
+		return "signaled";
+
 	return to_request(fence)->timeline->common->name;
 }
 
@@ -180,7 +191,6 @@ i915_priotree_init(struct i915_priotree *pt)
 
 static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 {
-	struct i915_gem_timeline *timeline = &i915->gt.global_timeline;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 	int ret;
@@ -192,15 +202,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 	if (ret)
 		return ret;
 
-	i915_gem_retire_requests(i915);
-	GEM_BUG_ON(i915->gt.active_requests > 1);
-
 	/* If the seqno wraps around, we need to clear the breadcrumb rbtree */
 	for_each_engine(engine, i915, id) {
-		struct intel_timeline *tl = &timeline->engine[id];
-
-		if (wait_for(intel_engine_is_idle(engine), 50))
-			return -EBUSY;
+		struct i915_gem_timeline *timeline;
+		struct intel_timeline *tl = engine->timeline;
 
 		if (!i915_seqno_passed(seqno, tl->seqno)) {
 			/* spin until threads are complete */
@@ -211,14 +216,10 @@ static int reset_all_global_seqno(struct drm_i915_private *i915, u32 seqno)
 		/* Finally reset hw state */
 		tl->seqno = seqno;
 		intel_engine_init_global_seqno(engine, seqno);
-	}
-
-	list_for_each_entry(timeline, &i915->gt.timelines, link) {
-		for_each_engine(engine, i915, id) {
-			struct intel_timeline *tl = &timeline->engine[id];
 
-			memset(tl->sync_seqno, 0, sizeof(tl->sync_seqno));
-		}
+		list_for_each_entry(timeline, &i915->gt.timelines, link)
+			memset(timeline->engine[id].sync_seqno, 0,
+			       sizeof(timeline->engine[id].sync_seqno));
 	}
 
 	return 0;
@@ -295,7 +296,7 @@ static void i915_gem_request_retire(struct drm_i915_gem_request *request)
 	 * completion order.
 	 */
 	list_del(&request->ring_link);
-	request->ring->last_retired_head = request->postfix;
+	request->ring->head = request->postfix;
 	if (!--request->i915->gt.active_requests) {
 		GEM_BUG_ON(!request->i915->gt.awake);
 		mod_delayed_work(request->i915->wq,
@@ -651,6 +652,9 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to,
 
 	GEM_BUG_ON(to == from);
 
+	if (i915_gem_request_completed(from))
+		return 0;
+
 	if (to->engine->schedule) {
 		ret = i915_priotree_add_dependency(to->i915,
 						   &to->priotree,
diff --git a/drivers/gpu/drm/i915/i915_gem_shrinker.c b/drivers/gpu/drm/i915/i915_gem_shrinker.c
index 2978acdd995e..129ed303a6c4 100644
--- a/drivers/gpu/drm/i915/i915_gem_shrinker.c
+++ b/drivers/gpu/drm/i915/i915_gem_shrinker.c
@@ -53,6 +53,17 @@ static bool i915_gem_shrinker_lock(struct drm_device *dev, bool *unlock)
 	BUG();
 }
 
+static void i915_gem_shrinker_unlock(struct drm_device *dev, bool unlock)
+{
+	if (!unlock)
+		return;
+
+	mutex_unlock(&dev->struct_mutex);
+
+	/* expedite the RCU grace period to free some request slabs */
+	synchronize_rcu_expedited();
+}
+
 static bool any_vma_pinned(struct drm_i915_gem_object *obj)
 {
 	struct i915_vma *vma;
@@ -232,11 +243,8 @@ i915_gem_shrink(struct drm_i915_private *dev_priv,
 		intel_runtime_pm_put(dev_priv);
 
 	i915_gem_retire_requests(dev_priv);
-	if (unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
 
-	/* expedite the RCU grace period to free some request slabs */
-	synchronize_rcu_expedited();
+	i915_gem_shrinker_unlock(&dev_priv->drm, unlock);
 
 	return count;
 }
@@ -296,8 +304,7 @@ i915_gem_shrinker_count(struct shrinker *shrinker, struct shrink_control *sc)
 			count += obj->base.size >> PAGE_SHIFT;
 	}
 
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return count;
 }
@@ -324,8 +331,8 @@ i915_gem_shrinker_scan(struct shrinker *shrinker, struct shrink_control *sc)
 					 sc->nr_to_scan - freed,
 					 I915_SHRINK_BOUND |
 					 I915_SHRINK_UNBOUND);
-	if (unlock)
-		mutex_unlock(&dev->struct_mutex);
+
+	i915_gem_shrinker_unlock(dev, unlock);
 
 	return freed;
 }
@@ -367,8 +374,7 @@ i915_gem_shrinker_unlock_uninterruptible(struct drm_i915_private *dev_priv,
 					 struct shrinker_lock_uninterruptible *slu)
 {
 	dev_priv->mm.interruptible = slu->was_interruptible;
-	if (slu->unlock)
-		mutex_unlock(&dev_priv->drm.struct_mutex);
+	i915_gem_shrinker_unlock(&dev_priv->drm, slu->unlock);
 }
 
 static int
diff --git a/drivers/gpu/drm/i915/i915_guc_submission.c b/drivers/gpu/drm/i915/i915_guc_submission.c
index 832ac9e45801..1642fff9cf13 100644
--- a/drivers/gpu/drm/i915/i915_guc_submission.c
+++ b/drivers/gpu/drm/i915/i915_guc_submission.c
@@ -30,16 +30,25 @@
 /**
  * DOC: GuC-based command submission
  *
- * i915_guc_client:
- * We use the term client to avoid confusion with contexts. A i915_guc_client is
- * equivalent to GuC object guc_context_desc. This context descriptor is
- * allocated from a pool of 1024 entries. Kernel driver will allocate doorbell
- * and workqueue for it. Also the process descriptor (guc_process_desc), which
- * is mapped to client space. So the client can write Work Item then ring the
- * doorbell.
+ * GuC client:
+ * A i915_guc_client refers to a submission path through GuC. Currently, there
+ * is only one of these (the execbuf_client) and this one is charged with all
+ * submissions to the GuC. This struct is the owner of a doorbell, a process
+ * descriptor and a workqueue (all of them inside a single gem object that
+ * contains all required pages for these elements).
  *
- * To simplify the implementation, we allocate one gem object that contains all
- * pages for doorbell, process descriptor and workqueue.
+ * GuC stage descriptor:
+ * During initialization, the driver allocates a static pool of 1024 such
+ * descriptors, and shares them with the GuC.
+ * Currently, there exists a 1:1 mapping between a i915_guc_client and a
+ * guc_stage_desc (via the client's stage_id), so effectively only one
+ * gets used. This stage descriptor lets the GuC know about the doorbell,
+ * workqueue and process descriptor. Theoretically, it also lets the GuC
+ * know about our HW contexts (context ID, etc...), but we actually
+ * employ a kind of submission where the GuC uses the LRCA sent via the work
+ * item instead (the single guc_stage_desc associated to execbuf client
+ * contains information about the default kernel context only, but this is
+ * essentially unused). This is called a "proxy" submission.
  *
  * The Scratch registers:
  * There are 16 MMIO-based registers start from 0xC180. The kernel driver writes
@@ -62,34 +71,91 @@
  * ELSP context descriptor dword into Work Item.
  * See guc_wq_item_append()
  *
+ * ADS:
+ * The Additional Data Struct (ADS) has pointers for different buffers used by
+ * the GuC. One single gem object contains the ADS struct itself (guc_ads), the
+ * scheduling policies (guc_policies), a structure describing a collection of
+ * register sets (guc_mmio_reg_state) and some extra pages for the GuC to save
+ * its internal state for sleep.
+ *
  */
 
+static inline bool is_high_priority(struct i915_guc_client* client)
+{
+	return client->priority <= GUC_CLIENT_PRIORITY_HIGH;
+}
+
+static int __reserve_doorbell(struct i915_guc_client *client)
+{
+	unsigned long offset;
+	unsigned long end;
+	u16 id;
+
+	GEM_BUG_ON(client->doorbell_id != GUC_DOORBELL_INVALID);
+
+	/*
+	 * The bitmap tracks which doorbell registers are currently in use.
+	 * It is split into two halves; the first half is used for normal
+	 * priority contexts, the second half for high-priority ones.
+	 */
+	offset = 0;
+	end = GUC_NUM_DOORBELLS/2;
+	if (is_high_priority(client)) {
+		offset = end;
+		end += offset;
+	}
+
+	id = find_next_zero_bit(client->guc->doorbell_bitmap, offset, end);
+	if (id == end)
+		return -ENOSPC;
+
+	__set_bit(id, client->guc->doorbell_bitmap);
+	client->doorbell_id = id;
+	DRM_DEBUG_DRIVER("client %u (high prio=%s) reserved doorbell: %d\n",
+			 client->stage_id, yesno(is_high_priority(client)),
+			 id);
+	return 0;
+}
+
+static void __unreserve_doorbell(struct i915_guc_client *client)
+{
+	GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID);
+
+	__clear_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+	client->doorbell_id = GUC_DOORBELL_INVALID;
+}
+
 /*
  * Tell the GuC to allocate or deallocate a specific doorbell
  */
 
-static int guc_allocate_doorbell(struct intel_guc *guc,
-				 struct i915_guc_client *client)
+static int __guc_allocate_doorbell(struct intel_guc *guc, u32 stage_id)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_ALLOCATE_DOORBELL,
-		client->ctx_index
+		stage_id
 	};
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-static int guc_release_doorbell(struct intel_guc *guc,
-				struct i915_guc_client *client)
+static int __guc_deallocate_doorbell(struct intel_guc *guc, u32 stage_id)
 {
 	u32 action[] = {
 		INTEL_GUC_ACTION_DEALLOCATE_DOORBELL,
-		client->ctx_index
+		stage_id
 	};
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
+static struct guc_stage_desc *__get_stage_desc(struct i915_guc_client *client)
+{
+	struct guc_stage_desc *base = client->guc->stage_desc_pool_vaddr;
+
+	return &base[client->stage_id];
+}
+
 /*
  * Initialise, update, or clear doorbell data shared with the GuC
  *
@@ -97,107 +163,129 @@ static int guc_release_doorbell(struct intel_guc *guc,
  * client object which contains the page being used for the doorbell
  */
 
-static int guc_update_doorbell_id(struct intel_guc *guc,
-				  struct i915_guc_client *client,
-				  u16 new_id)
+static void __update_doorbell_desc(struct i915_guc_client *client, u16 new_id)
 {
-	struct sg_table *sg = guc->ctx_pool_vma->pages;
-	void *doorbell_bitmap = guc->doorbell_bitmap;
-	struct guc_doorbell_info *doorbell;
-	struct guc_context_desc desc;
-	size_t len;
+	struct guc_stage_desc *desc;
 
-	doorbell = client->vaddr + client->doorbell_offset;
+	/* Update the GuC's idea of the doorbell ID */
+	desc = __get_stage_desc(client);
+	desc->db_id = new_id;
+}
 
-	if (client->doorbell_id != GUC_INVALID_DOORBELL_ID &&
-	    test_bit(client->doorbell_id, doorbell_bitmap)) {
-		/* Deactivate the old doorbell */
-		doorbell->db_status = GUC_DOORBELL_DISABLED;
-		(void)guc_release_doorbell(guc, client);
-		__clear_bit(client->doorbell_id, doorbell_bitmap);
-	}
+static struct guc_doorbell_info *__get_doorbell(struct i915_guc_client *client)
+{
+	return client->vaddr + client->doorbell_offset;
+}
 
-	/* Update the GuC's idea of the doorbell ID */
-	len = sg_pcopy_to_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
-	if (len != sizeof(desc))
-		return -EFAULT;
-	desc.db_id = new_id;
-	len = sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
-	if (len != sizeof(desc))
-		return -EFAULT;
-
-	client->doorbell_id = new_id;
-	if (new_id == GUC_INVALID_DOORBELL_ID)
-		return 0;
+static bool has_doorbell(struct i915_guc_client *client)
+{
+	if (client->doorbell_id == GUC_DOORBELL_INVALID)
+		return false;
 
-	/* Activate the new doorbell */
-	__set_bit(new_id, doorbell_bitmap);
+	return test_bit(client->doorbell_id, client->guc->doorbell_bitmap);
+}
+
+static int __create_doorbell(struct i915_guc_client *client)
+{
+	struct guc_doorbell_info *doorbell;
+	int err;
+
+	doorbell = __get_doorbell(client);
 	doorbell->db_status = GUC_DOORBELL_ENABLED;
 	doorbell->cookie = client->doorbell_cookie;
-	return guc_allocate_doorbell(guc, client);
+
+	err = __guc_allocate_doorbell(client->guc, client->stage_id);
+	if (err) {
+		doorbell->db_status = GUC_DOORBELL_DISABLED;
+		doorbell->cookie = 0;
+	}
+	return err;
 }
 
-static void guc_disable_doorbell(struct intel_guc *guc,
-				 struct i915_guc_client *client)
+static int __destroy_doorbell(struct i915_guc_client *client)
 {
-	(void)guc_update_doorbell_id(guc, client, GUC_INVALID_DOORBELL_ID);
+	struct drm_i915_private *dev_priv = guc_to_i915(client->guc);
+	struct guc_doorbell_info *doorbell;
+	u16 db_id = client->doorbell_id;
 
-	/* XXX: wait for any interrupts */
-	/* XXX: wait for workqueue to drain */
+	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
+
+	doorbell = __get_doorbell(client);
+	doorbell->db_status = GUC_DOORBELL_DISABLED;
+	doorbell->cookie = 0;
+
+	/* Doorbell release flow requires that we wait for GEN8_DRB_VALID bit
+	 * to go to zero after updating db_status before we call the GuC to
+	 * release the doorbell */
+	if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10))
+		WARN_ONCE(true, "Doorbell never became invalid after disable\n");
+
+	return __guc_deallocate_doorbell(client->guc, client->stage_id);
 }
 
-static uint16_t
-select_doorbell_register(struct intel_guc *guc, uint32_t priority)
+static int create_doorbell(struct i915_guc_client *client)
 {
-	/*
-	 * The bitmap tracks which doorbell registers are currently in use.
-	 * It is split into two halves; the first half is used for normal
-	 * priority contexts, the second half for high-priority ones.
-	 * Note that logically higher priorities are numerically less than
-	 * normal ones, so the test below means "is it high-priority?"
-	 */
-	const bool hi_pri = (priority <= GUC_CTX_PRIORITY_HIGH);
-	const uint16_t half = GUC_MAX_DOORBELLS / 2;
-	const uint16_t start = hi_pri ? half : 0;
-	const uint16_t end = start + half;
-	uint16_t id;
+	int ret;
 
-	id = find_next_zero_bit(guc->doorbell_bitmap, end, start);
-	if (id == end)
-		id = GUC_INVALID_DOORBELL_ID;
+	ret = __reserve_doorbell(client);
+	if (ret)
+		return ret;
 
-	DRM_DEBUG_DRIVER("assigned %s priority doorbell id 0x%x\n",
-			hi_pri ? "high" : "normal", id);
+	__update_doorbell_desc(client, client->doorbell_id);
+
+	ret = __create_doorbell(client);
+	if (ret)
+		goto err;
+
+	return 0;
 
-	return id;
+err:
+	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
+	__unreserve_doorbell(client);
+	return ret;
 }
 
-/*
- * Select, assign and relase doorbell cachelines
- *
- * These functions track which doorbell cachelines are in use.
- * The data they manipulate is protected by the intel_guc_send lock.
- */
+static int destroy_doorbell(struct i915_guc_client *client)
+{
+	int err;
+
+	GEM_BUG_ON(!has_doorbell(client));
+
+	/* XXX: wait for any interrupts */
+	/* XXX: wait for workqueue to drain */
+
+	err = __destroy_doorbell(client);
+	if (err)
+		return err;
+
+	__update_doorbell_desc(client, GUC_DOORBELL_INVALID);
+
+	__unreserve_doorbell(client);
 
-static uint32_t select_doorbell_cacheline(struct intel_guc *guc)
+	return 0;
+}
+
+static unsigned long __select_cacheline(struct intel_guc* guc)
 {
-	const uint32_t cacheline_size = cache_line_size();
-	uint32_t offset;
+	unsigned long offset;
 
 	/* Doorbell uses a single cache line within a page */
 	offset = offset_in_page(guc->db_cacheline);
 
 	/* Moving to next cache line to reduce contention */
-	guc->db_cacheline += cacheline_size;
-
-	DRM_DEBUG_DRIVER("selected doorbell cacheline 0x%x, next 0x%x, linesize %u\n",
-			offset, guc->db_cacheline, cacheline_size);
+	guc->db_cacheline += cache_line_size();
 
+	DRM_DEBUG_DRIVER("reserved cacheline 0x%lx, next 0x%x, linesize %u\n",
+			offset, guc->db_cacheline, cache_line_size());
 	return offset;
 }
 
+static inline struct guc_process_desc *
+__get_process_desc(struct i915_guc_client *client)
+{
+	return client->vaddr + client->proc_desc_offset;
+}
+
 /*
  * Initialise the process descriptor shared with the GuC firmware.
  */
@@ -206,9 +294,7 @@ static void guc_proc_desc_init(struct intel_guc *guc,
 {
 	struct guc_process_desc *desc;
 
-	desc = client->vaddr + client->proc_desc_offset;
-
-	memset(desc, 0, sizeof(*desc));
+	desc = memset(__get_process_desc(client), 0, sizeof(*desc));
 
 	/*
 	 * XXX: pDoorbell and WQVBaseAddress are pointers in process address
@@ -219,42 +305,41 @@ static void guc_proc_desc_init(struct intel_guc *guc,
 	desc->wq_base_addr = 0;
 	desc->db_base_addr = 0;
 
-	desc->context_id = client->ctx_index;
+	desc->stage_id = client->stage_id;
 	desc->wq_size_bytes = client->wq_size;
 	desc->wq_status = WQ_STATUS_ACTIVE;
 	desc->priority = client->priority;
 }
 
 /*
- * Initialise/clear the context descriptor shared with the GuC firmware.
+ * Initialise/clear the stage descriptor shared with the GuC firmware.
  *
  * This descriptor tells the GuC where (in GGTT space) to find the important
  * data structures relating to this client (doorbell, process descriptor,
  * write queue, etc).
  */
-
-static void guc_ctx_desc_init(struct intel_guc *guc,
-			      struct i915_guc_client *client)
+static void guc_stage_desc_init(struct intel_guc *guc,
+				struct i915_guc_client *client)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct intel_engine_cs *engine;
 	struct i915_gem_context *ctx = client->owner;
-	struct guc_context_desc desc;
-	struct sg_table *sg;
+	struct guc_stage_desc *desc;
 	unsigned int tmp;
 	u32 gfx_addr;
 
-	memset(&desc, 0, sizeof(desc));
+	desc = __get_stage_desc(client);
+	memset(desc, 0, sizeof(*desc));
 
-	desc.attribute = GUC_CTX_DESC_ATTR_ACTIVE | GUC_CTX_DESC_ATTR_KERNEL;
-	desc.context_id = client->ctx_index;
-	desc.priority = client->priority;
-	desc.db_id = client->doorbell_id;
+	desc->attribute = GUC_STAGE_DESC_ATTR_ACTIVE | GUC_STAGE_DESC_ATTR_KERNEL;
+	desc->stage_id = client->stage_id;
+	desc->priority = client->priority;
+	desc->db_id = client->doorbell_id;
 
 	for_each_engine_masked(engine, dev_priv, client->engines, tmp) {
 		struct intel_context *ce = &ctx->engine[engine->id];
 		uint32_t guc_engine_id = engine->guc_id;
-		struct guc_execlist_context *lrc = &desc.lrc[guc_engine_id];
+		struct guc_execlist_context *lrc = &desc->lrc[guc_engine_id];
 
 		/* TODO: We have a design issue to be solved here. Only when we
 		 * receive the first batch, we know which engine is used by the
@@ -266,12 +351,22 @@ static void guc_ctx_desc_init(struct intel_guc *guc,
 		if (!ce->state)
 			break;	/* XXX: continue? */
 
+		/*
+		 * XXX: When this is a GUC_STAGE_DESC_ATTR_KERNEL client (proxy
+		 * submission or, in other words, not using a direct submission
+		 * model) the KMD's LRCA is not used for any work submission.
+		 * Instead, the GuC uses the LRCA of the user mode context (see
+		 * guc_wq_item_append below).
+		 */
 		lrc->context_desc = lower_32_bits(ce->lrc_desc);
 
 		/* The state page is after PPHWSP */
-		lrc->ring_lcra =
+		lrc->ring_lrca =
 			guc_ggtt_offset(ce->state) + LRC_STATE_PN * PAGE_SIZE;
-		lrc->context_id = (client->ctx_index << GUC_ELC_CTXID_OFFSET) |
+
+		/* XXX: In direct submission, the GuC wants the HW context id
+		 * here. In proxy submission, it wants the stage id */
+		lrc->context_id = (client->stage_id << GUC_ELC_CTXID_OFFSET) |
 				(guc_engine_id << GUC_ELC_ENGINE_OFFSET);
 
 		lrc->ring_begin = guc_ggtt_offset(ce->ring->vma);
@@ -279,50 +374,36 @@ static void guc_ctx_desc_init(struct intel_guc *guc,
 		lrc->ring_next_free_location = lrc->ring_begin;
 		lrc->ring_current_tail_pointer_value = 0;
 
-		desc.engines_used |= (1 << guc_engine_id);
+		desc->engines_used |= (1 << guc_engine_id);
 	}
 
 	DRM_DEBUG_DRIVER("Host engines 0x%x => GuC engines used 0x%x\n",
-			client->engines, desc.engines_used);
-	WARN_ON(desc.engines_used == 0);
+			client->engines, desc->engines_used);
+	WARN_ON(desc->engines_used == 0);
 
 	/*
 	 * The doorbell, process descriptor, and workqueue are all parts
 	 * of the client object, which the GuC will reference via the GGTT
 	 */
 	gfx_addr = guc_ggtt_offset(client->vma);
-	desc.db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
+	desc->db_trigger_phy = sg_dma_address(client->vma->pages->sgl) +
 				client->doorbell_offset;
-	desc.db_trigger_cpu =
-		(uintptr_t)client->vaddr + client->doorbell_offset;
-	desc.db_trigger_uk = gfx_addr + client->doorbell_offset;
-	desc.process_desc = gfx_addr + client->proc_desc_offset;
-	desc.wq_addr = gfx_addr + client->wq_offset;
-	desc.wq_size = client->wq_size;
-
-	/*
-	 * XXX: Take LRCs from an existing context if this is not an
-	 * IsKMDCreatedContext client
-	 */
-	desc.desc_private = (uintptr_t)client;
+	desc->db_trigger_cpu = (uintptr_t)__get_doorbell(client);
+	desc->db_trigger_uk = gfx_addr + client->doorbell_offset;
+	desc->process_desc = gfx_addr + client->proc_desc_offset;
+	desc->wq_addr = gfx_addr + client->wq_offset;
+	desc->wq_size = client->wq_size;
 
-	/* Pool context is pinned already */
-	sg = guc->ctx_pool_vma->pages;
-	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+	desc->desc_private = (uintptr_t)client;
 }
 
-static void guc_ctx_desc_fini(struct intel_guc *guc,
-			      struct i915_guc_client *client)
+static void guc_stage_desc_fini(struct intel_guc *guc,
+				struct i915_guc_client *client)
 {
-	struct guc_context_desc desc;
-	struct sg_table *sg;
-
-	memset(&desc, 0, sizeof(desc));
+	struct guc_stage_desc *desc;
 
-	sg = guc->ctx_pool_vma->pages;
-	sg_pcopy_from_buffer(sg->sgl, sg->nents, &desc, sizeof(desc),
-			     sizeof(desc) * client->ctx_index);
+	desc = __get_stage_desc(client);
+	memset(desc, 0, sizeof(*desc));
 }
 
 /**
@@ -345,8 +426,7 @@ int i915_guc_wq_reserve(struct drm_i915_gem_request *request)
 {
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	struct i915_guc_client *client = request->i915->guc.execbuf_client;
-	struct guc_process_desc *desc = client->vaddr +
-					client->proc_desc_offset;
+	struct guc_process_desc *desc = __get_process_desc(client);
 	u32 freespace;
 	int ret;
 
@@ -391,19 +471,17 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	const size_t wqi_size = sizeof(struct guc_wq_item);
 	const u32 wqi_len = wqi_size/sizeof(u32) - 1;
 	struct intel_engine_cs *engine = rq->engine;
-	struct guc_process_desc *desc;
+	struct guc_process_desc *desc = __get_process_desc(client);
 	struct guc_wq_item *wqi;
 	u32 freespace, tail, wq_off;
 
-	desc = client->vaddr + client->proc_desc_offset;
-
 	/* Free space is guaranteed, see i915_guc_wq_reserve() above */
 	freespace = CIRC_SPACE(client->wq_tail, desc->head, client->wq_size);
 	GEM_BUG_ON(freespace < wqi_size);
 
 	/* The GuC firmware wants the tail index in QWords, not bytes */
 	tail = rq->tail;
-	GEM_BUG_ON(tail & 7);
+	assert_ring_tail_valid(rq->ring, rq->tail);
 	tail >>= 3;
 	GEM_BUG_ON(tail > WQ_RING_TAIL_MAX);
 
@@ -436,19 +514,27 @@ static void guc_wq_item_append(struct i915_guc_client *client,
 	/* The GuC wants only the low-order word of the context descriptor */
 	wqi->context_desc = (u32)intel_lr_context_descriptor(rq->ctx, engine);
 
-	wqi->ring_tail = tail << WQ_RING_TAIL_SHIFT;
+	wqi->submit_element_info = tail << WQ_RING_TAIL_SHIFT;
 	wqi->fence_id = rq->global_seqno;
 }
 
+static void guc_reset_wq(struct i915_guc_client *client)
+{
+	struct guc_process_desc *desc = __get_process_desc(client);
+
+	desc->head = 0;
+	desc->tail = 0;
+
+	client->wq_tail = 0;
+}
+
 static int guc_ring_doorbell(struct i915_guc_client *client)
 {
-	struct guc_process_desc *desc;
+	struct guc_process_desc *desc = __get_process_desc(client);
 	union guc_doorbell_qw db_cmp, db_exc, db_ret;
 	union guc_doorbell_qw *db;
 	int attempt = 2, ret = -EAGAIN;
 
-	desc = client->vaddr + client->proc_desc_offset;
-
 	/* Update the tail so it is visible to GuC */
 	desc->tail = client->wq_tail;
 
@@ -463,7 +549,7 @@ static int guc_ring_doorbell(struct i915_guc_client *client)
 		db_exc.cookie = 1;
 
 	/* pointer of current doorbell cacheline */
-	db = client->vaddr + client->doorbell_offset;
+	db = (union guc_doorbell_qw *)__get_doorbell(client);
 
 	while (attempt--) {
 		/* lets ring the doorbell */
@@ -573,23 +659,10 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 {
 	struct execlist_port *port = engine->execlist_port;
 	struct drm_i915_gem_request *last = port[0].request;
-	unsigned long flags;
 	struct rb_node *rb;
 	bool submit = false;
 
-	/* After execlist_first is updated, the tasklet will be rescheduled.
-	 *
-	 * If we are currently running (inside the tasklet) and a third
-	 * party queues a request and so updates engine->execlist_first under
-	 * the spinlock (which we have elided), it will atomically set the
-	 * TASKLET_SCHED flag causing the us to be re-executed and pick up
-	 * the change in state (the update to TASKLET_SCHED incurs a memory
-	 * barrier making this cross-cpu checking safe).
-	 */
-	if (!READ_ONCE(engine->execlist_first))
-		return false;
-
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
 	while (rb) {
 		struct drm_i915_gem_request *rq =
@@ -609,8 +682,8 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		RB_CLEAR_NODE(&rq->priotree.node);
 		rq->priotree.priority = INT_MAX;
 
-		trace_i915_gem_request_in(rq, port - engine->execlist_port);
 		i915_guc_submit(rq);
+		trace_i915_gem_request_in(rq, port - engine->execlist_port);
 		last = rq;
 		submit = true;
 	}
@@ -619,7 +692,7 @@ static bool i915_guc_dequeue(struct intel_engine_cs *engine)
 		nested_enable_signaling(last);
 		engine->execlist_first = rb;
 	}
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	return submit;
 }
@@ -695,93 +768,100 @@ err:
 	return vma;
 }
 
-static void
-guc_client_free(struct drm_i915_private *dev_priv,
-		struct i915_guc_client *client)
+/* Check that a doorbell register is in the expected state */
+static bool doorbell_ok(struct intel_guc *guc, u16 db_id)
 {
-	struct intel_guc *guc = &dev_priv->guc;
-
-	if (!client)
-		return;
-
-	/*
-	 * XXX: wait for any outstanding submissions before freeing memory.
-	 * Be sure to drop any locks
-	 */
+	struct drm_i915_private *dev_priv = guc_to_i915(guc);
+	u32 drbregl;
+	bool valid;
 
-	if (client->vaddr) {
-		/*
-		 * If we got as far as setting up a doorbell, make sure we
-		 * shut it down before unmapping & deallocating the memory.
-		 */
-		guc_disable_doorbell(guc, client);
+	GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID);
 
-		i915_gem_object_unpin_map(client->vma->obj);
-	}
+	drbregl = I915_READ(GEN8_DRBREGL(db_id));
+	valid = drbregl & GEN8_DRB_VALID;
 
-	i915_vma_unpin_and_release(&client->vma);
+	if (test_bit(db_id, guc->doorbell_bitmap) == valid)
+		return true;
 
-	if (client->ctx_index != GUC_INVALID_CTX_ID) {
-		guc_ctx_desc_fini(guc, client);
-		ida_simple_remove(&guc->ctx_ids, client->ctx_index);
-	}
+	DRM_DEBUG_DRIVER("Doorbell %d has unexpected state (0x%x): valid=%s\n",
+			 db_id, drbregl, yesno(valid));
 
-	kfree(client);
+	return false;
 }
 
-/* Check that a doorbell register is in the expected state */
-static bool guc_doorbell_check(struct intel_guc *guc, uint16_t db_id)
+/*
+ * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and
+ * reloaded the GuC FW) we can use this function to tell the GuC to reassign the
+ * doorbell to the rightful owner.
+ */
+static int __reset_doorbell(struct i915_guc_client* client, u16 db_id)
 {
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	i915_reg_t drbreg = GEN8_DRBREGL(db_id);
-	uint32_t value = I915_READ(drbreg);
-	bool enabled = (value & GUC_DOORBELL_ENABLED) != 0;
-	bool expected = test_bit(db_id, guc->doorbell_bitmap);
-
-	if (enabled == expected)
-		return true;
+	int err;
 
-	DRM_DEBUG_DRIVER("Doorbell %d (reg 0x%x) 0x%x, should be %s\n",
-			 db_id, drbreg.reg, value,
-			 expected ? "active" : "inactive");
+	__update_doorbell_desc(client, db_id);
+	err = __create_doorbell(client);
+	if (!err)
+		err = __destroy_doorbell(client);
 
-	return false;
+	return err;
 }
 
 /*
- * Borrow the first client to set up & tear down each unused doorbell
- * in turn, to ensure that all doorbell h/w is (re)initialised.
+ * Set up & tear down each unused doorbell in turn, to ensure that all doorbell
+ * HW is (re)initialised. For that end, we might have to borrow the first
+ * client. Also, tell GuC about all the doorbells in use by all clients.
+ * We do this because the KMD, the GuC and the doorbell HW can easily go out of
+ * sync (e.g. we can reset the GuC, but not the doorbel HW).
  */
-static void guc_init_doorbell_hw(struct intel_guc *guc)
+static int guc_init_doorbell_hw(struct intel_guc *guc)
 {
 	struct i915_guc_client *client = guc->execbuf_client;
-	uint16_t db_id;
-	int i, err;
-
-	guc_disable_doorbell(guc, client);
+	bool recreate_first_client = false;
+	u16 db_id;
+	int ret;
 
-	for (i = 0; i < GUC_MAX_DOORBELLS; ++i) {
-		/* Skip if doorbell is OK */
-		if (guc_doorbell_check(guc, i))
+	/* For unused doorbells, make sure they are disabled */
+	for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) {
+		if (doorbell_ok(guc, db_id))
 			continue;
 
-		err = guc_update_doorbell_id(guc, client, i);
-		if (err)
-			DRM_DEBUG_DRIVER("Doorbell %d update failed, err %d\n",
-					i, err);
+		if (has_doorbell(client)) {
+			/* Borrow execbuf_client (we will recreate it later) */
+			destroy_doorbell(client);
+			recreate_first_client = true;
+		}
+
+		ret = __reset_doorbell(client, db_id);
+		WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret);
 	}
 
-	db_id = select_doorbell_register(guc, client->priority);
-	WARN_ON(db_id == GUC_INVALID_DOORBELL_ID);
+	if (recreate_first_client) {
+		ret = __reserve_doorbell(client);
+		if (unlikely(ret)) {
+			DRM_ERROR("Couldn't re-reserve first client db: %d\n", ret);
+			return ret;
+		}
+
+		__update_doorbell_desc(client, client->doorbell_id);
+	}
 
-	err = guc_update_doorbell_id(guc, client, db_id);
-	if (err)
-		DRM_WARN("Failed to restore doorbell to %d, err %d\n",
-			 db_id, err);
+	/* Now for every client (and not only execbuf_client) make sure their
+	 * doorbells are known by the GuC */
+	//for (client = client_list; client != NULL; client = client->next)
+	{
+		ret = __create_doorbell(client);
+		if (ret) {
+			DRM_ERROR("Couldn't recreate client %u doorbell: %d\n",
+				client->stage_id, ret);
+			return ret;
+		}
+	}
 
-	/* Read back & verify all doorbell registers */
-	for (i = 0; i < GUC_MAX_DOORBELLS; ++i)
-		(void)guc_doorbell_check(guc, i);
+	/* Read back & verify all (used & unused) doorbell registers */
+	for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id)
+		WARN_ON(!doorbell_ok(guc, db_id));
+
+	return 0;
 }
 
 /**
@@ -807,49 +887,46 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_vma *vma;
 	void *vaddr;
-	uint16_t db_id;
+	int ret;
 
 	client = kzalloc(sizeof(*client), GFP_KERNEL);
 	if (!client)
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 
-	client->owner = ctx;
 	client->guc = guc;
+	client->owner = ctx;
 	client->engines = engines;
 	client->priority = priority;
-	client->doorbell_id = GUC_INVALID_DOORBELL_ID;
+	client->doorbell_id = GUC_DOORBELL_INVALID;
+	client->wq_offset = GUC_DB_SIZE;
+	client->wq_size = GUC_WQ_SIZE;
+	spin_lock_init(&client->wq_lock);
 
-	client->ctx_index = (uint32_t)ida_simple_get(&guc->ctx_ids, 0,
-			GUC_MAX_GPU_CONTEXTS, GFP_KERNEL);
-	if (client->ctx_index >= GUC_MAX_GPU_CONTEXTS) {
-		client->ctx_index = GUC_INVALID_CTX_ID;
-		goto err;
-	}
+	ret = ida_simple_get(&guc->stage_ids, 0, GUC_MAX_STAGE_DESCRIPTORS,
+				GFP_KERNEL);
+	if (ret < 0)
+		goto err_client;
+
+	client->stage_id = ret;
 
 	/* The first page is doorbell/proc_desc. Two followed pages are wq. */
 	vma = intel_guc_allocate_vma(guc, GUC_DB_SIZE + GUC_WQ_SIZE);
-	if (IS_ERR(vma))
-		goto err;
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err_id;
+	}
 
 	/* We'll keep just the first (doorbell/proc) page permanently kmap'd. */
 	client->vma = vma;
 
 	vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB);
-	if (IS_ERR(vaddr))
-		goto err;
-
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_vma;
+	}
 	client->vaddr = vaddr;
 
-	spin_lock_init(&client->wq_lock);
-	client->wq_offset = GUC_DB_SIZE;
-	client->wq_size = GUC_WQ_SIZE;
-
-	db_id = select_doorbell_register(guc, client->priority);
-	if (db_id == GUC_INVALID_DOORBELL_ID)
-		/* XXX: evict a doorbell instead? */
-		goto err;
-
-	client->doorbell_offset = select_doorbell_cacheline(guc);
+	client->doorbell_offset = __select_cacheline(guc);
 
 	/*
 	 * Since the doorbell only requires a single cacheline, we can save
@@ -862,28 +939,47 @@ guc_client_alloc(struct drm_i915_private *dev_priv,
 		client->proc_desc_offset = (GUC_DB_SIZE / 2);
 
 	guc_proc_desc_init(guc, client);
-	guc_ctx_desc_init(guc, client);
+	guc_stage_desc_init(guc, client);
 
-	/* For runtime client allocation we need to enable the doorbell. Not
-	 * required yet for the static execbuf_client as this special kernel
-	 * client is enabled from i915_guc_submission_enable().
-	 *
-	 * guc_update_doorbell_id(guc, client, db_id);
-	 */
+	ret = create_doorbell(client);
+	if (ret)
+		goto err_vaddr;
 
-	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: ctx_index %u\n",
-		priority, client, client->engines, client->ctx_index);
-	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%x\n",
-		client->doorbell_id, client->doorbell_offset);
+	DRM_DEBUG_DRIVER("new priority %u client %p for engine(s) 0x%x: stage_id %u\n",
+			 priority, client, client->engines, client->stage_id);
+	DRM_DEBUG_DRIVER("doorbell id %u, cacheline offset 0x%lx\n",
+			 client->doorbell_id, client->doorbell_offset);
 
 	return client;
 
-err:
-	guc_client_free(dev_priv, client);
-	return NULL;
+err_vaddr:
+	i915_gem_object_unpin_map(client->vma->obj);
+err_vma:
+	i915_vma_unpin_and_release(&client->vma);
+err_id:
+	ida_simple_remove(&guc->stage_ids, client->stage_id);
+err_client:
+	kfree(client);
+	return ERR_PTR(ret);
 }
 
+static void guc_client_free(struct i915_guc_client *client)
+{
+	/*
+	 * XXX: wait for any outstanding submissions before freeing memory.
+	 * Be sure to drop any locks
+	 */
 
+	/* FIXME: in many cases, by the time we get here the GuC has been
+	 * reset, so we cannot destroy the doorbell properly. Ignore the
+	 * error message for now */
+	destroy_doorbell(client);
+	guc_stage_desc_fini(client->guc, client);
+	i915_gem_object_unpin_map(client->vma->obj);
+	i915_vma_unpin_and_release(&client->vma);
+	ida_simple_remove(&client->guc->stage_ids, client->stage_id);
+	kfree(client);
+}
 
 static void guc_policies_init(struct guc_policies *policies)
 {
@@ -893,7 +989,7 @@ static void guc_policies_init(struct guc_policies *policies)
 	policies->dpc_promote_time = 500000;
 	policies->max_num_work_items = POLICY_MAX_NUM_WI;
 
-	for (p = 0; p < GUC_CTX_PRIORITY_NUM; p++) {
+	for (p = 0; p < GUC_CLIENT_PRIORITY_NUM; p++) {
 		for (i = GUC_RENDER_ENGINE; i < GUC_MAX_ENGINES_NUM; i++) {
 			policy = &policies->policy[p][i];
 
@@ -907,7 +1003,7 @@ static void guc_policies_init(struct guc_policies *policies)
 	policies->is_valid = 1;
 }
 
-static void guc_addon_create(struct intel_guc *guc)
+static int guc_ads_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct i915_vma *vma;
@@ -923,14 +1019,13 @@ static void guc_addon_create(struct intel_guc *guc)
 	enum intel_engine_id id;
 	u32 base;
 
-	vma = guc->ads_vma;
-	if (!vma) {
-		vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob)));
-		if (IS_ERR(vma))
-			return;
+	GEM_BUG_ON(guc->ads_vma);
 
-		guc->ads_vma = vma;
-	}
+	vma = intel_guc_allocate_vma(guc, PAGE_ALIGN(sizeof(*blob)));
+	if (IS_ERR(vma))
+		return PTR_ERR(vma);
+
+	guc->ads_vma = vma;
 
 	page = i915_vma_first_page(vma);
 	blob = kmap(page);
@@ -940,11 +1035,11 @@ static void guc_addon_create(struct intel_guc *guc)
 
 	/* MMIO reg state */
 	for_each_engine(engine, dev_priv, id) {
-		blob->reg_state.mmio_white_list[engine->guc_id].mmio_start =
+		blob->reg_state.white_list[engine->guc_id].mmio_start =
 			engine->mmio_base + GUC_MMIO_WHITE_LIST_START;
 
 		/* Nothing to be saved or restored for now. */
-		blob->reg_state.mmio_white_list[engine->guc_id].count = 0;
+		blob->reg_state.white_list[engine->guc_id].count = 0;
 	}
 
 	/*
@@ -967,67 +1062,75 @@ static void guc_addon_create(struct intel_guc *guc)
 	blob->ads.reg_state_addr = base + ptr_offset(blob, reg_state);
 
 	kunmap(page);
+
+	return 0;
+}
+
+static void guc_ads_destroy(struct intel_guc *guc)
+{
+	i915_vma_unpin_and_release(&guc->ads_vma);
 }
 
 /*
- * Set up the memory resources to be shared with the GuC.  At this point,
- * we require just one object that can be mapped through the GGTT.
+ * Set up the memory resources to be shared with the GuC (via the GGTT)
+ * at firmware loading time.
  */
 int i915_guc_submission_init(struct drm_i915_private *dev_priv)
 {
-	const size_t ctxsize = sizeof(struct guc_context_desc);
-	const size_t poolsize = GUC_MAX_GPU_CONTEXTS * ctxsize;
-	const size_t gemsize = round_up(poolsize, PAGE_SIZE);
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_vma *vma;
+	void *vaddr;
+	int ret;
 
-	if (!HAS_GUC_SCHED(dev_priv))
+	if (guc->stage_desc_pool)
 		return 0;
 
-	/* Wipe bitmap & delete client in case of reinitialisation */
-	bitmap_clear(guc->doorbell_bitmap, 0, GUC_MAX_DOORBELLS);
-	i915_guc_submission_disable(dev_priv);
-
-	if (!i915.enable_guc_submission)
-		return 0; /* not enabled  */
-
-	if (guc->ctx_pool_vma)
-		return 0; /* already allocated */
-
-	vma = intel_guc_allocate_vma(guc, gemsize);
+	vma = intel_guc_allocate_vma(guc,
+				PAGE_ALIGN(sizeof(struct guc_stage_desc) *
+					GUC_MAX_STAGE_DESCRIPTORS));
 	if (IS_ERR(vma))
 		return PTR_ERR(vma);
 
-	guc->ctx_pool_vma = vma;
-	ida_init(&guc->ctx_ids);
-	intel_guc_log_create(guc);
-	guc_addon_create(guc);
-
-	guc->execbuf_client = guc_client_alloc(dev_priv,
-					       INTEL_INFO(dev_priv)->ring_mask,
-					       GUC_CTX_PRIORITY_KMD_NORMAL,
-					       dev_priv->kernel_context);
-	if (!guc->execbuf_client) {
-		DRM_ERROR("Failed to create GuC client for execbuf!\n");
-		goto err;
+	guc->stage_desc_pool = vma;
+
+	vaddr = i915_gem_object_pin_map(guc->stage_desc_pool->obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		ret = PTR_ERR(vaddr);
+		goto err_vma;
 	}
 
+	guc->stage_desc_pool_vaddr = vaddr;
+
+	ret = intel_guc_log_create(guc);
+	if (ret < 0)
+		goto err_vaddr;
+
+	ret = guc_ads_create(guc);
+	if (ret < 0)
+		goto err_log;
+
+	ida_init(&guc->stage_ids);
+
 	return 0;
 
-err:
-	i915_guc_submission_fini(dev_priv);
-	return -ENOMEM;
+err_log:
+	intel_guc_log_destroy(guc);
+err_vaddr:
+	i915_gem_object_unpin_map(guc->stage_desc_pool->obj);
+err_vma:
+	i915_vma_unpin_and_release(&guc->stage_desc_pool);
+	return ret;
 }
 
-static void guc_reset_wq(struct i915_guc_client *client)
+void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
 {
-	struct guc_process_desc *desc = client->vaddr +
-					client->proc_desc_offset;
-
-	desc->head = 0;
-	desc->tail = 0;
+	struct intel_guc *guc = &dev_priv->guc;
 
-	client->wq_tail = 0;
+	ida_destroy(&guc->stage_ids);
+	guc_ads_destroy(guc);
+	intel_guc_log_destroy(guc);
+	i915_gem_object_unpin_map(guc->stage_desc_pool->obj);
+	i915_vma_unpin_and_release(&guc->stage_desc_pool);
 }
 
 static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
@@ -1072,20 +1175,60 @@ static void guc_interrupts_capture(struct drm_i915_private *dev_priv)
 	dev_priv->rps.pm_intrmsk_mbz &= ~GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
 }
 
+static void guc_interrupts_release(struct drm_i915_private *dev_priv)
+{
+	struct intel_engine_cs *engine;
+	enum intel_engine_id id;
+	int irqs;
+
+	/*
+	 * tell all command streamers NOT to forward interrupts or vblank
+	 * to GuC.
+	 */
+	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
+	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
+	for_each_engine(engine, dev_priv, id)
+		I915_WRITE(RING_MODE_GEN7(engine), irqs);
+
+	/* route all GT interrupts to the host */
+	I915_WRITE(GUC_BCS_RCS_IER, 0);
+	I915_WRITE(GUC_VCS2_VCS1_IER, 0);
+	I915_WRITE(GUC_WD_VECS_IER, 0);
+
+	dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
+	dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+}
+
 int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 {
 	struct intel_guc *guc = &dev_priv->guc;
 	struct i915_guc_client *client = guc->execbuf_client;
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
+	int err;
+
+	if (!client) {
+		client = guc_client_alloc(dev_priv,
+					  INTEL_INFO(dev_priv)->ring_mask,
+					  GUC_CLIENT_PRIORITY_KMD_NORMAL,
+					  dev_priv->kernel_context);
+		if (IS_ERR(client)) {
+			DRM_ERROR("Failed to create GuC client for execbuf!\n");
+			return PTR_ERR(client);
+		}
 
-	if (!client)
-		return -ENODEV;
+		guc->execbuf_client = client;
+	}
 
-	intel_guc_sample_forcewake(guc);
+	err = intel_guc_sample_forcewake(guc);
+	if (err)
+		goto err_execbuf_client;
 
 	guc_reset_wq(client);
-	guc_init_doorbell_hw(guc);
+
+	err = guc_init_doorbell_hw(guc);
+	if (err)
+		goto err_execbuf_client;
 
 	/* Take over from manual control of ELSP (execlists) */
 	guc_interrupts_capture(dev_priv);
@@ -1112,30 +1255,11 @@ int i915_guc_submission_enable(struct drm_i915_private *dev_priv)
 	}
 
 	return 0;
-}
 
-static void guc_interrupts_release(struct drm_i915_private *dev_priv)
-{
-	struct intel_engine_cs *engine;
-	enum intel_engine_id id;
-	int irqs;
-
-	/*
-	 * tell all command streamers NOT to forward interrupts or vblank
-	 * to GuC.
-	 */
-	irqs = _MASKED_FIELD(GFX_FORWARD_VBLANK_MASK, GFX_FORWARD_VBLANK_NEVER);
-	irqs |= _MASKED_BIT_DISABLE(GFX_INTERRUPT_STEERING);
-	for_each_engine(engine, dev_priv, id)
-		I915_WRITE(RING_MODE_GEN7(engine), irqs);
-
-	/* route all GT interrupts to the host */
-	I915_WRITE(GUC_BCS_RCS_IER, 0);
-	I915_WRITE(GUC_VCS2_VCS1_IER, 0);
-	I915_WRITE(GUC_WD_VECS_IER, 0);
-
-	dev_priv->rps.pm_intrmsk_mbz |= GEN8_PMINTR_DISABLE_REDIRECT_TO_GUC;
-	dev_priv->rps.pm_intrmsk_mbz &= ~ARAT_EXPIRED_INTRMSK;
+err_execbuf_client:
+	guc_client_free(guc->execbuf_client);
+	guc->execbuf_client = NULL;
+	return err;
 }
 
 void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
@@ -1144,30 +1268,11 @@ void i915_guc_submission_disable(struct drm_i915_private *dev_priv)
 
 	guc_interrupts_release(dev_priv);
 
-	if (!guc->execbuf_client)
-		return;
-
 	/* Revert back to manual ELSP submission */
 	intel_engines_reset_default_submission(dev_priv);
-}
-
-void i915_guc_submission_fini(struct drm_i915_private *dev_priv)
-{
-	struct intel_guc *guc = &dev_priv->guc;
-	struct i915_guc_client *client;
 
-	client = fetch_and_zero(&guc->execbuf_client);
-	if (!client)
-		return;
-
-	guc_client_free(dev_priv, client);
-
-	i915_vma_unpin_and_release(&guc->ads_vma);
-	i915_vma_unpin_and_release(&guc->log.vma);
-
-	if (guc->ctx_pool_vma)
-		ida_destroy(&guc->ctx_ids);
-	i915_vma_unpin_and_release(&guc->ctx_pool_vma);
+	guc_client_free(guc->execbuf_client);
+	guc->execbuf_client = NULL;
 }
 
 /**
@@ -1196,7 +1301,6 @@ int intel_guc_suspend(struct drm_i915_private *dev_priv)
 	return intel_guc_send(guc, data, ARRAY_SIZE(data));
 }
 
-
 /**
  * intel_guc_resume() - notify GuC resuming from suspend state
  * @dev_priv:	i915 device private
@@ -1222,5 +1326,3 @@ int intel_guc_resume(struct drm_i915_private *dev_priv)
 
 	return intel_guc_send(guc, data, ARRAY_SIZE(data));
 }
-
-
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 8163d5024ff8..fd97fe00cd0d 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -1742,8 +1742,8 @@ static void gen9_guc_irq_handler(struct drm_i915_private *dev_priv, u32 gt_iir)
 			I915_WRITE(SOFT_SCRATCH(15), msg & ~flush);
 
 			/* Handle flush interrupt in bottom half */
-			queue_work(dev_priv->guc.log.flush_wq,
-				   &dev_priv->guc.log.flush_work);
+			queue_work(dev_priv->guc.log.runtime.flush_wq,
+				   &dev_priv->guc.log.runtime.flush_work);
 
 			dev_priv->guc.log.flush_interrupt_count++;
 		} else {
@@ -4252,12 +4252,12 @@ void intel_irq_init(struct drm_i915_private *dev_priv)
 	dev_priv->rps.pm_intrmsk_mbz = 0;
 
 	/*
-	 * SNB,IVB can while VLV,CHV may hard hang on looping batchbuffer
+	 * SNB,IVB,HSW can while VLV,CHV may hard hang on looping batchbuffer
 	 * if GEN6_PM_UP_EI_EXPIRED is masked.
 	 *
 	 * TODO: verify if this can be reproduced on VLV,CHV.
 	 */
-	if (INTEL_INFO(dev_priv)->gen <= 7 && !IS_HASWELL(dev_priv))
+	if (INTEL_INFO(dev_priv)->gen <= 7)
 		dev_priv->rps.pm_intrmsk_mbz |= GEN6_PM_RP_UP_EI_EXPIRED;
 
 	if (INTEL_INFO(dev_priv)->gen >= 8)
diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c
index 732101ed57fb..f87b0c4e564d 100644
--- a/drivers/gpu/drm/i915/i915_pci.c
+++ b/drivers/gpu/drm/i915/i915_pci.c
@@ -61,6 +61,7 @@
 	.has_overlay = 1, .overlay_needs_physical = 1, \
 	.has_gmch_display = 1, \
 	.hws_needs_physical = 1, \
+	.unfenced_needs_alignment = 1, \
 	.ring_mask = RENDER_RING, \
 	GEN_DEFAULT_PIPEOFFSETS, \
 	CURSOR_OFFSETS
@@ -102,6 +103,7 @@ static const struct intel_device_info intel_i915g_info = {
 	.platform = INTEL_I915G, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i915gm_info = {
@@ -113,6 +115,7 @@ static const struct intel_device_info intel_i915gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i945g_info = {
@@ -121,6 +124,7 @@ static const struct intel_device_info intel_i945g_info = {
 	.has_hotplug = 1, .cursor_needs_physical = 1,
 	.has_overlay = 1, .overlay_needs_physical = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_i945gm_info = {
@@ -131,6 +135,7 @@ static const struct intel_device_info intel_i945gm_info = {
 	.supports_tv = 1,
 	.has_fbc = 1,
 	.hws_needs_physical = 1,
+	.unfenced_needs_alignment = 1,
 };
 
 static const struct intel_device_info intel_g33_info = {
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 8c121187ff39..060b171480d5 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -1705,7 +1705,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 	 */
 	if (WARN_ON(stream->sample_flags != props->sample_flags)) {
 		ret = -ENODEV;
-		goto err_alloc;
+		goto err_flags;
 	}
 
 	list_add(&stream->link, &dev_priv->perf.streams);
@@ -1728,6 +1728,7 @@ i915_perf_open_ioctl_locked(struct drm_i915_private *dev_priv,
 
 err_open:
 	list_del(&stream->link);
+err_flags:
 	if (stream->ops->destroy)
 		stream->ops->destroy(stream);
 err_alloc:
@@ -1793,6 +1794,11 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 		if (ret)
 			return ret;
 
+		if (id == 0 || id >= DRM_I915_PERF_PROP_MAX) {
+			DRM_DEBUG("Unknown i915 perf property ID\n");
+			return -EINVAL;
+		}
+
 		switch ((enum drm_i915_perf_property_id)id) {
 		case DRM_I915_PERF_PROP_CTX_HANDLE:
 			props->single_context = 1;
@@ -1862,9 +1868,8 @@ static int read_properties_unlocked(struct drm_i915_private *dev_priv,
 			props->oa_periodic = true;
 			props->oa_period_exponent = value;
 			break;
-		default:
+		case DRM_I915_PERF_PROP_MAX:
 			MISSING_CASE(id);
-			DRM_DEBUG("Unknown i915 perf property ID\n");
 			return -EINVAL;
 		}
 
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index 04c8f69fcc62..11b12f412492 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -7829,7 +7829,14 @@ enum {
 #define  TRANS_DDI_EDP_INPUT_B_ONOFF	(5<<12)
 #define  TRANS_DDI_EDP_INPUT_C_ONOFF	(6<<12)
 #define  TRANS_DDI_DP_VC_PAYLOAD_ALLOC	(1<<8)
+#define  TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE (1<<7)
+#define  TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ (1<<6)
 #define  TRANS_DDI_BFI_ENABLE		(1<<4)
+#define  TRANS_DDI_HIGH_TMDS_CHAR_RATE	(1<<4)
+#define  TRANS_DDI_HDMI_SCRAMBLING	(1<<0)
+#define  TRANS_DDI_HDMI_SCRAMBLING_MASK (TRANS_DDI_HDMI_SCRAMBLER_CTS_ENABLE \
+					| TRANS_DDI_HDMI_SCRAMBLER_RESET_FREQ \
+					| TRANS_DDI_HDMI_SCRAMBLING)
 
 /* DisplayPort Transport Control */
 #define _DP_TP_CTL_A			0x64040
diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h
index 94a3a3299910..c5455d36b617 100644
--- a/drivers/gpu/drm/i915/i915_utils.h
+++ b/drivers/gpu/drm/i915/i915_utils.h
@@ -25,6 +25,24 @@
 #ifndef __I915_UTILS_H
 #define __I915_UTILS_H
 
+#undef WARN_ON
+/* Many gcc seem to no see through this and fall over :( */
+#if 0
+#define WARN_ON(x) ({ \
+	bool __i915_warn_cond = (x); \
+	if (__builtin_constant_p(__i915_warn_cond)) \
+		BUILD_BUG_ON(__i915_warn_cond); \
+	WARN(__i915_warn_cond, "WARN_ON(" #x ")"); })
+#else
+#define WARN_ON(x) WARN((x), "%s", "WARN_ON(" __stringify(x) ")")
+#endif
+
+#undef WARN_ON_ONCE
+#define WARN_ON_ONCE(x) WARN_ONCE((x), "%s", "WARN_ON_ONCE(" __stringify(x) ")")
+
+#define MISSING_CASE(x) WARN(1, "Missing switch case (%lu) in %s\n", \
+			     (long)(x), __func__)
+
 #if GCC_VERSION >= 70000
 #define add_overflows(A, B) \
 	__builtin_add_overflow_p((A), (B), (typeof((A) + (B)))0)
diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c
index ba986edee312..9ccbf26124c6 100644
--- a/drivers/gpu/drm/i915/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c
@@ -47,11 +47,12 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b)
 unsigned int intel_engine_wakeup(struct intel_engine_cs *engine)
 {
 	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+	unsigned long flags;
 	unsigned int result;
 
-	spin_lock_irq(&b->irq_lock);
+	spin_lock_irqsave(&b->irq_lock, flags);
 	result = __intel_breadcrumbs_wakeup(b);
-	spin_unlock_irq(&b->irq_lock);
+	spin_unlock_irqrestore(&b->irq_lock, flags);
 
 	return result;
 }
@@ -579,6 +580,8 @@ static int intel_breadcrumbs_signaler(void *arg)
 	signaler_set_rtpriority();
 
 	do {
+		bool do_schedule = true;
+
 		set_current_state(TASK_INTERRUPTIBLE);
 
 		/* We are either woken up by the interrupt bottom-half,
@@ -625,9 +628,23 @@ static int intel_breadcrumbs_signaler(void *arg)
 			spin_unlock_irq(&b->rb_lock);
 
 			i915_gem_request_put(request);
-		} else {
+
+			/* If the engine is saturated we may be continually
+			 * processing completed requests. This angers the
+			 * NMI watchdog if we never let anything else
+			 * have access to the CPU. Let's pretend to be nice
+			 * and relinquish the CPU if we burn through the
+			 * entire RT timeslice!
+			 */
+			do_schedule = need_resched();
+		}
+
+		if (unlikely(do_schedule)) {
 			DEFINE_WAIT(exec);
 
+			if (kthread_should_park())
+				kthread_parkme();
+
 			if (kthread_should_stop()) {
 				GEM_BUG_ON(request);
 				break;
@@ -640,9 +657,6 @@ static int intel_breadcrumbs_signaler(void *arg)
 
 			if (request)
 				remove_wait_queue(&request->execute, &exec);
-
-			if (kthread_should_park())
-				kthread_parkme();
 		}
 		i915_gem_request_put(request);
 	} while (1);
diff --git a/drivers/gpu/drm/i915/intel_cdclk.c b/drivers/gpu/drm/i915/intel_cdclk.c
index c2cc33f3d888..dd3ad52b7dfe 100644
--- a/drivers/gpu/drm/i915/intel_cdclk.c
+++ b/drivers/gpu/drm/i915/intel_cdclk.c
@@ -1442,16 +1442,33 @@ static int bdw_adjust_min_pipe_pixel_rate(struct intel_crtc_state *crtc_state,
 	if (IS_BROADWELL(dev_priv) && crtc_state->ips_enabled)
 		pixel_rate = DIV_ROUND_UP(pixel_rate * 100, 95);
 
-	/* BSpec says "Do not use DisplayPort with CDCLK less than
-	 * 432 MHz, audio enabled, port width x4, and link rate
-	 * HBR2 (5.4 GHz), or else there may be audio corruption or
-	 * screen corruption."
+	/* BSpec says "Do not use DisplayPort with CDCLK less than 432 MHz,
+	 * audio enabled, port width x4, and link rate HBR2 (5.4 GHz), or else
+	 * there may be audio corruption or screen corruption." This cdclk
+	 * restriction for GLK is 316.8 MHz and since GLK can output two
+	 * pixels per clock, the pixel rate becomes 2 * 316.8 MHz.
 	 */
 	if (intel_crtc_has_dp_encoder(crtc_state) &&
 	    crtc_state->has_audio &&
 	    crtc_state->port_clock >= 540000 &&
-	    crtc_state->lane_count == 4)
-		pixel_rate = max(432000, pixel_rate);
+	    crtc_state->lane_count == 4) {
+		if (IS_GEMINILAKE(dev_priv))
+			pixel_rate = max(2 * 316800, pixel_rate);
+		else
+			pixel_rate = max(432000, pixel_rate);
+	}
+
+	/* According to BSpec, "The CD clock frequency must be at least twice
+	 * the frequency of the Azalia BCLK." and BCLK is 96 MHz by default.
+	 * The check for GLK has to be adjusted as the platform can output
+	 * two pixels per clock.
+	 */
+	if (crtc_state->has_audio && INTEL_GEN(dev_priv) >= 9) {
+		if (IS_GEMINILAKE(dev_priv))
+			pixel_rate = max(2 * 2 * 96000, pixel_rate);
+		else
+			pixel_rate = max(2 * 96000, pixel_rate);
+	}
 
 	return pixel_rate;
 }
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index 8c82607294c6..2797bf37c3ac 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -669,15 +669,16 @@ static const struct dmi_system_id intel_spurious_crt_detect[] = {
 	{ }
 };
 
-static enum drm_connector_status
-intel_crt_detect(struct drm_connector *connector, bool force)
+static int
+intel_crt_detect(struct drm_connector *connector,
+		 struct drm_modeset_acquire_ctx *ctx,
+		 bool force)
 {
 	struct drm_i915_private *dev_priv = to_i915(connector->dev);
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct intel_encoder *intel_encoder = &crt->base;
-	enum drm_connector_status status;
+	int status, ret;
 	struct intel_load_detect_pipe tmp;
-	struct drm_modeset_acquire_ctx ctx;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s] force=%d\n",
 		      connector->base.id, connector->name,
@@ -721,10 +722,9 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 		goto out;
 	}
 
-	drm_modeset_acquire_init(&ctx, 0);
-
 	/* for pre-945g platforms use load detect */
-	if (intel_get_load_detect_pipe(connector, NULL, &tmp, &ctx)) {
+	ret = intel_get_load_detect_pipe(connector, NULL, &tmp, ctx);
+	if (ret > 0) {
 		if (intel_crt_detect_ddc(connector))
 			status = connector_status_connected;
 		else if (INTEL_GEN(dev_priv) < 4)
@@ -734,12 +734,11 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 			status = connector_status_disconnected;
 		else
 			status = connector_status_unknown;
-		intel_release_load_detect_pipe(connector, &tmp, &ctx);
-	} else
+		intel_release_load_detect_pipe(connector, &tmp, ctx);
+	} else if (ret == 0)
 		status = connector_status_unknown;
-
-	drm_modeset_drop_locks(&ctx);
-	drm_modeset_acquire_fini(&ctx);
+	else if (ret < 0)
+		status = ret;
 
 out:
 	intel_display_power_put(dev_priv, intel_encoder->power_domain);
@@ -811,7 +810,6 @@ void intel_crt_reset(struct drm_encoder *encoder)
 
 static const struct drm_connector_funcs intel_crt_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.detect = intel_crt_detect,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
@@ -823,6 +821,7 @@ static const struct drm_connector_funcs intel_crt_connector_funcs = {
 };
 
 static const struct drm_connector_helper_funcs intel_crt_connector_helper_funcs = {
+	.detect_ctx = intel_crt_detect,
 	.mode_valid = intel_crt_mode_valid,
 	.get_modes = intel_crt_get_modes,
 };
diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c
index 36832257cc9b..1575bde0cf90 100644
--- a/drivers/gpu/drm/i915/intel_csr.c
+++ b/drivers/gpu/drm/i915/intel_csr.c
@@ -49,7 +49,7 @@ MODULE_FIRMWARE(I915_CSR_SKL);
 MODULE_FIRMWARE(I915_CSR_BXT);
 #define BXT_CSR_VERSION_REQUIRED	CSR_VERSION(1, 7)
 
-#define FIRMWARE_URL  "https://01.org/linuxgraphics/intel-linux-graphics-firmwares"
+#define FIRMWARE_URL  "https://01.org/linuxgraphics/downloads/firmware"
 
 
 
diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c
index d8214ba8da14..0914ad96a71b 100644
--- a/drivers/gpu/drm/i915/intel_ddi.c
+++ b/drivers/gpu/drm/i915/intel_ddi.c
@@ -539,7 +539,7 @@ intel_ddi_get_buf_trans_fdi(struct drm_i915_private *dev_priv,
  * values in advance. This function programs the correct values for
  * DP/eDP/FDI use cases.
  */
-void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
+static void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	u32 iboost_bit = 0;
@@ -806,7 +806,7 @@ void hsw_fdi_link_train(struct intel_crtc *crtc,
 		   DP_TP_CTL_ENABLE);
 }
 
-void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
+static void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder)
 {
 	struct intel_dp *intel_dp = enc_to_intel_dp(&encoder->base);
 	struct intel_digital_port *intel_dig_port =
@@ -837,7 +837,8 @@ intel_ddi_get_crtc_encoder(struct intel_crtc *crtc)
 	return ret;
 }
 
-static struct intel_encoder *
+/* Finds the only possible encoder associated with the given CRTC. */
+struct intel_encoder *
 intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
@@ -1127,72 +1128,6 @@ void intel_ddi_clock_get(struct intel_encoder *encoder,
 		bxt_ddi_clock_get(encoder, pipe_config);
 }
 
-static bool
-hsw_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	struct intel_shared_dpll *pll;
-
-	pll = intel_get_shared_dpll(intel_crtc, crtc_state,
-				    encoder);
-	if (!pll)
-		DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
-				 pipe_name(intel_crtc->pipe));
-
-	return pll;
-}
-
-static bool
-skl_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	struct intel_shared_dpll *pll;
-
-	pll = intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
-	if (pll == NULL) {
-		DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
-				 pipe_name(intel_crtc->pipe));
-		return false;
-	}
-
-	return true;
-}
-
-static bool
-bxt_ddi_pll_select(struct intel_crtc *intel_crtc,
-		   struct intel_crtc_state *crtc_state,
-		   struct intel_encoder *encoder)
-{
-	return !!intel_get_shared_dpll(intel_crtc, crtc_state, encoder);
-}
-
-/*
- * Tries to find a *shared* PLL for the CRTC and store it in
- * intel_crtc->ddi_pll_sel.
- *
- * For private DPLLs, compute_config() should do the selection for us. This
- * function should be folded into compute_config() eventually.
- */
-bool intel_ddi_pll_select(struct intel_crtc *intel_crtc,
-			  struct intel_crtc_state *crtc_state)
-{
-	struct drm_i915_private *dev_priv = to_i915(intel_crtc->base.dev);
-	struct intel_encoder *encoder =
-		intel_ddi_get_crtc_new_encoder(crtc_state);
-
-	if (IS_GEN9_BC(dev_priv))
-		return skl_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-	else if (IS_GEN9_LP(dev_priv))
-		return bxt_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-	else
-		return hsw_ddi_pll_select(intel_crtc, crtc_state,
-					  encoder);
-}
-
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state)
 {
 	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
@@ -1309,6 +1244,11 @@ void intel_ddi_enable_transcoder_func(const struct intel_crtc_state *crtc_state)
 			temp |= TRANS_DDI_MODE_SELECT_HDMI;
 		else
 			temp |= TRANS_DDI_MODE_SELECT_DVI;
+
+		if (crtc_state->hdmi_scrambling)
+			temp |= TRANS_DDI_HDMI_SCRAMBLING_MASK;
+		if (crtc_state->hdmi_high_tmds_clock_ratio)
+			temp |= TRANS_DDI_HIGH_TMDS_CHAR_RATE;
 	} else if (type == INTEL_OUTPUT_ANALOG) {
 		temp |= TRANS_DDI_MODE_SELECT_FDI;
 		temp |= (crtc_state->fdi_lanes - 1) << 1;
@@ -1676,8 +1616,8 @@ uint32_t ddi_signal_levels(struct intel_dp *intel_dp)
 	return DDI_BUF_TRANS_SELECT(level);
 }
 
-void intel_ddi_clk_select(struct intel_encoder *encoder,
-			  struct intel_shared_dpll *pll)
+static void intel_ddi_clk_select(struct intel_encoder *encoder,
+				 struct intel_shared_dpll *pll)
 {
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	enum port port = intel_ddi_get_encoder_port(encoder);
@@ -1881,6 +1821,12 @@ static void intel_enable_ddi(struct intel_encoder *intel_encoder,
 	if (type == INTEL_OUTPUT_HDMI) {
 		struct intel_digital_port *intel_dig_port =
 			enc_to_dig_port(encoder);
+		bool clock_ratio = pipe_config->hdmi_high_tmds_clock_ratio;
+		bool scrambling = pipe_config->hdmi_scrambling;
+
+		intel_hdmi_handle_sink_scrambling(intel_encoder,
+						  conn_state->connector,
+						  clock_ratio, scrambling);
 
 		/* In HDMI/DVI mode, the port width, and swing/emphasis values
 		 * are ignored so nothing special needs to be done besides
@@ -1914,6 +1860,12 @@ static void intel_disable_ddi(struct intel_encoder *intel_encoder,
 	if (old_crtc_state->has_audio)
 		intel_audio_codec_disable(intel_encoder);
 
+	if (type == INTEL_OUTPUT_HDMI) {
+		intel_hdmi_handle_sink_scrambling(intel_encoder,
+						  old_conn_state->connector,
+						  false, false);
+	}
+
 	if (type == INTEL_OUTPUT_EDP) {
 		struct intel_dp *intel_dp = enc_to_intel_dp(encoder);
 
@@ -2040,6 +1992,12 @@ void intel_ddi_get_config(struct intel_encoder *encoder,
 
 		if (intel_hdmi->infoframe_enabled(&encoder->base, pipe_config))
 			pipe_config->has_infoframe = true;
+
+		if ((temp & TRANS_DDI_HDMI_SCRAMBLING_MASK) ==
+			TRANS_DDI_HDMI_SCRAMBLING_MASK)
+			pipe_config->hdmi_scrambling = true;
+		if (temp & TRANS_DDI_HIGH_TMDS_CHAR_RATE)
+			pipe_config->hdmi_high_tmds_clock_ratio = true;
 		/* fall through */
 	case TRANS_DDI_MODE_SELECT_DVI:
 		pipe_config->lane_count = 4;
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index e27ea89efd67..3617927af269 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -1997,7 +1997,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane)
 	unsigned int cpp = fb->format->cpp[plane];
 
 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		return cpp;
 	case I915_FORMAT_MOD_X_TILED:
 		if (IS_GEN2(dev_priv))
@@ -2033,7 +2033,7 @@ intel_tile_width_bytes(const struct drm_framebuffer *fb, int plane)
 static unsigned int
 intel_tile_height(const struct drm_framebuffer *fb, int plane)
 {
-	if (fb->modifier == DRM_FORMAT_MOD_NONE)
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
 		return 1;
 	else
 		return intel_tile_size(to_i915(fb->dev)) /
@@ -2107,7 +2107,7 @@ static unsigned int intel_surf_alignment(const struct drm_framebuffer *fb,
 		return 4096;
 
 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		return intel_linear_alignment(dev_priv);
 	case I915_FORMAT_MOD_X_TILED:
 		if (INTEL_GEN(dev_priv) >= 9)
@@ -2290,7 +2290,7 @@ static u32 intel_adjust_tile_offset(int *x, int *y,
 
 	WARN_ON(new_offset > old_offset);
 
-	if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int pitch_tiles;
 
@@ -2345,7 +2345,7 @@ static u32 _intel_compute_tile_offset(const struct drm_i915_private *dev_priv,
 	if (alignment)
 		alignment--;
 
-	if (fb_modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb_modifier != DRM_FORMAT_MOD_LINEAR) {
 		unsigned int tile_size, tile_width, tile_height;
 		unsigned int tile_rows, tiles, pitch_tiles;
 
@@ -2471,7 +2471,7 @@ intel_fill_fb_info(struct drm_i915_private *dev_priv,
 						    DRM_ROTATE_0, tile_size);
 		offset /= tile_size;
 
-		if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+		if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 			unsigned int tile_width, tile_height;
 			unsigned int pitch_tiles;
 			struct drm_rect r;
@@ -2803,7 +2803,7 @@ static int skl_max_plane_width(const struct drm_framebuffer *fb, int plane,
 	int cpp = fb->format->cpp[plane];
 
 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
 		switch (cpp) {
 		case 8:
@@ -2962,28 +2962,27 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state)
 	return 0;
 }
 
-static void i9xx_update_primary_plane(struct drm_plane *primary,
-				      const struct intel_crtc_state *crtc_state,
-				      const struct intel_plane_state *plane_state)
+static u32 i9xx_plane_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_i915_private *dev_priv = to_i915(primary->dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int plane = intel_crtc->plane;
-	u32 linear_offset;
-	u32 dspcntr;
-	i915_reg_t reg = DSPCNTR(plane);
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
-	int x = plane_state->base.src.x1 >> 16;
-	int y = plane_state->base.src.y1 >> 16;
-	unsigned long irqflags;
+	u32 dspcntr;
 
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
+	dspcntr = DISPLAY_PLANE_ENABLE | DISPPLANE_GAMMA_ENABLE;
 
-	dspcntr |= DISPLAY_PLANE_ENABLE;
+	if (IS_G4X(dev_priv) || IS_GEN5(dev_priv) ||
+	    IS_GEN6(dev_priv) || IS_IVYBRIDGE(dev_priv))
+		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
+
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
 
 	if (INTEL_GEN(dev_priv) < 4) {
-		if (intel_crtc->pipe == PIPE_B)
+		if (crtc->pipe == PIPE_B)
 			dspcntr |= DISPPLANE_SEL_PIPE_B;
 	}
 
@@ -3010,7 +3009,8 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 		dspcntr |= DISPPLANE_RGBX101010;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}
 
 	if (INTEL_GEN(dev_priv) >= 4 &&
@@ -3023,25 +3023,66 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	if (rotation & DRM_REFLECT_X)
 		dspcntr |= DISPPLANE_MIRROR;
 
-	if (IS_G4X(dev_priv))
-		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
+	return dspcntr;
+}
 
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
+int i9xx_check_plane_surface(struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	int src_x = plane_state->base.src.x1 >> 16;
+	int src_y = plane_state->base.src.y1 >> 16;
+	u32 offset;
+
+	intel_add_fb_offsets(&src_x, &src_y, plane_state, 0);
 
 	if (INTEL_GEN(dev_priv) >= 4)
-		intel_crtc->dspaddr_offset =
-			intel_compute_tile_offset(&x, &y, plane_state, 0);
+		offset = intel_compute_tile_offset(&src_x, &src_y,
+						   plane_state, 0);
+	else
+		offset = 0;
 
-	if (rotation & DRM_ROTATE_180) {
-		x += crtc_state->pipe_src_w - 1;
-		y += crtc_state->pipe_src_h - 1;
-	} else if (rotation & DRM_REFLECT_X) {
-		x += crtc_state->pipe_src_w - 1;
+	/* HSW/BDW do this automagically in hardware */
+	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv)) {
+		unsigned int rotation = plane_state->base.rotation;
+		int src_w = drm_rect_width(&plane_state->base.src) >> 16;
+		int src_h = drm_rect_height(&plane_state->base.src) >> 16;
+
+		if (rotation & DRM_ROTATE_180) {
+			src_x += src_w - 1;
+			src_y += src_h - 1;
+		} else if (rotation & DRM_REFLECT_X) {
+			src_x += src_w - 1;
+		}
 	}
 
+	plane_state->main.offset = offset;
+	plane_state->main.x = src_x;
+	plane_state->main.y = src_y;
+
+	return 0;
+}
+
+static void i9xx_update_primary_plane(struct drm_plane *primary,
+				      const struct intel_crtc_state *crtc_state,
+				      const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv = to_i915(primary->dev);
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	int plane = intel_crtc->plane;
+	u32 linear_offset;
+	u32 dspcntr = plane_state->ctl;
+	i915_reg_t reg = DSPCNTR(plane);
+	int x = plane_state->main.x;
+	int y = plane_state->main.y;
+	unsigned long irqflags;
+
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 
-	if (INTEL_GEN(dev_priv) < 4)
+	if (INTEL_GEN(dev_priv) >= 4)
+		intel_crtc->dspaddr_offset = plane_state->main.offset;
+	else
 		intel_crtc->dspaddr_offset = linear_offset;
 
 	intel_crtc->adjusted_x = x;
@@ -3068,7 +3109,12 @@ static void i9xx_update_primary_plane(struct drm_plane *primary,
 	I915_WRITE_FW(reg, dspcntr);
 
 	I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]);
-	if (INTEL_GEN(dev_priv) >= 4) {
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
+		I915_WRITE_FW(DSPSURF(plane),
+			      intel_plane_ggtt_offset(plane_state) +
+			      intel_crtc->dspaddr_offset);
+		I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
+	} else if (INTEL_GEN(dev_priv) >= 4) {
 		I915_WRITE_FW(DSPSURF(plane),
 			      intel_plane_ggtt_offset(plane_state) +
 			      intel_crtc->dspaddr_offset);
@@ -3105,101 +3151,10 @@ static void i9xx_disable_primary_plane(struct drm_plane *primary,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void ironlake_update_primary_plane(struct drm_plane *primary,
-					  const struct intel_crtc_state *crtc_state,
-					  const struct intel_plane_state *plane_state)
-{
-	struct drm_device *dev = primary->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc_state->base.crtc);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int plane = intel_crtc->plane;
-	u32 linear_offset;
-	u32 dspcntr;
-	i915_reg_t reg = DSPCNTR(plane);
-	unsigned int rotation = plane_state->base.rotation;
-	int x = plane_state->base.src.x1 >> 16;
-	int y = plane_state->base.src.y1 >> 16;
-	unsigned long irqflags;
-
-	dspcntr = DISPPLANE_GAMMA_ENABLE;
-	dspcntr |= DISPLAY_PLANE_ENABLE;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		dspcntr |= DISPPLANE_PIPE_CSC_ENABLE;
-
-	switch (fb->format->format) {
-	case DRM_FORMAT_C8:
-		dspcntr |= DISPPLANE_8BPP;
-		break;
-	case DRM_FORMAT_RGB565:
-		dspcntr |= DISPPLANE_BGRX565;
-		break;
-	case DRM_FORMAT_XRGB8888:
-		dspcntr |= DISPPLANE_BGRX888;
-		break;
-	case DRM_FORMAT_XBGR8888:
-		dspcntr |= DISPPLANE_RGBX888;
-		break;
-	case DRM_FORMAT_XRGB2101010:
-		dspcntr |= DISPPLANE_BGRX101010;
-		break;
-	case DRM_FORMAT_XBGR2101010:
-		dspcntr |= DISPPLANE_RGBX101010;
-		break;
-	default:
-		BUG();
-	}
-
-	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
-		dspcntr |= DISPPLANE_TILED;
-
-	if (rotation & DRM_ROTATE_180)
-		dspcntr |= DISPPLANE_ROTATE_180;
-
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv))
-		dspcntr |= DISPPLANE_TRICKLE_FEED_DISABLE;
-
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-
-	intel_crtc->dspaddr_offset =
-		intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	/* HSW+ does this automagically in hardware */
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) &&
-	    rotation & DRM_ROTATE_180) {
-		x += crtc_state->pipe_src_w - 1;
-		y += crtc_state->pipe_src_h - 1;
-	}
-
-	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
-
-	intel_crtc->adjusted_x = x;
-	intel_crtc->adjusted_y = y;
-
-	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
-
-	I915_WRITE_FW(reg, dspcntr);
-
-	I915_WRITE_FW(DSPSTRIDE(plane), fb->pitches[0]);
-	I915_WRITE_FW(DSPSURF(plane),
-		      intel_plane_ggtt_offset(plane_state) +
-		      intel_crtc->dspaddr_offset);
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) {
-		I915_WRITE_FW(DSPOFFSET(plane), (y << 16) | x);
-	} else {
-		I915_WRITE_FW(DSPTILEOFF(plane), (y << 16) | x);
-		I915_WRITE_FW(DSPLINOFF(plane), linear_offset);
-	}
-	POSTING_READ_FW(reg);
-
-	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
-}
-
 static u32
 intel_fb_stride_alignment(const struct drm_framebuffer *fb, int plane)
 {
-	if (fb->modifier == DRM_FORMAT_MOD_NONE)
+	if (fb->modifier == DRM_FORMAT_MOD_LINEAR)
 		return 64;
 	else
 		return intel_tile_width_bytes(fb, plane);
@@ -3254,7 +3209,7 @@ u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 	return stride;
 }
 
-u32 skl_plane_ctl_format(uint32_t pixel_format)
+static u32 skl_plane_ctl_format(uint32_t pixel_format)
 {
 	switch (pixel_format) {
 	case DRM_FORMAT_C8:
@@ -3295,10 +3250,10 @@ u32 skl_plane_ctl_format(uint32_t pixel_format)
 	return 0;
 }
 
-u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
+static u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 {
 	switch (fb_modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		break;
 	case I915_FORMAT_MOD_X_TILED:
 		return PLANE_CTL_TILED_X;
@@ -3313,7 +3268,7 @@ u32 skl_plane_ctl_tiling(uint64_t fb_modifier)
 	return 0;
 }
 
-u32 skl_plane_ctl_rotation(unsigned int rotation)
+static u32 skl_plane_ctl_rotation(unsigned int rotation)
 {
 	switch (rotation) {
 	case DRM_ROTATE_0:
@@ -3335,6 +3290,37 @@ u32 skl_plane_ctl_rotation(unsigned int rotation)
 	return 0;
 }
 
+u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
+		  const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
+	unsigned int rotation = plane_state->base.rotation;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	u32 plane_ctl;
+
+	plane_ctl = PLANE_CTL_ENABLE;
+
+	if (!IS_GEMINILAKE(dev_priv)) {
+		plane_ctl |=
+			PLANE_CTL_PIPE_GAMMA_ENABLE |
+			PLANE_CTL_PIPE_CSC_ENABLE |
+			PLANE_CTL_PLANE_GAMMA_DISABLE;
+	}
+
+	plane_ctl |= skl_plane_ctl_format(fb->format->format);
+	plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
+	plane_ctl |= skl_plane_ctl_rotation(rotation);
+
+	if (key->flags & I915_SET_COLORKEY_DESTINATION)
+		plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION;
+	else if (key->flags & I915_SET_COLORKEY_SOURCE)
+		plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
+
+	return plane_ctl;
+}
+
 static void skylake_update_primary_plane(struct drm_plane *plane,
 					 const struct intel_crtc_state *crtc_state,
 					 const struct intel_plane_state *plane_state)
@@ -3345,7 +3331,7 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	struct drm_framebuffer *fb = plane_state->base.fb;
 	enum plane_id plane_id = to_intel_plane(plane)->id;
 	enum pipe pipe = to_intel_plane(plane)->pipe;
-	u32 plane_ctl;
+	u32 plane_ctl = plane_state->ctl;
 	unsigned int rotation = plane_state->base.rotation;
 	u32 stride = skl_plane_stride(fb, 0, rotation);
 	u32 surf_addr = plane_state->main.offset;
@@ -3360,19 +3346,6 @@ static void skylake_update_primary_plane(struct drm_plane *plane,
 	int dst_h = drm_rect_height(&plane_state->base.dst);
 	unsigned long irqflags;
 
-	plane_ctl = PLANE_CTL_ENABLE;
-
-	if (!IS_GEMINILAKE(dev_priv)) {
-		plane_ctl |=
-			PLANE_CTL_PIPE_GAMMA_ENABLE |
-			PLANE_CTL_PIPE_CSC_ENABLE |
-			PLANE_CTL_PLANE_GAMMA_DISABLE;
-	}
-
-	plane_ctl |= skl_plane_ctl_format(fb->format->format);
-	plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
-	plane_ctl |= skl_plane_ctl_rotation(rotation);
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -3439,17 +3412,6 @@ static void skylake_disable_primary_plane(struct drm_plane *primary,
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-/* Assume fb object is pinned & idle & fenced and just update base pointers */
-static int
-intel_pipe_set_base_atomic(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-			   int x, int y, enum mode_set_atomic state)
-{
-	/* Support for kgdboc is disabled, this needs a major rework. */
-	DRM_ERROR("legacy panic handler not supported any more.\n");
-
-	return -ENODEV;
-}
-
 static void intel_complete_page_flips(struct drm_i915_private *dev_priv)
 {
 	struct intel_crtc *crtc;
@@ -6317,6 +6279,17 @@ intel_reduce_m_n_ratio(uint32_t *num, uint32_t *den)
 static void compute_m_n(unsigned int m, unsigned int n,
 			uint32_t *ret_m, uint32_t *ret_n)
 {
+	/*
+	 * Reduce M/N as much as possible without loss in precision. Several DP
+	 * dongles in particular seem to be fussy about too large *link* M/N
+	 * values. The passed in values are more likely to have the least
+	 * significant bits zero than M after rounding below, so do this first.
+	 */
+	while ((m & 1) == 0 && (n & 1) == 0) {
+		m >>= 1;
+		n >>= 1;
+	}
+
 	*ret_n = min_t(unsigned int, roundup_pow_of_two(n), DATA_LINK_N_MAX);
 	*ret_m = div_u64((uint64_t) m * *ret_n, n);
 	intel_reduce_m_n_ratio(ret_m, ret_n);
@@ -8406,7 +8379,7 @@ skylake_get_initial_plane_config(struct intel_crtc *crtc,
 	tiling = val & PLANE_CTL_TILED_MASK;
 	switch (tiling) {
 	case PLANE_CTL_TILED_LINEAR:
-		fb->modifier = DRM_FORMAT_MOD_NONE;
+		fb->modifier = DRM_FORMAT_MOD_LINEAR;
 		break;
 	case PLANE_CTL_TILED_X:
 		plane_config->tiling = I915_TILING_X;
@@ -8862,8 +8835,14 @@ static int haswell_crtc_compute_clock(struct intel_crtc *crtc,
 				      struct intel_crtc_state *crtc_state)
 {
 	if (!intel_crtc_has_type(crtc_state, INTEL_OUTPUT_DSI)) {
-		if (!intel_ddi_pll_select(crtc, crtc_state))
+		struct intel_encoder *encoder =
+			intel_ddi_get_crtc_new_encoder(crtc_state);
+
+		if (!intel_get_shared_dpll(crtc, crtc_state, encoder)) {
+			DRM_DEBUG_DRIVER("failed to find PLL for pipe %c\n",
+					 pipe_name(crtc->pipe));
 			return -EINVAL;
+		}
 	}
 
 	crtc->lowfreq_avail = false;
@@ -9159,6 +9138,31 @@ out:
 	return active;
 }
 
+static u32 i845_cursor_ctl(const struct intel_crtc_state *crtc_state,
+			   const struct intel_plane_state *plane_state)
+{
+	unsigned int width = plane_state->base.crtc_w;
+	unsigned int stride = roundup_pow_of_two(width) * 4;
+
+	switch (stride) {
+	default:
+		WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
+			  width, stride);
+		stride = 256;
+		/* fallthrough */
+	case 256:
+	case 512:
+	case 1024:
+	case 2048:
+		break;
+	}
+
+	return CURSOR_ENABLE |
+		CURSOR_GAMMA_ENABLE |
+		CURSOR_FORMAT_ARGB |
+		CURSOR_STRIDE(stride);
+}
+
 static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 			       const struct intel_plane_state *plane_state)
 {
@@ -9170,26 +9174,8 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 	if (plane_state && plane_state->base.visible) {
 		unsigned int width = plane_state->base.crtc_w;
 		unsigned int height = plane_state->base.crtc_h;
-		unsigned int stride = roundup_pow_of_two(width) * 4;
-
-		switch (stride) {
-		default:
-			WARN_ONCE(1, "Invalid cursor width/stride, width=%u, stride=%u\n",
-				  width, stride);
-			stride = 256;
-			/* fallthrough */
-		case 256:
-		case 512:
-		case 1024:
-		case 2048:
-			break;
-		}
-
-		cntl |= CURSOR_ENABLE |
-			CURSOR_GAMMA_ENABLE |
-			CURSOR_FORMAT_ARGB |
-			CURSOR_STRIDE(stride);
 
+		cntl = plane_state->ctl;
 		size = (height << 12) | width;
 	}
 
@@ -9222,6 +9208,43 @@ static void i845_update_cursor(struct drm_crtc *crtc, u32 base,
 	}
 }
 
+static u32 i9xx_cursor_ctl(const struct intel_crtc_state *crtc_state,
+			   const struct intel_plane_state *plane_state)
+{
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	struct intel_crtc *crtc = to_intel_crtc(crtc_state->base.crtc);
+	enum pipe pipe = crtc->pipe;
+	u32 cntl;
+
+	cntl = MCURSOR_GAMMA_ENABLE;
+
+	if (HAS_DDI(dev_priv))
+		cntl |= CURSOR_PIPE_CSC_ENABLE;
+
+	cntl |= pipe << 28; /* Connect to correct pipe */
+
+	switch (plane_state->base.crtc_w) {
+	case 64:
+		cntl |= CURSOR_MODE_64_ARGB_AX;
+		break;
+	case 128:
+		cntl |= CURSOR_MODE_128_ARGB_AX;
+		break;
+	case 256:
+		cntl |= CURSOR_MODE_256_ARGB_AX;
+		break;
+	default:
+		MISSING_CASE(plane_state->base.crtc_w);
+		return 0;
+	}
+
+	if (plane_state->base.rotation & DRM_ROTATE_180)
+		cntl |= CURSOR_ROTATE_180;
+
+	return cntl;
+}
+
 static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
 			       const struct intel_plane_state *plane_state)
 {
@@ -9231,30 +9254,8 @@ static void i9xx_update_cursor(struct drm_crtc *crtc, u32 base,
 	int pipe = intel_crtc->pipe;
 	uint32_t cntl = 0;
 
-	if (plane_state && plane_state->base.visible) {
-		cntl = MCURSOR_GAMMA_ENABLE;
-		switch (plane_state->base.crtc_w) {
-			case 64:
-				cntl |= CURSOR_MODE_64_ARGB_AX;
-				break;
-			case 128:
-				cntl |= CURSOR_MODE_128_ARGB_AX;
-				break;
-			case 256:
-				cntl |= CURSOR_MODE_256_ARGB_AX;
-				break;
-			default:
-				MISSING_CASE(plane_state->base.crtc_w);
-				return;
-		}
-		cntl |= pipe << 28; /* Connect to correct pipe */
-
-		if (HAS_DDI(dev_priv))
-			cntl |= CURSOR_PIPE_CSC_ENABLE;
-
-		if (plane_state->base.rotation & DRM_ROTATE_180)
-			cntl |= CURSOR_ROTATE_180;
-	}
+	if (plane_state && plane_state->base.visible)
+		cntl = plane_state->ctl;
 
 	if (intel_crtc->cursor_cntl != cntl) {
 		I915_WRITE_FW(CURCNTR(pipe), cntl);
@@ -9491,10 +9492,10 @@ static int intel_modeset_setup_plane_state(struct drm_atomic_state *state,
 	return 0;
 }
 
-bool intel_get_load_detect_pipe(struct drm_connector *connector,
-				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old,
-				struct drm_modeset_acquire_ctx *ctx)
+int intel_get_load_detect_pipe(struct drm_connector *connector,
+			       struct drm_display_mode *mode,
+			       struct intel_load_detect_pipe *old,
+			       struct drm_modeset_acquire_ctx *ctx)
 {
 	struct intel_crtc *intel_crtc;
 	struct intel_encoder *intel_encoder =
@@ -9517,10 +9518,7 @@ bool intel_get_load_detect_pipe(struct drm_connector *connector,
 
 	old->restore_state = NULL;
 
-retry:
-	ret = drm_modeset_lock(&config->connection_mutex, ctx);
-	if (ret)
-		goto fail;
+	WARN_ON(!drm_modeset_is_locked(&config->connection_mutex));
 
 	/*
 	 * Algorithm gets a little messy:
@@ -9670,10 +9668,8 @@ fail:
 		restore_state = NULL;
 	}
 
-	if (ret == -EDEADLK) {
-		drm_modeset_backoff(ctx);
-		goto retry;
-	}
+	if (ret == -EDEADLK)
+		return ret;
 
 	return false;
 }
@@ -10354,7 +10350,7 @@ static void skl_do_mmio_flip(struct intel_crtc *intel_crtc,
 	ctl = I915_READ(PLANE_CTL(pipe, 0));
 	ctl &= ~PLANE_CTL_TILED_MASK;
 	switch (fb->modifier) {
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 		break;
 	case I915_FORMAT_MOD_X_TILED:
 		ctl |= PLANE_CTL_TILED_X;
@@ -10715,7 +10711,7 @@ out_hang:
 		state = drm_atomic_state_alloc(dev);
 		if (!state)
 			return -ENOMEM;
-		state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
+		state->acquire_ctx = dev->mode_config.acquire_ctx;
 
 retry:
 		plane_state = drm_atomic_get_plane_state(state, primary);
@@ -11005,7 +11001,6 @@ static int intel_crtc_atomic_check(struct drm_crtc *crtc,
 }
 
 static const struct drm_crtc_helper_funcs intel_helper_funcs = {
-	.mode_set_base_atomic = intel_pipe_set_base_atomic,
 	.atomic_begin = intel_begin_crtc_commit,
 	.atomic_flush = intel_finish_crtc_commit,
 	.atomic_check = intel_crtc_atomic_check,
@@ -11709,6 +11704,9 @@ intel_pipe_config_compare(struct drm_i915_private *dev_priv,
 	if ((INTEL_GEN(dev_priv) < 8 && !IS_HASWELL(dev_priv)) ||
 	    IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
 		PIPE_CONF_CHECK_I(limited_color_range);
+
+	PIPE_CONF_CHECK_I(hdmi_scrambling);
+	PIPE_CONF_CHECK_I(hdmi_high_tmds_clock_ratio);
 	PIPE_CONF_CHECK_I(has_infoframe);
 
 	PIPE_CONF_CHECK_I(has_audio);
@@ -13009,17 +13007,6 @@ static int intel_atomic_commit(struct drm_device *dev,
 	struct drm_i915_private *dev_priv = to_i915(dev);
 	int ret = 0;
 
-	/*
-	 * The intel_legacy_cursor_update() fast path takes care
-	 * of avoiding the vblank waits for simple cursor
-	 * movement and flips. For cursor on/off and size changes,
-	 * we want to perform the vblank waits so that watermark
-	 * updates happen during the correct frames. Gen9+ have
-	 * double buffered watermarks and so shouldn't need this.
-	 */
-	if (INTEL_GEN(dev_priv) < 9)
-		state->legacy_cursor_update = false;
-
 	ret = drm_atomic_helper_setup_commit(state, nonblock);
 	if (ret)
 		return ret;
@@ -13035,6 +13022,26 @@ static int intel_atomic_commit(struct drm_device *dev,
 		return ret;
 	}
 
+	/*
+	 * The intel_legacy_cursor_update() fast path takes care
+	 * of avoiding the vblank waits for simple cursor
+	 * movement and flips. For cursor on/off and size changes,
+	 * we want to perform the vblank waits so that watermark
+	 * updates happen during the correct frames. Gen9+ have
+	 * double buffered watermarks and so shouldn't need this.
+	 *
+	 * Do this after drm_atomic_helper_setup_commit() and
+	 * intel_atomic_prepare_commit() because we still want
+	 * to skip the flip and fb cleanup waits. Although that
+	 * does risk yanking the mapping from under the display
+	 * engine.
+	 *
+	 * FIXME doing watermarks and fb cleanup from a vblank worker
+	 * (assuming we had any) would solve these problems.
+	 */
+	if (INTEL_GEN(dev_priv) < 9)
+		state->legacy_cursor_update = false;
+
 	drm_atomic_helper_swap_state(state, true);
 	dev_priv->wm.distrust_bios_wm = false;
 	intel_shared_dpll_swap_state(state);
@@ -13075,7 +13082,7 @@ void intel_crtc_restore_mode(struct drm_crtc *crtc)
 		return;
 	}
 
-	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
+	state->acquire_ctx = crtc->dev->mode_config.acquire_ctx;
 
 retry:
 	crtc_state = drm_atomic_get_crtc_state(state, crtc);
@@ -13098,50 +13105,8 @@ out:
 	drm_atomic_state_put(state);
 }
 
-/*
- * FIXME: Remove this once i915 is fully DRIVER_ATOMIC by calling
- *        drm_atomic_helper_legacy_gamma_set() directly.
- */
-static int intel_atomic_legacy_gamma_set(struct drm_crtc *crtc,
-					 u16 *red, u16 *green, u16 *blue,
-					 uint32_t size)
-{
-	struct drm_device *dev = crtc->dev;
-	struct drm_mode_config *config = &dev->mode_config;
-	struct drm_crtc_state *state;
-	int ret;
-
-	ret = drm_atomic_helper_legacy_gamma_set(crtc, red, green, blue, size);
-	if (ret)
-		return ret;
-
-	/*
-	 * Make sure we update the legacy properties so this works when
-	 * atomic is not enabled.
-	 */
-
-	state = crtc->state;
-
-	drm_object_property_set_value(&crtc->base,
-				      config->degamma_lut_property,
-				      (state->degamma_lut) ?
-				      state->degamma_lut->base.id : 0);
-
-	drm_object_property_set_value(&crtc->base,
-				      config->ctm_property,
-				      (state->ctm) ?
-				      state->ctm->base.id : 0);
-
-	drm_object_property_set_value(&crtc->base,
-				      config->gamma_lut_property,
-				      (state->gamma_lut) ?
-				      state->gamma_lut->base.id : 0);
-
-	return 0;
-}
-
 static const struct drm_crtc_funcs intel_crtc_funcs = {
-	.gamma_set = intel_atomic_legacy_gamma_set,
+	.gamma_set = drm_atomic_helper_legacy_gamma_set,
 	.set_config = drm_atomic_helper_set_config,
 	.set_property = drm_atomic_helper_crtc_set_property,
 	.destroy = intel_crtc_destroy,
@@ -13344,6 +13309,14 @@ intel_check_primary_plane(struct drm_plane *plane,
 		ret = skl_check_plane_surface(state);
 		if (ret)
 			return ret;
+
+		state->ctl = skl_plane_ctl(crtc_state, state);
+	} else {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = i9xx_plane_ctl(crtc_state, state);
 	}
 
 	return 0;
@@ -13603,12 +13576,6 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe)
 
 		primary->update_plane = skylake_update_primary_plane;
 		primary->disable_plane = skylake_disable_primary_plane;
-	} else if (HAS_PCH_SPLIT(dev_priv)) {
-		intel_primary_formats = i965_primary_formats;
-		num_formats = ARRAY_SIZE(i965_primary_formats);
-
-		primary->update_plane = ironlake_update_primary_plane;
-		primary->disable_plane = i9xx_disable_primary_plane;
 	} else if (INTEL_GEN(dev_priv) >= 4) {
 		intel_primary_formats = i965_primary_formats;
 		num_formats = ARRAY_SIZE(i965_primary_formats);
@@ -13680,6 +13647,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 			 struct intel_crtc_state *crtc_state,
 			 struct intel_plane_state *state)
 {
+	struct drm_i915_private *dev_priv = to_i915(plane->dev);
 	struct drm_framebuffer *fb = state->base.fb;
 	struct drm_i915_gem_object *obj = intel_fb_obj(fb);
 	enum pipe pipe = to_intel_plane(plane)->pipe;
@@ -13699,7 +13667,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 		return 0;
 
 	/* Check for which cursor types we support */
-	if (!cursor_size_ok(to_i915(plane->dev), state->base.crtc_w,
+	if (!cursor_size_ok(dev_priv, state->base.crtc_w,
 			    state->base.crtc_h)) {
 		DRM_DEBUG("Cursor dimension %dx%d not supported\n",
 			  state->base.crtc_w, state->base.crtc_h);
@@ -13712,7 +13680,7 @@ intel_check_cursor_plane(struct drm_plane *plane,
 		return -ENOMEM;
 	}
 
-	if (fb->modifier != DRM_FORMAT_MOD_NONE) {
+	if (fb->modifier != DRM_FORMAT_MOD_LINEAR) {
 		DRM_DEBUG_KMS("cursor cannot be tiled\n");
 		return -EINVAL;
 	}
@@ -13727,12 +13695,17 @@ intel_check_cursor_plane(struct drm_plane *plane,
 	 * display power well must be turned off and on again.
 	 * Refuse the put the cursor into that compromised position.
 	 */
-	if (IS_CHERRYVIEW(to_i915(plane->dev)) && pipe == PIPE_C &&
+	if (IS_CHERRYVIEW(dev_priv) && pipe == PIPE_C &&
 	    state->base.visible && state->base.crtc_x < 0) {
 		DRM_DEBUG_KMS("CHV cursor C not allowed to straddle the left screen edge\n");
 		return -EINVAL;
 	}
 
+	if (IS_I845G(dev_priv) || IS_I865G(dev_priv))
+		state->ctl = i845_cursor_ctl(crtc_state, state);
+	else
+		state->ctl = i9xx_cursor_ctl(crtc_state, state);
+
 	return 0;
 }
 
@@ -14368,7 +14341,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 				      mode_cmd->modifier[0]);
 			goto err;
 		}
-	case DRM_FORMAT_MOD_NONE:
+	case DRM_FORMAT_MOD_LINEAR:
 	case I915_FORMAT_MOD_X_TILED:
 		break;
 	default:
@@ -14391,7 +14364,7 @@ static int intel_framebuffer_init(struct intel_framebuffer *intel_fb,
 					   mode_cmd->pixel_format);
 	if (mode_cmd->pitches[0] > pitch_limit) {
 		DRM_DEBUG_KMS("%s pitch (%u) must be at most %d\n",
-			      mode_cmd->modifier[0] != DRM_FORMAT_MOD_NONE ?
+			      mode_cmd->modifier[0] != DRM_FORMAT_MOD_LINEAR ?
 			      "tiled" : "linear",
 			      mode_cmd->pitches[0], pitch_limit);
 		goto err;
@@ -15084,6 +15057,7 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	struct drm_connector *crt = NULL;
 	struct intel_load_detect_pipe load_detect_temp;
 	struct drm_modeset_acquire_ctx *ctx = dev->mode_config.acquire_ctx;
+	int ret;
 
 	/* We can't just switch on the pipe A, we need to set things up with a
 	 * proper mode and output configuration. As a gross hack, enable pipe A
@@ -15100,7 +15074,10 @@ static void intel_enable_pipe_a(struct drm_device *dev)
 	if (!crt)
 		return;
 
-	if (intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx))
+	ret = intel_get_load_detect_pipe(crt, NULL, &load_detect_temp, ctx);
+	WARN(ret < 0, "All modeset mutexes are locked, but intel_get_load_detect_pipe failed\n");
+
+	if (ret > 0)
 		intel_release_load_detect_pipe(crt, &load_detect_temp, ctx);
 }
 
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index fd96a6cf7326..ee77b519835c 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -4566,7 +4566,7 @@ intel_dp_unset_edid(struct intel_dp *intel_dp)
 	intel_dp->has_audio = false;
 }
 
-static enum drm_connector_status
+static int
 intel_dp_long_pulse(struct intel_connector *intel_connector)
 {
 	struct drm_connector *connector = &intel_connector->base;
@@ -4577,6 +4577,8 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
 	enum drm_connector_status status;
 	u8 sink_irq_vector = 0;
 
+	WARN_ON(!drm_modeset_is_locked(&connector->dev->mode_config.connection_mutex));
+
 	intel_display_power_get(to_i915(dev), intel_dp->aux_power_domain);
 
 	/* Can't disconnect eDP, but you can close the lid... */
@@ -4634,16 +4636,20 @@ intel_dp_long_pulse(struct intel_connector *intel_connector)
 		 */
 		status = connector_status_disconnected;
 		goto out;
-	} else if (connector->status == connector_status_connected) {
+	} else {
 		/*
-		 * If display was connected already and is still connected
-		 * check links status, there has been known issues of
-		 * link loss triggerring long pulse!!!!
+		 * If display is now connected check links status,
+		 * there has been known issues of link loss triggerring
+		 * long pulse.
+		 *
+		 * Some sinks (eg. ASUS PB287Q) seem to perform some
+		 * weird HPD ping pong during modesets. So we can apparently
+		 * end up with HPD going low during a modeset, and then
+		 * going back up soon after. And once that happens we must
+		 * retrain the link to get a picture. That's in case no
+		 * userspace component reacted to intermittent HPD dip.
 		 */
-		drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 		intel_dp_check_link_status(intel_dp);
-		drm_modeset_unlock(&dev->mode_config.connection_mutex);
-		goto out;
 	}
 
 	/*
@@ -4682,11 +4688,13 @@ out:
 	return status;
 }
 
-static enum drm_connector_status
-intel_dp_detect(struct drm_connector *connector, bool force)
+static int
+intel_dp_detect(struct drm_connector *connector,
+		struct drm_modeset_acquire_ctx *ctx,
+		bool force)
 {
 	struct intel_dp *intel_dp = intel_attached_dp(connector);
-	enum drm_connector_status status = connector->status;
+	int status = connector->status;
 
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n",
 		      connector->base.id, connector->name);
@@ -5014,7 +5022,6 @@ void intel_dp_encoder_reset(struct drm_encoder *encoder)
 
 static const struct drm_connector_funcs intel_dp_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.detect = intel_dp_detect,
 	.force = intel_dp_force,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = intel_dp_set_property,
@@ -5027,6 +5034,7 @@ static const struct drm_connector_funcs intel_dp_connector_funcs = {
 };
 
 static const struct drm_connector_helper_funcs intel_dp_connector_helper_funcs = {
+	.detect_ctx = intel_dp_detect,
 	.get_modes = intel_dp_get_modes,
 	.mode_valid = intel_dp_mode_valid,
 };
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 51228fe4283b..aaee3949a422 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -398,6 +398,9 @@ struct intel_plane_state {
 		int x, y;
 	} aux;
 
+	/* plane control register */
+	u32 ctl;
+
 	/*
 	 * scaler_id
 	 *    = -1 : not using a scaler
@@ -729,6 +732,12 @@ struct intel_crtc_state {
 
 	/* bitmask of visible planes (enum plane_id) */
 	u8 active_planes;
+
+	/* HDMI scrambling status */
+	bool hdmi_scrambling;
+
+	/* HDMI High TMDS char rate ratio */
+	bool hdmi_high_tmds_clock_ratio;
 };
 
 struct intel_crtc {
@@ -1220,12 +1229,9 @@ void intel_crt_init(struct drm_i915_private *dev_priv);
 void intel_crt_reset(struct drm_encoder *encoder);
 
 /* intel_ddi.c */
-void intel_ddi_clk_select(struct intel_encoder *encoder,
-			  struct intel_shared_dpll *pll);
 void intel_ddi_fdi_post_disable(struct intel_encoder *intel_encoder,
 				struct intel_crtc_state *old_crtc_state,
 				struct drm_connector_state *old_conn_state);
-void intel_prepare_dp_ddi_buffers(struct intel_encoder *encoder);
 void hsw_fdi_link_train(struct intel_crtc *crtc,
 			const struct intel_crtc_state *crtc_state);
 void intel_ddi_init(struct drm_i915_private *dev_priv, enum port port);
@@ -1236,8 +1242,8 @@ void intel_ddi_disable_transcoder_func(struct drm_i915_private *dev_priv,
 				       enum transcoder cpu_transcoder);
 void intel_ddi_enable_pipe_clock(const struct intel_crtc_state *crtc_state);
 void intel_ddi_disable_pipe_clock(const  struct intel_crtc_state *crtc_state);
-bool intel_ddi_pll_select(struct intel_crtc *crtc,
-			  struct intel_crtc_state *crtc_state);
+struct intel_encoder *
+intel_ddi_get_crtc_new_encoder(struct intel_crtc_state *crtc_state);
 void intel_ddi_set_pipe_settings(const struct intel_crtc_state *crtc_state);
 void intel_ddi_prepare_link_retrain(struct intel_dp *intel_dp);
 bool intel_ddi_connector_get_hw_state(struct intel_connector *intel_connector);
@@ -1246,7 +1252,6 @@ bool intel_ddi_is_audio_enabled(struct drm_i915_private *dev_priv,
 void intel_ddi_get_config(struct intel_encoder *encoder,
 			  struct intel_crtc_state *pipe_config);
 
-void intel_ddi_init_dp_buf_reg(struct intel_encoder *encoder);
 void intel_ddi_clock_get(struct intel_encoder *encoder,
 			 struct intel_crtc_state *pipe_config);
 void intel_ddi_set_vc_payload_alloc(const struct intel_crtc_state *crtc_state,
@@ -1353,10 +1358,10 @@ int ironlake_get_lanes_required(int target_clock, int link_bw, int bpp);
 void vlv_wait_port_ready(struct drm_i915_private *dev_priv,
 			 struct intel_digital_port *dport,
 			 unsigned int expected_mask);
-bool intel_get_load_detect_pipe(struct drm_connector *connector,
-				struct drm_display_mode *mode,
-				struct intel_load_detect_pipe *old,
-				struct drm_modeset_acquire_ctx *ctx);
+int intel_get_load_detect_pipe(struct drm_connector *connector,
+			       struct drm_display_mode *mode,
+			       struct intel_load_detect_pipe *old,
+			       struct drm_modeset_acquire_ctx *ctx);
 void intel_release_load_detect_pipe(struct drm_connector *connector,
 				    struct intel_load_detect_pipe *old,
 				    struct drm_modeset_acquire_ctx *ctx);
@@ -1445,12 +1450,12 @@ static inline u32 intel_plane_ggtt_offset(const struct intel_plane_state *state)
 	return i915_ggtt_offset(state->vma);
 }
 
-u32 skl_plane_ctl_format(uint32_t pixel_format);
-u32 skl_plane_ctl_tiling(uint64_t fb_modifier);
-u32 skl_plane_ctl_rotation(unsigned int rotation);
+u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state,
+		  const struct intel_plane_state *plane_state);
 u32 skl_plane_stride(const struct drm_framebuffer *fb, int plane,
 		     unsigned int rotation);
 int skl_check_plane_surface(struct intel_plane_state *plane_state);
+int i9xx_check_plane_surface(struct intel_plane_state *plane_state);
 
 /* intel_csr.c */
 void intel_csr_ucode_init(struct drm_i915_private *);
@@ -1620,6 +1625,10 @@ struct intel_hdmi *enc_to_intel_hdmi(struct drm_encoder *encoder);
 bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 			       struct intel_crtc_state *pipe_config,
 			       struct drm_connector_state *conn_state);
+void intel_hdmi_handle_sink_scrambling(struct intel_encoder *intel_encoder,
+				       struct drm_connector *connector,
+				       bool high_tmds_clock_ratio,
+				       bool scrambling);
 void intel_dp_dual_mode_set_tmds_output(struct intel_hdmi *hdmi, bool enable);
 
 
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 4200faa520c7..854e8e0c836b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -36,45 +36,45 @@ static const struct engine_info {
 	int (*init_execlists)(struct intel_engine_cs *engine);
 } intel_engines[] = {
 	[RCS] = {
-		.name = "render ring",
-		.exec_id = I915_EXEC_RENDER,
+		.name = "rcs",
 		.hw_id = RCS_HW,
+		.exec_id = I915_EXEC_RENDER,
 		.mmio_base = RENDER_RING_BASE,
 		.irq_shift = GEN8_RCS_IRQ_SHIFT,
 		.init_execlists = logical_render_ring_init,
 		.init_legacy = intel_init_render_ring_buffer,
 	},
 	[BCS] = {
-		.name = "blitter ring",
-		.exec_id = I915_EXEC_BLT,
+		.name = "bcs",
 		.hw_id = BCS_HW,
+		.exec_id = I915_EXEC_BLT,
 		.mmio_base = BLT_RING_BASE,
 		.irq_shift = GEN8_BCS_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_blt_ring_buffer,
 	},
 	[VCS] = {
-		.name = "bsd ring",
-		.exec_id = I915_EXEC_BSD,
+		.name = "vcs",
 		.hw_id = VCS_HW,
+		.exec_id = I915_EXEC_BSD,
 		.mmio_base = GEN6_BSD_RING_BASE,
 		.irq_shift = GEN8_VCS1_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_bsd_ring_buffer,
 	},
 	[VCS2] = {
-		.name = "bsd2 ring",
-		.exec_id = I915_EXEC_BSD,
+		.name = "vcs2",
 		.hw_id = VCS2_HW,
+		.exec_id = I915_EXEC_BSD,
 		.mmio_base = GEN8_BSD2_RING_BASE,
 		.irq_shift = GEN8_VCS2_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
 		.init_legacy = intel_init_bsd2_ring_buffer,
 	},
 	[VECS] = {
-		.name = "video enhancement ring",
-		.exec_id = I915_EXEC_VEBOX,
+		.name = "vecs",
 		.hw_id = VECS_HW,
+		.exec_id = I915_EXEC_VEBOX,
 		.mmio_base = VEBOX_RING_BASE,
 		.irq_shift = GEN8_VECS_IRQ_SHIFT,
 		.init_execlists = logical_xcs_ring_init,
@@ -242,12 +242,12 @@ void intel_engine_init_global_seqno(struct intel_engine_cs *engine, u32 seqno)
 		void *semaphores;
 
 		/* Semaphores are in noncoherent memory, flush to be safe */
-		semaphores = kmap(page);
+		semaphores = kmap_atomic(page);
 		memset(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 		       0, I915_NUM_ENGINES * gen8_semaphore_seqno_size);
 		drm_clflush_virt_range(semaphores + GEN8_SEMAPHORE_OFFSET(engine->id, 0),
 				       I915_NUM_ENGINES * gen8_semaphore_seqno_size);
-		kunmap(page);
+		kunmap_atomic(semaphores);
 	}
 
 	intel_write_status_page(engine, I915_GEM_HWS_INDEX, seqno);
@@ -1111,6 +1111,15 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
+	if (READ_ONCE(dev_priv->gt.active_requests))
+		return false;
+
+	/* If the driver is wedged, HW state may be very inconsistent and
+	 * report that it is still busy, even though we have stopped using it.
+	 */
+	if (i915_terminally_wedged(&dev_priv->gpu_error))
+		return true;
+
 	for_each_engine(engine, dev_priv, id) {
 		if (!intel_engine_is_idle(engine))
 			return false;
diff --git a/drivers/gpu/drm/i915/intel_guc_fwif.h b/drivers/gpu/drm/i915/intel_guc_fwif.h
index 25691f0e4c50..cb36cbf3818f 100644
--- a/drivers/gpu/drm/i915/intel_guc_fwif.h
+++ b/drivers/gpu/drm/i915/intel_guc_fwif.h
@@ -26,14 +26,14 @@
 #define GFXCORE_FAMILY_GEN9		12
 #define GFXCORE_FAMILY_UNKNOWN		0x7fffffff
 
-#define GUC_CTX_PRIORITY_KMD_HIGH	0
-#define GUC_CTX_PRIORITY_HIGH		1
-#define GUC_CTX_PRIORITY_KMD_NORMAL	2
-#define GUC_CTX_PRIORITY_NORMAL		3
-#define GUC_CTX_PRIORITY_NUM		4
+#define GUC_CLIENT_PRIORITY_KMD_HIGH	0
+#define GUC_CLIENT_PRIORITY_HIGH	1
+#define GUC_CLIENT_PRIORITY_KMD_NORMAL	2
+#define GUC_CLIENT_PRIORITY_NORMAL	3
+#define GUC_CLIENT_PRIORITY_NUM		4
 
-#define GUC_MAX_GPU_CONTEXTS		1024
-#define	GUC_INVALID_CTX_ID		GUC_MAX_GPU_CONTEXTS
+#define GUC_MAX_STAGE_DESCRIPTORS	1024
+#define	GUC_INVALID_STAGE_ID		GUC_MAX_STAGE_DESCRIPTORS
 
 #define GUC_RENDER_ENGINE		0
 #define GUC_VIDEO_ENGINE		1
@@ -68,14 +68,14 @@
 #define GUC_DOORBELL_ENABLED		1
 #define GUC_DOORBELL_DISABLED		0
 
-#define GUC_CTX_DESC_ATTR_ACTIVE	(1 << 0)
-#define GUC_CTX_DESC_ATTR_PENDING_DB	(1 << 1)
-#define GUC_CTX_DESC_ATTR_KERNEL	(1 << 2)
-#define GUC_CTX_DESC_ATTR_PREEMPT	(1 << 3)
-#define GUC_CTX_DESC_ATTR_RESET		(1 << 4)
-#define GUC_CTX_DESC_ATTR_WQLOCKED	(1 << 5)
-#define GUC_CTX_DESC_ATTR_PCH		(1 << 6)
-#define GUC_CTX_DESC_ATTR_TERMINATED	(1 << 7)
+#define GUC_STAGE_DESC_ATTR_ACTIVE	BIT(0)
+#define GUC_STAGE_DESC_ATTR_PENDING_DB	BIT(1)
+#define GUC_STAGE_DESC_ATTR_KERNEL	BIT(2)
+#define GUC_STAGE_DESC_ATTR_PREEMPT	BIT(3)
+#define GUC_STAGE_DESC_ATTR_RESET	BIT(4)
+#define GUC_STAGE_DESC_ATTR_WQLOCKED	BIT(5)
+#define GUC_STAGE_DESC_ATTR_PCH		BIT(6)
+#define GUC_STAGE_DESC_ATTR_TERMINATED	BIT(7)
 
 /* The guc control data is 10 DWORDs */
 #define GUC_CTL_CTXINFO			0
@@ -241,8 +241,8 @@ union guc_doorbell_qw {
 	u64 value_qw;
 } __packed;
 
-#define GUC_MAX_DOORBELLS		256
-#define GUC_INVALID_DOORBELL_ID		(GUC_MAX_DOORBELLS)
+#define GUC_NUM_DOORBELLS	256
+#define GUC_DOORBELL_INVALID	(GUC_NUM_DOORBELLS)
 
 #define GUC_DB_SIZE			(PAGE_SIZE)
 #define GUC_WQ_SIZE			(PAGE_SIZE * 2)
@@ -251,12 +251,12 @@ union guc_doorbell_qw {
 struct guc_wq_item {
 	u32 header;
 	u32 context_desc;
-	u32 ring_tail;
+	u32 submit_element_info;
 	u32 fence_id;
 } __packed;
 
 struct guc_process_desc {
-	u32 context_id;
+	u32 stage_id;
 	u64 db_base_addr;
 	u32 head;
 	u32 tail;
@@ -278,7 +278,7 @@ struct guc_execlist_context {
 	u32 context_desc;
 	u32 context_id;
 	u32 ring_status;
-	u32 ring_lcra;
+	u32 ring_lrca;
 	u32 ring_begin;
 	u32 ring_end;
 	u32 ring_next_free_location;
@@ -289,10 +289,18 @@ struct guc_execlist_context {
 	u16 engine_submit_queue_count;
 } __packed;
 
-/*Context descriptor for communicating between uKernel and Driver*/
-struct guc_context_desc {
+/*
+ * This structure describes a stage set arranged for a particular communication
+ * between uKernel (GuC) and Driver (KMD). Technically, this is known as a
+ * "GuC Context descriptor" in the specs, but we use the term "stage descriptor"
+ * to avoid confusion with all the other things already named "context" in the
+ * driver. A static pool of these descriptors are stored inside a GEM object
+ * (stage_desc_pool) which is held for the entire lifetime of our interaction
+ * with the GuC, being allocated before the GuC is loaded with its firmware.
+ */
+struct guc_stage_desc {
 	u32 sched_common_area;
-	u32 context_id;
+	u32 stage_id;
 	u32 pas_id;
 	u8 engines_used;
 	u64 db_trigger_cpu;
@@ -359,7 +367,7 @@ struct guc_policy {
 } __packed;
 
 struct guc_policies {
-	struct guc_policy policy[GUC_CTX_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
+	struct guc_policy policy[GUC_CLIENT_PRIORITY_NUM][GUC_MAX_ENGINES_NUM];
 
 	/* In micro seconds. How much time to allow before DPC processing is
 	 * called back via interrupt (to prevent DPC queue drain starving).
@@ -401,16 +409,17 @@ struct guc_mmio_regset {
 	u32 number_of_registers;
 } __packed;
 
+/* MMIO registers that are set as non privileged */
+struct mmio_white_list {
+	u32 mmio_start;
+	u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
+	u32 count;
+} __packed;
+
 struct guc_mmio_reg_state {
 	struct guc_mmio_regset global_reg;
 	struct guc_mmio_regset engine_reg[GUC_MAX_ENGINES_NUM];
-
-	/* MMIO registers that are set as non privileged */
-	struct __packed {
-		u32 mmio_start;
-		u32 offsets[GUC_MMIO_WHITE_LIST_MAX];
-		u32 count;
-	} mmio_white_list[GUC_MAX_ENGINES_NUM];
+	struct mmio_white_list white_list[GUC_MAX_ENGINES_NUM];
 } __packed;
 
 /* GuC Additional Data Struct */
diff --git a/drivers/gpu/drm/i915/intel_guc_loader.c b/drivers/gpu/drm/i915/intel_guc_loader.c
index 2f270d02894c..8a1a023e48b2 100644
--- a/drivers/gpu/drm/i915/intel_guc_loader.c
+++ b/drivers/gpu/drm/i915/intel_guc_loader.c
@@ -73,22 +73,6 @@ MODULE_FIRMWARE(I915_BXT_GUC_UCODE);
 #define I915_KBL_GUC_UCODE GUC_FW_PATH(kbl, KBL_FW_MAJOR, KBL_FW_MINOR)
 MODULE_FIRMWARE(I915_KBL_GUC_UCODE);
 
-/* User-friendly representation of an enum */
-const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
-{
-	switch (status) {
-	case INTEL_UC_FIRMWARE_FAIL:
-		return "FAIL";
-	case INTEL_UC_FIRMWARE_NONE:
-		return "NONE";
-	case INTEL_UC_FIRMWARE_PENDING:
-		return "PENDING";
-	case INTEL_UC_FIRMWARE_SUCCESS:
-		return "SUCCESS";
-	default:
-		return "UNKNOWN!";
-	}
-};
 
 static u32 get_gttype(struct drm_i915_private *dev_priv)
 {
@@ -148,16 +132,14 @@ static void guc_params_init(struct drm_i915_private *dev_priv)
 	} else
 		params[GUC_CTL_DEBUG] = GUC_LOG_DISABLED;
 
-	if (guc->ads_vma) {
+	/* If GuC submission is enabled, set up additional parameters here */
+	if (i915.enable_guc_submission) {
 		u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT;
+		u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool);
+		u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16;
+
 		params[GUC_CTL_DEBUG] |= ads << GUC_ADS_ADDR_SHIFT;
 		params[GUC_CTL_DEBUG] |= GUC_ADS_ENABLED;
-	}
-
-	/* If GuC submission is enabled, set up additional parameters here */
-	if (i915.enable_guc_submission) {
-		u32 pgs = guc_ggtt_offset(dev_priv->guc.ctx_pool_vma);
-		u32 ctx_in_16 = GUC_MAX_GPU_CONTEXTS / 16;
 
 		pgs >>= PAGE_SHIFT;
 		params[GUC_CTL_CTXINFO] = (pgs << GUC_CTL_BASE_ADDR_SHIFT) |
@@ -430,24 +412,3 @@ int intel_guc_select_fw(struct intel_guc *guc)
 
 	return 0;
 }
-
-/**
- * intel_guc_fini() - clean up all allocated resources
- * @dev_priv:	i915 device private
- */
-void intel_guc_fini(struct drm_i915_private *dev_priv)
-{
-	struct intel_uc_fw *guc_fw = &dev_priv->guc.fw;
-	struct drm_i915_gem_object *obj;
-
-	mutex_lock(&dev_priv->drm.struct_mutex);
-	i915_guc_submission_disable(dev_priv);
-	i915_guc_submission_fini(dev_priv);
-	mutex_unlock(&dev_priv->drm.struct_mutex);
-
-	obj = fetch_and_zero(&guc_fw->obj);
-	if (obj)
-		i915_gem_object_put(obj);
-
-	guc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
-}
diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c
index 5c0f9a49da0e..6fb63a3c65b0 100644
--- a/drivers/gpu/drm/i915/intel_guc_log.c
+++ b/drivers/gpu/drm/i915/intel_guc_log.c
@@ -66,7 +66,6 @@ static int guc_log_control(struct intel_guc *guc, u32 control_val)
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
 
-
 /*
  * Sub buffer switch callback. Called whenever relay has to switch to a new
  * sub buffer, relay stays on the same sub buffer if 0 is returned.
@@ -139,45 +138,15 @@ static struct rchan_callbacks relay_callbacks = {
 	.remove_buf_file = remove_buf_file_callback,
 };
 
-static void guc_log_remove_relay_file(struct intel_guc *guc)
-{
-	relay_close(guc->log.relay_chan);
-}
-
-static int guc_log_create_relay_channel(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-	struct rchan *guc_log_relay_chan;
-	size_t n_subbufs, subbuf_size;
-
-	/* Keep the size of sub buffers same as shared log buffer */
-	subbuf_size = guc->log.vma->obj->base.size;
-
-	/* Store up to 8 snapshots, which is large enough to buffer sufficient
-	 * boot time logs and provides enough leeway to User, in terms of
-	 * latency, for consuming the logs from relay. Also doesn't take
-	 * up too much memory.
-	 */
-	n_subbufs = 8;
-
-	guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size,
-					n_subbufs, &relay_callbacks, dev_priv);
-	if (!guc_log_relay_chan) {
-		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
-		return -ENOMEM;
-	}
-
-	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
-	guc->log.relay_chan = guc_log_relay_chan;
-	return 0;
-}
-
-static int guc_log_create_relay_file(struct intel_guc *guc)
+static int guc_log_relay_file_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	struct dentry *log_dir;
 	int ret;
 
+	if (i915.guc_log_level < 0)
+		return 0;
+
 	/* For now create the log file in /sys/kernel/debug/dri/0 dir */
 	log_dir = dev_priv->drm.primary->debugfs_root;
 
@@ -197,8 +166,8 @@ static int guc_log_create_relay_file(struct intel_guc *guc)
 		return -ENODEV;
 	}
 
-	ret = relay_late_setup_files(guc->log.relay_chan, "guc_log", log_dir);
-	if (ret) {
+	ret = relay_late_setup_files(guc->log.runtime.relay_chan, "guc_log", log_dir);
+	if (ret < 0 && ret != -EEXIST) {
 		DRM_ERROR("Couldn't associate relay chan with file %d\n", ret);
 		return ret;
 	}
@@ -214,15 +183,15 @@ static void guc_move_to_next_buf(struct intel_guc *guc)
 	smp_wmb();
 
 	/* All data has been written, so now move the offset of sub buffer. */
-	relay_reserve(guc->log.relay_chan, guc->log.vma->obj->base.size);
+	relay_reserve(guc->log.runtime.relay_chan, guc->log.vma->obj->base.size);
 
 	/* Switch to the next sub buffer */
-	relay_flush(guc->log.relay_chan);
+	relay_flush(guc->log.runtime.relay_chan);
 }
 
 static void *guc_get_write_buffer(struct intel_guc *guc)
 {
-	if (!guc->log.relay_chan)
+	if (!guc->log.runtime.relay_chan)
 		return NULL;
 
 	/* Just get the base address of a new sub buffer and copy data into it
@@ -233,7 +202,7 @@ static void *guc_get_write_buffer(struct intel_guc *guc)
 	 * done without using relay_reserve() along with relay_write(). So its
 	 * better to use relay_reserve() alone.
 	 */
-	return relay_reserve(guc->log.relay_chan, 0);
+	return relay_reserve(guc->log.runtime.relay_chan, 0);
 }
 
 static bool guc_check_log_buf_overflow(struct intel_guc *guc,
@@ -284,11 +253,11 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
 	void *src_data, *dst_data;
 	bool new_overflow;
 
-	if (WARN_ON(!guc->log.buf_addr))
+	if (WARN_ON(!guc->log.runtime.buf_addr))
 		return;
 
 	/* Get the pointer to shared GuC log buffer */
-	log_buf_state = src_data = guc->log.buf_addr;
+	log_buf_state = src_data = guc->log.runtime.buf_addr;
 
 	/* Get the pointer to local buffer to store the logs */
 	log_buf_snapshot_state = dst_data = guc_get_write_buffer(guc);
@@ -371,153 +340,113 @@ static void guc_read_update_log_buffer(struct intel_guc *guc)
 	}
 }
 
-static void guc_log_cleanup(struct intel_guc *guc)
-{
-	struct drm_i915_private *dev_priv = guc_to_i915(guc);
-
-	lockdep_assert_held(&dev_priv->drm.struct_mutex);
-
-	/* First disable the flush interrupt */
-	gen9_disable_guc_interrupts(dev_priv);
-
-	if (guc->log.flush_wq)
-		destroy_workqueue(guc->log.flush_wq);
-
-	guc->log.flush_wq = NULL;
-
-	if (guc->log.relay_chan)
-		guc_log_remove_relay_file(guc);
-
-	guc->log.relay_chan = NULL;
-
-	if (guc->log.buf_addr)
-		i915_gem_object_unpin_map(guc->log.vma->obj);
-
-	guc->log.buf_addr = NULL;
-}
-
 static void capture_logs_work(struct work_struct *work)
 {
 	struct intel_guc *guc =
-		container_of(work, struct intel_guc, log.flush_work);
+		container_of(work, struct intel_guc, log.runtime.flush_work);
 
 	guc_log_capture_logs(guc);
 }
 
-static int guc_log_create_extras(struct intel_guc *guc)
+static bool guc_log_has_runtime(struct intel_guc *guc)
+{
+	return guc->log.runtime.buf_addr != NULL;
+}
+
+static int guc_log_runtime_create(struct intel_guc *guc)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	void *vaddr;
-	int ret;
+	struct rchan *guc_log_relay_chan;
+	size_t n_subbufs, subbuf_size;
+	int ret = 0;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	/* Nothing to do */
-	if (i915.guc_log_level < 0)
-		return 0;
-
-	if (!guc->log.buf_addr) {
-		/* Create a WC (Uncached for read) vmalloc mapping of log
-		 * buffer pages, so that we can directly get the data
-		 * (up-to-date) from memory.
-		 */
-		vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
-		if (IS_ERR(vaddr)) {
-			ret = PTR_ERR(vaddr);
-			DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
-			return ret;
-		}
+	GEM_BUG_ON(guc_log_has_runtime(guc));
 
-		guc->log.buf_addr = vaddr;
+	/* Create a WC (Uncached for read) vmalloc mapping of log
+	 * buffer pages, so that we can directly get the data
+	 * (up-to-date) from memory.
+	 */
+	vaddr = i915_gem_object_pin_map(guc->log.vma->obj, I915_MAP_WC);
+	if (IS_ERR(vaddr)) {
+		DRM_ERROR("Couldn't map log buffer pages %d\n", ret);
+		return PTR_ERR(vaddr);
 	}
 
-	if (!guc->log.relay_chan) {
-		/* Create a relay channel, so that we have buffers for storing
-		 * the GuC firmware logs, the channel will be linked with a file
-		 * later on when debugfs is registered.
-		 */
-		ret = guc_log_create_relay_channel(guc);
-		if (ret)
-			return ret;
-	}
+	guc->log.runtime.buf_addr = vaddr;
 
-	if (!guc->log.flush_wq) {
-		INIT_WORK(&guc->log.flush_work, capture_logs_work);
-
-		 /*
-		 * GuC log buffer flush work item has to do register access to
-		 * send the ack to GuC and this work item, if not synced before
-		 * suspend, can potentially get executed after the GFX device is
-		 * suspended.
-		 * By marking the WQ as freezable, we don't have to bother about
-		 * flushing of this work item from the suspend hooks, the pending
-		 * work item if any will be either executed before the suspend
-		 * or scheduled later on resume. This way the handling of work
-		 * item can be kept same between system suspend & rpm suspend.
-		 */
-		guc->log.flush_wq = alloc_ordered_workqueue("i915-guc_log",
-							    WQ_HIGHPRI | WQ_FREEZABLE);
-		if (guc->log.flush_wq == NULL) {
-			DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
-			return -ENOMEM;
-		}
-	}
-
-	return 0;
-}
-
-void intel_guc_log_create(struct intel_guc *guc)
-{
-	struct i915_vma *vma;
-	unsigned long offset;
-	uint32_t size, flags;
+	 /* Keep the size of sub buffers same as shared log buffer */
+	subbuf_size = guc->log.vma->obj->base.size;
 
-	if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
-		i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
+	/* Store up to 8 snapshots, which is large enough to buffer sufficient
+	 * boot time logs and provides enough leeway to User, in terms of
+	 * latency, for consuming the logs from relay. Also doesn't take
+	 * up too much memory.
+	 */
+	n_subbufs = 8;
 
-	/* The first page is to save log buffer state. Allocate one
-	 * extra page for others in case for overlap */
-	size = (1 + GUC_LOG_DPC_PAGES + 1 +
-		GUC_LOG_ISR_PAGES + 1 +
-		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
+	/* Create a relay channel, so that we have buffers for storing
+	 * the GuC firmware logs, the channel will be linked with a file
+	 * later on when debugfs is registered.
+	 */
+	guc_log_relay_chan = relay_open(NULL, NULL, subbuf_size,
+					n_subbufs, &relay_callbacks, dev_priv);
+	if (!guc_log_relay_chan) {
+		DRM_ERROR("Couldn't create relay chan for GuC logging\n");
 
-	vma = guc->log.vma;
-	if (!vma) {
-		/* We require SSE 4.1 for fast reads from the GuC log buffer and
-		 * it should be present on the chipsets supporting GuC based
-		 * submisssions.
-		 */
-		if (WARN_ON(!i915_has_memcpy_from_wc())) {
-			/* logging will not be enabled */
-			i915.guc_log_level = -1;
-			return;
-		}
+		ret = -ENOMEM;
+		goto err_vaddr;
+	}
 
-		vma = intel_guc_allocate_vma(guc, size);
-		if (IS_ERR(vma)) {
-			/* logging will be off */
-			i915.guc_log_level = -1;
-			return;
-		}
+	GEM_BUG_ON(guc_log_relay_chan->subbuf_size < subbuf_size);
+	guc->log.runtime.relay_chan = guc_log_relay_chan;
+
+	INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work);
+
+	/*
+	 * GuC log buffer flush work item has to do register access to
+	 * send the ack to GuC and this work item, if not synced before
+	 * suspend, can potentially get executed after the GFX device is
+	 * suspended.
+	 * By marking the WQ as freezable, we don't have to bother about
+	 * flushing of this work item from the suspend hooks, the pending
+	 * work item if any will be either executed before the suspend
+	 * or scheduled later on resume. This way the handling of work
+	 * item can be kept same between system suspend & rpm suspend.
+	 */
+	guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log",
+						WQ_HIGHPRI | WQ_FREEZABLE);
+	if (!guc->log.runtime.flush_wq) {
+		DRM_ERROR("Couldn't allocate the wq for GuC logging\n");
+		ret = -ENOMEM;
+		goto err_relaychan;
+	}
 
-		guc->log.vma = vma;
+	return 0;
 
-		if (guc_log_create_extras(guc)) {
-			guc_log_cleanup(guc);
-			i915_vma_unpin_and_release(&guc->log.vma);
-			i915.guc_log_level = -1;
-			return;
-		}
-	}
+err_relaychan:
+	relay_close(guc->log.runtime.relay_chan);
+err_vaddr:
+	i915_gem_object_unpin_map(guc->log.vma->obj);
+	guc->log.runtime.buf_addr = NULL;
+	return ret;
+}
 
-	/* each allocated unit is a page */
-	flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
-		(GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
-		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
-		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
+static void guc_log_runtime_destroy(struct intel_guc *guc)
+{
+	/*
+	 * It's possible that the runtime stuff was never allocated because
+	 * guc_log_level was < 0 at the time
+	 **/
+	if (!guc_log_has_runtime(guc))
+		return;
 
-	offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
-	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
+	destroy_workqueue(guc->log.runtime.flush_wq);
+	relay_close(guc->log.runtime.relay_chan);
+	i915_gem_object_unpin_map(guc->log.vma->obj);
+	guc->log.runtime.buf_addr = NULL;
 }
 
 static int guc_log_late_setup(struct intel_guc *guc)
@@ -527,24 +456,25 @@ static int guc_log_late_setup(struct intel_guc *guc)
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (i915.guc_log_level < 0)
-		return -EINVAL;
-
-	/* If log_level was set as -1 at boot time, then setup needed to
-	 * handle log buffer flush interrupts would not have been done yet,
-	 * so do that now.
-	 */
-	ret = guc_log_create_extras(guc);
-	if (ret)
-		goto err;
+	if (!guc_log_has_runtime(guc)) {
+		/* If log_level was set as -1 at boot time, then setup needed to
+		 * handle log buffer flush interrupts would not have been done yet,
+		 * so do that now.
+		 */
+		ret = guc_log_runtime_create(guc);
+		if (ret)
+			goto err;
+	}
 
-	ret = guc_log_create_relay_file(guc);
+	ret = guc_log_relay_file_create(guc);
 	if (ret)
-		goto err;
+		goto err_runtime;
 
 	return 0;
+
+err_runtime:
+	guc_log_runtime_destroy(guc);
 err:
-	guc_log_cleanup(guc);
 	/* logging will remain off */
 	i915.guc_log_level = -1;
 	return ret;
@@ -577,7 +507,7 @@ static void guc_flush_logs(struct intel_guc *guc)
 	/* Before initiating the forceful flush, wait for any pending/ongoing
 	 * flush to complete otherwise forceful flush may not actually happen.
 	 */
-	flush_work(&guc->log.flush_work);
+	flush_work(&guc->log.runtime.flush_work);
 
 	/* Ask GuC to update the log buffer state */
 	guc_log_flush(guc);
@@ -586,6 +516,72 @@ static void guc_flush_logs(struct intel_guc *guc)
 	guc_log_capture_logs(guc);
 }
 
+int intel_guc_log_create(struct intel_guc *guc)
+{
+	struct i915_vma *vma;
+	unsigned long offset;
+	uint32_t size, flags;
+	int ret;
+
+	GEM_BUG_ON(guc->log.vma);
+
+	if (i915.guc_log_level > GUC_LOG_VERBOSITY_MAX)
+		i915.guc_log_level = GUC_LOG_VERBOSITY_MAX;
+
+	/* The first page is to save log buffer state. Allocate one
+	 * extra page for others in case for overlap */
+	size = (1 + GUC_LOG_DPC_PAGES + 1 +
+		GUC_LOG_ISR_PAGES + 1 +
+		GUC_LOG_CRASH_PAGES + 1) << PAGE_SHIFT;
+
+	/* We require SSE 4.1 for fast reads from the GuC log buffer and
+	 * it should be present on the chipsets supporting GuC based
+	 * submisssions.
+	 */
+	if (WARN_ON(!i915_has_memcpy_from_wc())) {
+		ret = -EINVAL;
+		goto err;
+	}
+
+	vma = intel_guc_allocate_vma(guc, size);
+	if (IS_ERR(vma)) {
+		ret = PTR_ERR(vma);
+		goto err;
+	}
+
+	guc->log.vma = vma;
+
+	if (i915.guc_log_level >= 0) {
+		ret = guc_log_runtime_create(guc);
+		if (ret < 0)
+			goto err_vma;
+	}
+
+	/* each allocated unit is a page */
+	flags = GUC_LOG_VALID | GUC_LOG_NOTIFY_ON_HALF_FULL |
+		(GUC_LOG_DPC_PAGES << GUC_LOG_DPC_SHIFT) |
+		(GUC_LOG_ISR_PAGES << GUC_LOG_ISR_SHIFT) |
+		(GUC_LOG_CRASH_PAGES << GUC_LOG_CRASH_SHIFT);
+
+	offset = guc_ggtt_offset(vma) >> PAGE_SHIFT; /* in pages */
+	guc->log.flags = (offset << GUC_LOG_BUF_ADDR_SHIFT) | flags;
+
+	return 0;
+
+err_vma:
+	i915_vma_unpin_and_release(&guc->log.vma);
+err:
+	/* logging will be off */
+	i915.guc_log_level = -1;
+	return ret;
+}
+
+void intel_guc_log_destroy(struct intel_guc *guc)
+{
+	guc_log_runtime_destroy(guc);
+	i915_vma_unpin_and_release(&guc->log.vma);
+}
+
 int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 {
 	struct intel_guc *guc = &dev_priv->guc;
@@ -609,17 +605,22 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 		return ret;
 	}
 
-	i915.guc_log_level = log_param.verbosity;
+	if (log_param.logging_enabled) {
+		i915.guc_log_level = log_param.verbosity;
 
-	/* If log_level was set as -1 at boot time, then the relay channel file
-	 * wouldn't have been created by now and interrupts also would not have
-	 * been enabled.
-	 */
-	if (!dev_priv->guc.log.relay_chan) {
+		/* If log_level was set as -1 at boot time, then the relay channel file
+		 * wouldn't have been created by now and interrupts also would not have
+		 * been enabled. Try again now, just in case.
+		 */
 		ret = guc_log_late_setup(guc);
-		if (!ret)
-			gen9_enable_guc_interrupts(dev_priv);
-	} else if (!log_param.logging_enabled) {
+		if (ret < 0) {
+			DRM_DEBUG_DRIVER("GuC log late setup failed %d\n", ret);
+			return ret;
+		}
+
+		/* GuC logging is currently the only user of Guc2Host interrupts */
+		gen9_enable_guc_interrupts(dev_priv);
+	} else {
 		/* Once logging is disabled, GuC won't generate logs & send an
 		 * interrupt. But there could be some data in the log buffer
 		 * which is yet to be captured. So request GuC to update the log
@@ -629,9 +630,6 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 
 		/* As logging is disabled, update log level to reflect that */
 		i915.guc_log_level = -1;
-	} else {
-		/* In case interrupts were disabled, enable them now */
-		gen9_enable_guc_interrupts(dev_priv);
 	}
 
 	return ret;
@@ -639,7 +637,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val)
 
 void i915_guc_log_register(struct drm_i915_private *dev_priv)
 {
-	if (!i915.enable_guc_submission)
+	if (!i915.enable_guc_submission || i915.guc_log_level < 0)
 		return;
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
@@ -653,6 +651,8 @@ void i915_guc_log_unregister(struct drm_i915_private *dev_priv)
 		return;
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
-	guc_log_cleanup(&dev_priv->guc);
+	/* GuC logging is currently the only user of Guc2Host interrupts */
+	gen9_disable_guc_interrupts(dev_priv);
+	guc_log_runtime_destroy(&dev_priv->guc);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 }
diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c
index 8c04eca84351..e1ab6432a914 100644
--- a/drivers/gpu/drm/i915/intel_gvt.c
+++ b/drivers/gpu/drm/i915/intel_gvt.c
@@ -45,6 +45,8 @@ static bool is_supported_device(struct drm_i915_private *dev_priv)
 		return true;
 	if (IS_SKYLAKE(dev_priv))
 		return true;
+	if (IS_KABYLAKE(dev_priv) && INTEL_DEVID(dev_priv) == 0x591D)
+		return true;
 	return false;
 }
 
diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c
index 3eec74ca5116..1d623b5e09d6 100644
--- a/drivers/gpu/drm/i915/intel_hdmi.c
+++ b/drivers/gpu/drm/i915/intel_hdmi.c
@@ -34,6 +34,7 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc.h>
 #include <drm/drm_edid.h>
+#include <drm/drm_scdc_helper.h>
 #include "intel_drv.h"
 #include <drm/i915_drm.h>
 #include <drm/intel_lpe_audio.h>
@@ -1208,6 +1209,8 @@ static int intel_hdmi_source_max_tmds_clock(struct drm_i915_private *dev_priv)
 {
 	if (IS_G4X(dev_priv))
 		return 165000;
+	else if (IS_GEMINILAKE(dev_priv))
+		return 594000;
 	else if (IS_HASWELL(dev_priv) || INTEL_INFO(dev_priv)->gen >= 8)
 		return 300000;
 	else
@@ -1334,6 +1337,7 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
 	struct drm_i915_private *dev_priv = to_i915(encoder->base.dev);
 	struct drm_display_mode *adjusted_mode = &pipe_config->base.adjusted_mode;
+	struct drm_scdc *scdc = &conn_state->connector->display_info.hdmi.scdc;
 	int clock_8bpc = pipe_config->base.adjusted_mode.crtc_clock;
 	int clock_12bpc = clock_8bpc * 3 / 2;
 	int desired_bpp;
@@ -1403,6 +1407,16 @@ bool intel_hdmi_compute_config(struct intel_encoder *encoder,
 
 	pipe_config->lane_count = 4;
 
+	if (scdc->scrambling.supported && IS_GEMINILAKE(dev_priv)) {
+		if (scdc->scrambling.low_rates)
+			pipe_config->hdmi_scrambling = true;
+
+		if (pipe_config->port_clock > 340000) {
+			pipe_config->hdmi_scrambling = true;
+			pipe_config->hdmi_high_tmds_clock_ratio = true;
+		}
+	}
+
 	return true;
 }
 
@@ -1812,6 +1826,57 @@ intel_hdmi_add_properties(struct intel_hdmi *intel_hdmi, struct drm_connector *c
 	intel_hdmi->aspect_ratio = HDMI_PICTURE_ASPECT_NONE;
 }
 
+/*
+ * intel_hdmi_handle_sink_scrambling: handle sink scrambling/clock ratio setup
+ * @encoder: intel_encoder
+ * @connector: drm_connector
+ * @high_tmds_clock_ratio = bool to indicate if the function needs to set
+ *  or reset the high tmds clock ratio for scrambling
+ * @scrambling: bool to Indicate if the function needs to set or reset
+ *  sink scrambling
+ *
+ * This function handles scrambling on HDMI 2.0 capable sinks.
+ * If required clock rate is > 340 Mhz && scrambling is supported by sink
+ * it enables scrambling. This should be called before enabling the HDMI
+ * 2.0 port, as the sink can choose to disable the scrambling if it doesn't
+ * detect a scrambled clock within 100 ms.
+ */
+void intel_hdmi_handle_sink_scrambling(struct intel_encoder *encoder,
+				       struct drm_connector *connector,
+				       bool high_tmds_clock_ratio,
+				       bool scrambling)
+{
+	struct intel_hdmi *intel_hdmi = enc_to_intel_hdmi(&encoder->base);
+	struct drm_i915_private *dev_priv = connector->dev->dev_private;
+	struct drm_scrambling *sink_scrambling =
+				&connector->display_info.hdmi.scdc.scrambling;
+	struct i2c_adapter *adptr = intel_gmbus_get_adapter(dev_priv,
+							   intel_hdmi->ddc_bus);
+	bool ret;
+
+	if (!sink_scrambling->supported)
+		return;
+
+	DRM_DEBUG_KMS("Setting sink scrambling for enc:%s connector:%s\n",
+		      encoder->base.name, connector->name);
+
+	/* Set TMDS bit clock ratio to 1/40 or 1/10 */
+	ret = drm_scdc_set_high_tmds_clock_ratio(adptr, high_tmds_clock_ratio);
+	if (!ret) {
+		DRM_ERROR("Set TMDS ratio failed\n");
+		return;
+	}
+
+	/* Enable/disable sink scrambling */
+	ret = drm_scdc_set_scrambling(adptr, scrambling);
+	if (!ret) {
+		DRM_ERROR("Set sink scrambling failed\n");
+		return;
+	}
+
+	DRM_DEBUG_KMS("sink scrambling handled\n");
+}
+
 static u8 intel_hdmi_ddc_pin(struct drm_i915_private *dev_priv,
 			     enum port port)
 {
diff --git a/drivers/gpu/drm/i915/intel_hotplug.c b/drivers/gpu/drm/i915/intel_hotplug.c
index 7d210097eefa..f1200272a699 100644
--- a/drivers/gpu/drm/i915/intel_hotplug.c
+++ b/drivers/gpu/drm/i915/intel_hotplug.c
@@ -243,7 +243,8 @@ static bool intel_hpd_irq_event(struct drm_device *dev,
 	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 	old_status = connector->status;
 
-	connector->status = connector->funcs->detect(connector, false);
+	connector->status = drm_helper_probe_detect(connector, NULL, false);
+
 	if (old_status == connector->status)
 		return false;
 
diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c
index 7af900bcdc05..9ee819666a4c 100644
--- a/drivers/gpu/drm/i915/intel_huc.c
+++ b/drivers/gpu/drm/i915/intel_huc.c
@@ -251,24 +251,6 @@ fail:
 }
 
 /**
- * intel_huc_fini() - clean up resources allocated for HuC
- * @dev_priv: the drm_i915_private device
- *
- * Cleans up by releasing the huc firmware GEM obj.
- */
-void intel_huc_fini(struct drm_i915_private *dev_priv)
-{
-	struct intel_uc_fw *huc_fw = &dev_priv->huc.fw;
-	struct drm_i915_gem_object *obj;
-
-	obj = fetch_and_zero(&huc_fw->obj);
-	if (obj)
-		i915_gem_object_put(obj);
-
-	huc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
-}
-
-/**
  * intel_guc_auth_huc() - authenticate ucode
  * @dev_priv: the drm_i915_device
  *
diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c
index 7a5b41b1c024..25d8e76489e4 100644
--- a/drivers/gpu/drm/i915/intel_lpe_audio.c
+++ b/drivers/gpu/drm/i915/intel_lpe_audio.c
@@ -131,8 +131,15 @@ err:
 
 static void lpe_audio_platdev_destroy(struct drm_i915_private *dev_priv)
 {
+	/* XXX Note that platform_device_register_full() allocates a dma_mask
+	 * and never frees it. We can't free it here as we cannot guarantee
+	 * this is the last reference (i.e. that the dma_mask will not be
+	 * used after our unregister). So ee choose to leak the sizeof(u64)
+	 * allocation here - it should be fixed in the platform_device rather
+	 * than us fiddle with its internals.
+	 */
+
 	platform_device_unregister(dev_priv->lpe_audio.platdev);
-	kfree(dev_priv->lpe_audio.platdev->dev.dma_mask);
 }
 
 static void lpe_audio_irq_unmask(struct irq_data *d)
@@ -331,6 +338,7 @@ void intel_lpe_audio_teardown(struct drm_i915_private *dev_priv)
  * audio driver and i915
  * @dev_priv: the i915 drm device private data
  * @eld : ELD data
+ * @pipe: pipe id
  * @port: port id
  * @tmds_clk_speed: tmds clock frequency in Hz
  *
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 77168e673e0a..c8f7c631fc1f 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -326,7 +326,7 @@ static u64 execlists_update_context(struct drm_i915_gem_request *rq)
 		rq->ctx->ppgtt ?: rq->i915->mm.aliasing_ppgtt;
 	u32 *reg_state = ce->lrc_reg_state;
 
-	GEM_BUG_ON(!IS_ALIGNED(rq->tail, 8));
+	assert_ring_tail_valid(rq->ring, rq->tail);
 	reg_state[CTX_RING_TAIL+1] = rq->tail;
 
 	/* True 32b PPGTT with dynamic page allocation: update PDP
@@ -399,22 +399,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct drm_i915_gem_request *last;
 	struct execlist_port *port = engine->execlist_port;
-	unsigned long flags;
 	struct rb_node *rb;
 	bool submit = false;
 
-	/* After execlist_first is updated, the tasklet will be rescheduled.
-	 *
-	 * If we are currently running (inside the tasklet) and a third
-	 * party queues a request and so updates engine->execlist_first under
-	 * the spinlock (which we have elided), it will atomically set the
-	 * TASKLET_SCHED flag causing the us to be re-executed and pick up
-	 * the change in state (the update to TASKLET_SCHED incurs a memory
-	 * barrier making this cross-cpu checking safe).
-	 */
-	if (!READ_ONCE(engine->execlist_first))
-		return;
-
 	last = port->request;
 	if (last)
 		/* WaIdleLiteRestore:bdw,skl
@@ -448,7 +435,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	 * and context switches) submission.
 	 */
 
-	spin_lock_irqsave(&engine->timeline->lock, flags);
+	spin_lock_irq(&engine->timeline->lock);
 	rb = engine->execlist_first;
 	while (rb) {
 		struct drm_i915_gem_request *cursor =
@@ -500,7 +487,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		i915_gem_request_assign(&port->request, last);
 		engine->execlist_first = rb;
 	}
-	spin_unlock_irqrestore(&engine->timeline->lock, flags);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	if (submit)
 		execlists_submit_ports(engine);
@@ -530,24 +517,36 @@ static void intel_lrc_irq_handler(unsigned long data)
 
 	intel_uncore_forcewake_get(dev_priv, engine->fw_domains);
 
-	while (test_and_clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
+	/* Prefer doing test_and_clear_bit() as a two stage operation to avoid
+	 * imposing the cost of a locked atomic transaction when submitting a
+	 * new request (outside of the context-switch interrupt).
+	 */
+	while (test_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted)) {
 		u32 __iomem *csb_mmio =
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine));
 		u32 __iomem *buf =
 			dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_BUF_LO(engine, 0));
-		unsigned int csb, head, tail;
-
-		csb = readl(csb_mmio);
-		head = GEN8_CSB_READ_PTR(csb);
-		tail = GEN8_CSB_WRITE_PTR(csb);
-		if (head == tail)
-			break;
+		unsigned int head, tail;
+
+		/* The write will be ordered by the uncached read (itself
+		 * a memory barrier), so we do not need another in the form
+		 * of a locked instruction. The race between the interrupt
+		 * handler and the split test/clear is harmless as we order
+		 * our clear before the CSB read. If the interrupt arrived
+		 * first between the test and the clear, we read the updated
+		 * CSB and clear the bit. If the interrupt arrives as we read
+		 * the CSB or later (i.e. after we had cleared the bit) the bit
+		 * is set and we do a new loop.
+		 */
+		__clear_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+		head = readl(csb_mmio);
+		tail = GEN8_CSB_WRITE_PTR(head);
+		head = GEN8_CSB_READ_PTR(head);
+		while (head != tail) {
+			unsigned int status;
 
-		if (tail < head)
-			tail += GEN8_CSB_ENTRIES;
-		do {
-			unsigned int idx = ++head % GEN8_CSB_ENTRIES;
-			unsigned int status = readl(buf + 2 * idx);
+			if (++head == GEN8_CSB_ENTRIES)
+				head = 0;
 
 			/* We are flying near dragons again.
 			 *
@@ -566,11 +565,12 @@ static void intel_lrc_irq_handler(unsigned long data)
 			 * status notifier.
 			 */
 
+			status = readl(buf + 2 * head);
 			if (!(status & GEN8_CTX_STATUS_COMPLETED_MASK))
 				continue;
 
 			/* Check the context/desc id for this event matches */
-			GEM_DEBUG_BUG_ON(readl(buf + 2 * idx + 1) !=
+			GEM_DEBUG_BUG_ON(readl(buf + 2 * head + 1) !=
 					 port[0].context_id);
 
 			GEM_BUG_ON(port[0].count == 0);
@@ -588,10 +588,9 @@ static void intel_lrc_irq_handler(unsigned long data)
 
 			GEM_BUG_ON(port[0].count == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-		} while (head < tail);
+		}
 
-		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK,
-				     GEN8_CSB_WRITE_PTR(csb) << 8),
+		writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
 		       csb_mmio);
 	}
 
@@ -647,15 +646,14 @@ static void execlists_submit_request(struct drm_i915_gem_request *request)
 static struct intel_engine_cs *
 pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 {
-	struct intel_engine_cs *engine;
+	struct intel_engine_cs *engine =
+		container_of(pt, struct drm_i915_gem_request, priotree)->engine;
+
+	GEM_BUG_ON(!locked);
 
-	engine = container_of(pt,
-			      struct drm_i915_gem_request,
-			      priotree)->engine;
 	if (engine != locked) {
-		if (locked)
-			spin_unlock_irq(&locked->timeline->lock);
-		spin_lock_irq(&engine->timeline->lock);
+		spin_unlock(&locked->timeline->lock);
+		spin_lock(&engine->timeline->lock);
 	}
 
 	return engine;
@@ -663,7 +661,7 @@ pt_lock_engine(struct i915_priotree *pt, struct intel_engine_cs *locked)
 
 static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 {
-	struct intel_engine_cs *engine = NULL;
+	struct intel_engine_cs *engine;
 	struct i915_dependency *dep, *p;
 	struct i915_dependency stack;
 	LIST_HEAD(dfs);
@@ -697,26 +695,23 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 	list_for_each_entry_safe(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
 
-		list_for_each_entry(p, &pt->signalers_list, signal_link)
+		/* Within an engine, there can be no cycle, but we may
+		 * refer to the same dependency chain multiple times
+		 * (redundant dependencies are not eliminated) and across
+		 * engines.
+		 */
+		list_for_each_entry(p, &pt->signalers_list, signal_link) {
+			GEM_BUG_ON(p->signaler->priority < pt->priority);
 			if (prio > READ_ONCE(p->signaler->priority))
 				list_move_tail(&p->dfs_link, &dfs);
+		}
 
 		list_safe_reset_next(dep, p, dfs_link);
-		if (!RB_EMPTY_NODE(&pt->node))
-			continue;
-
-		engine = pt_lock_engine(pt, engine);
-
-		/* If it is not already in the rbtree, we can update the
-		 * priority inplace and skip over it (and its dependencies)
-		 * if it is referenced *again* as we descend the dfs.
-		 */
-		if (prio > pt->priority && RB_EMPTY_NODE(&pt->node)) {
-			pt->priority = prio;
-			list_del_init(&dep->dfs_link);
-		}
 	}
 
+	engine = request->engine;
+	spin_lock_irq(&engine->timeline->lock);
+
 	/* Fifo and depth-first replacement ensure our deps execute before us */
 	list_for_each_entry_safe_reverse(dep, p, &dfs, dfs_link) {
 		struct i915_priotree *pt = dep->signaler;
@@ -728,16 +723,15 @@ static void execlists_schedule(struct drm_i915_gem_request *request, int prio)
 		if (prio <= pt->priority)
 			continue;
 
-		GEM_BUG_ON(RB_EMPTY_NODE(&pt->node));
-
 		pt->priority = prio;
-		rb_erase(&pt->node, &engine->execlist_queue);
-		if (insert_request(pt, &engine->execlist_queue))
-			engine->execlist_first = &pt->node;
+		if (!RB_EMPTY_NODE(&pt->node)) {
+			rb_erase(&pt->node, &engine->execlist_queue);
+			if (insert_request(pt, &engine->execlist_queue))
+				engine->execlist_first = &pt->node;
+		}
 	}
 
-	if (engine)
-		spin_unlock_irq(&engine->timeline->lock);
+	spin_unlock_irq(&engine->timeline->lock);
 
 	/* XXX Do we need to preempt to make room for us and our deps? */
 }
@@ -1255,7 +1249,6 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	ce->lrc_reg_state[CTX_RING_HEAD+1] = request->postfix;
 
 	request->ring->head = request->postfix;
-	request->ring->last_retired_head = -1;
 	intel_ring_update_space(request->ring);
 
 	/* Catch up with any missed context-switch interrupts */
@@ -1268,8 +1261,10 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 	GEM_BUG_ON(request->ctx != port[0].request->ctx);
 
 	/* Reset WaIdleLiteRestore:bdw,skl as well */
-	request->tail = request->wa_tail - WA_TAIL_DWORDS * sizeof(u32);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	request->tail =
+		intel_ring_wrap(request->ring,
+				request->wa_tail - WA_TAIL_DWORDS*sizeof(u32));
+	assert_ring_tail_valid(request->ring, request->tail);
 }
 
 static int intel_logical_ring_emit_pdps(struct drm_i915_gem_request *req)
@@ -1480,7 +1475,7 @@ static void gen8_emit_breadcrumb(struct drm_i915_gem_request *request, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);
 
 	gen8_emit_wa_tail(request, cs);
 }
@@ -1508,7 +1503,7 @@ static void gen8_emit_breadcrumb_render(struct drm_i915_gem_request *request,
 	*cs++ = MI_USER_INTERRUPT;
 	*cs++ = MI_NOOP;
 	request->tail = intel_ring_offset(request, cs);
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);
 
 	gen8_emit_wa_tail(request, cs);
 }
@@ -1575,6 +1570,7 @@ static void execlists_set_default_submission(struct intel_engine_cs *engine)
 {
 	engine->submit_request = execlists_submit_request;
 	engine->schedule = execlists_schedule;
+	engine->irq_tasklet.func = intel_lrc_irq_handler;
 }
 
 static void
@@ -2041,7 +2037,6 @@ void intel_lr_context_resume(struct drm_i915_private *dev_priv)
 			i915_gem_object_unpin_map(ce->state->obj);
 
 			ce->ring->head = ce->ring->tail = 0;
-			ce->ring->last_retired_head = -1;
 			intel_ring_update_space(ce->ring);
 		}
 	}
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 441c01466384..d44465190dc1 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -920,6 +920,8 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	char buf[sizeof(OPREGION_SIGNATURE)];
 	int err = 0;
 	void *base;
+	const void *vbt;
+	u32 vbt_size;
 
 	BUILD_BUG_ON(sizeof(struct opregion_header) != 0x100);
 	BUILD_BUG_ON(sizeof(struct opregion_acpi) != 0x100);
@@ -972,45 +974,46 @@ int intel_opregion_setup(struct drm_i915_private *dev_priv)
 	if (mboxes & MBOX_ASLE_EXT)
 		DRM_DEBUG_DRIVER("ASLE extension supported\n");
 
-	if (!dmi_check_system(intel_no_opregion_vbt)) {
-		const void *vbt = NULL;
-		u32 vbt_size = 0;
-
-		if (opregion->header->opregion_ver >= 2 && opregion->asle &&
-		    opregion->asle->rvda && opregion->asle->rvds) {
-			opregion->rvda = memremap(opregion->asle->rvda,
-						  opregion->asle->rvds,
-						  MEMREMAP_WB);
-			vbt = opregion->rvda;
-			vbt_size = opregion->asle->rvds;
-		}
+	if (dmi_check_system(intel_no_opregion_vbt))
+		goto out;
 
+	if (opregion->header->opregion_ver >= 2 && opregion->asle &&
+	    opregion->asle->rvda && opregion->asle->rvds) {
+		opregion->rvda = memremap(opregion->asle->rvda,
+					  opregion->asle->rvds,
+					  MEMREMAP_WB);
+		vbt = opregion->rvda;
+		vbt_size = opregion->asle->rvds;
 		if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
 			DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (RVDA)\n");
 			opregion->vbt = vbt;
 			opregion->vbt_size = vbt_size;
+			goto out;
 		} else {
-			vbt = base + OPREGION_VBT_OFFSET;
-			/*
-			 * The VBT specification says that if the ASLE ext
-			 * mailbox is not used its area is reserved, but
-			 * on some CHT boards the VBT extends into the
-			 * ASLE ext area. Allow this even though it is
-			 * against the spec, so we do not end up rejecting
-			 * the VBT on those boards (and end up not finding the
-			 * LCD panel because of this).
-			 */
-			vbt_size = (mboxes & MBOX_ASLE_EXT) ?
-				OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
-			vbt_size -= OPREGION_VBT_OFFSET;
-			if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
-				DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
-				opregion->vbt = vbt;
-				opregion->vbt_size = vbt_size;
-			}
+			DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (RVDA)\n");
 		}
 	}
 
+	vbt = base + OPREGION_VBT_OFFSET;
+	/*
+	 * The VBT specification says that if the ASLE ext mailbox is not used
+	 * its area is reserved, but on some CHT boards the VBT extends into the
+	 * ASLE ext area. Allow this even though it is against the spec, so we
+	 * do not end up rejecting the VBT on those boards (and end up not
+	 * finding the LCD panel because of this).
+	 */
+	vbt_size = (mboxes & MBOX_ASLE_EXT) ?
+		OPREGION_ASLE_EXT_OFFSET : OPREGION_SIZE;
+	vbt_size -= OPREGION_VBT_OFFSET;
+	if (intel_bios_is_valid_vbt(vbt, vbt_size)) {
+		DRM_DEBUG_KMS("Found valid VBT in ACPI OpRegion (Mailbox #4)\n");
+		opregion->vbt = vbt;
+		opregion->vbt_size = vbt_size;
+	} else {
+		DRM_DEBUG_KMS("Invalid VBT in ACPI OpRegion (Mailbox #4)\n");
+	}
+
+out:
 	return 0;
 
 err_out:
diff --git a/drivers/gpu/drm/i915/intel_pipe_crc.c b/drivers/gpu/drm/i915/intel_pipe_crc.c
index 9fd9c70baeed..206ee4f0150e 100644
--- a/drivers/gpu/drm/i915/intel_pipe_crc.c
+++ b/drivers/gpu/drm/i915/intel_pipe_crc.c
@@ -522,7 +522,7 @@ static void hsw_trans_edp_pipe_A_crc_wa(struct drm_i915_private *dev_priv,
 		goto unlock;
 	}
 
-	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(&crtc->base);
+	state->acquire_ctx = crtc->base.dev->mode_config.acquire_ctx;
 	pipe_config = intel_atomic_get_crtc_state(state, crtc);
 	if (IS_ERR(pipe_config)) {
 		ret = PTR_ERR(pipe_config);
diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c
index aece0ff88a5d..570bd603f401 100644
--- a/drivers/gpu/drm/i915/intel_pm.c
+++ b/drivers/gpu/drm/i915/intel_pm.c
@@ -655,6 +655,29 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 	return wm_size;
 }
 
+static bool intel_wm_plane_visible(const struct intel_crtc_state *crtc_state,
+				   const struct intel_plane_state *plane_state)
+{
+	struct intel_plane *plane = to_intel_plane(plane_state->base.plane);
+
+	/* FIXME check the 'enable' instead */
+	if (!crtc_state->base.active)
+		return false;
+
+	/*
+	 * Treat cursor with fb as always visible since cursor updates
+	 * can happen faster than the vrefresh rate, and the current
+	 * watermark code doesn't handle that correctly. Cursor updates
+	 * which set/clear the fb or change the cursor size are going
+	 * to get throttled by intel_legacy_cursor_update() to work
+	 * around this problem with the watermark code.
+	 */
+	if (plane->id == PLANE_CURSOR)
+		return plane_state->base.fb != NULL;
+	else
+		return plane_state->base.visible;
+}
+
 static struct intel_crtc *single_enabled_crtc(struct drm_i915_private *dev_priv)
 {
 	struct intel_crtc *crtc, *enabled = NULL;
@@ -1961,7 +1984,7 @@ static uint32_t ilk_compute_pri_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;
 
-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
 	cpp = pstate->base.fb->format->cpp[0];
@@ -1990,7 +2013,7 @@ static uint32_t ilk_compute_spr_wm(const struct intel_crtc_state *cstate,
 	uint32_t method1, method2;
 	int cpp;
 
-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
 	cpp = pstate->base.fb->format->cpp[0];
@@ -2013,15 +2036,7 @@ static uint32_t ilk_compute_cur_wm(const struct intel_crtc_state *cstate,
 {
 	int cpp;
 
-	/*
-	 * Treat cursor with fb as always visible since cursor updates
-	 * can happen faster than the vrefresh rate, and the current
-	 * watermark code doesn't handle that correctly. Cursor updates
-	 * which set/clear the fb or change the cursor size are going
-	 * to get throttled by intel_legacy_cursor_update() to work
-	 * around this problem with the watermark code.
-	 */
-	if (!cstate->base.active || !pstate->base.fb)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
 	cpp = pstate->base.fb->format->cpp[0];
@@ -2038,7 +2053,7 @@ static uint32_t ilk_compute_fbc_wm(const struct intel_crtc_state *cstate,
 {
 	int cpp;
 
-	if (!cstate->base.active || !pstate->base.visible)
+	if (!intel_wm_plane_visible(cstate, pstate))
 		return 0;
 
 	cpp = pstate->base.fb->format->cpp[0];
@@ -3346,19 +3361,29 @@ void skl_ddb_get_hw_state(struct drm_i915_private *dev_priv,
  * Caller should take care of dividing & rounding off the value.
  */
 static uint32_t
-skl_plane_downscale_amount(const struct intel_plane_state *pstate)
+skl_plane_downscale_amount(const struct intel_crtc_state *cstate,
+			   const struct intel_plane_state *pstate)
 {
+	struct intel_plane *plane = to_intel_plane(pstate->base.plane);
 	uint32_t downscale_h, downscale_w;
 	uint32_t src_w, src_h, dst_w, dst_h;
 
-	if (WARN_ON(!pstate->base.visible))
+	if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
 		return DRM_PLANE_HELPER_NO_SCALING;
 
 	/* n.b., src is 16.16 fixed point, dst is whole integer */
-	src_w = drm_rect_width(&pstate->base.src);
-	src_h = drm_rect_height(&pstate->base.src);
-	dst_w = drm_rect_width(&pstate->base.dst);
-	dst_h = drm_rect_height(&pstate->base.dst);
+	if (plane->id == PLANE_CURSOR) {
+		src_w = pstate->base.src_w;
+		src_h = pstate->base.src_h;
+		dst_w = pstate->base.crtc_w;
+		dst_h = pstate->base.crtc_h;
+	} else {
+		src_w = drm_rect_width(&pstate->base.src);
+		src_h = drm_rect_height(&pstate->base.src);
+		dst_w = drm_rect_width(&pstate->base.dst);
+		dst_h = drm_rect_height(&pstate->base.dst);
+	}
+
 	if (drm_rotation_90_or_270(pstate->base.rotation))
 		swap(dst_w, dst_h);
 
@@ -3374,6 +3399,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 			     const struct drm_plane_state *pstate,
 			     int y)
 {
+	struct intel_plane *plane = to_intel_plane(pstate->plane);
 	struct intel_plane_state *intel_pstate = to_intel_plane_state(pstate);
 	uint32_t down_scale_amount, data_rate;
 	uint32_t width = 0, height = 0;
@@ -3386,7 +3412,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 	fb = pstate->fb;
 	format = fb->format->format;
 
-	if (pstate->plane->type == DRM_PLANE_TYPE_CURSOR)
+	if (plane->id == PLANE_CURSOR)
 		return 0;
 	if (y && format != DRM_FORMAT_NV12)
 		return 0;
@@ -3410,7 +3436,7 @@ skl_plane_relative_data_rate(const struct intel_crtc_state *cstate,
 		data_rate = width * height * fb->format->cpp[0];
 	}
 
-	down_scale_amount = skl_plane_downscale_amount(intel_pstate);
+	down_scale_amount = skl_plane_downscale_amount(cstate, intel_pstate);
 
 	return (uint64_t)data_rate * down_scale_amount >> 16;
 }
@@ -3702,7 +3728,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
 	uint64_t pixel_rate;
 
 	/* Shouldn't reach here on disabled planes... */
-	if (WARN_ON(!pstate->base.visible))
+	if (WARN_ON(!intel_wm_plane_visible(cstate, pstate)))
 		return 0;
 
 	/*
@@ -3710,7 +3736,7 @@ static uint32_t skl_adjusted_plane_pixel_rate(const struct intel_crtc_state *cst
 	 * with additional adjustments for plane-specific scaling.
 	 */
 	adjusted_pixel_rate = cstate->pixel_rate;
-	downscale_amount = skl_plane_downscale_amount(pstate);
+	downscale_amount = skl_plane_downscale_amount(cstate, pstate);
 
 	pixel_rate = adjusted_pixel_rate * downscale_amount >> 16;
 	WARN_ON(pixel_rate != clamp_t(uint32_t, pixel_rate, 0, ~0));
@@ -3727,6 +3753,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 				uint8_t *out_lines, /* out */
 				bool *enabled /* out */)
 {
+	struct intel_plane *plane = to_intel_plane(intel_pstate->base.plane);
 	struct drm_plane_state *pstate = &intel_pstate->base;
 	struct drm_framebuffer *fb = pstate->fb;
 	uint32_t latency = dev_priv->wm.skl_latency[level];
@@ -3746,7 +3773,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	bool apply_memory_bw_wa = skl_needs_memory_bw_wa(state);
 	bool y_tiled, x_tiled;
 
-	if (latency == 0 || !cstate->base.active || !intel_pstate->base.visible) {
+	if (latency == 0 ||
+	    !intel_wm_plane_visible(cstate, intel_pstate)) {
 		*enabled = false;
 		return 0;
 	}
@@ -3762,8 +3790,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv,
 	if (apply_memory_bw_wa && x_tiled)
 		latency += 15;
 
-	width = drm_rect_width(&intel_pstate->base.src) >> 16;
-	height = drm_rect_height(&intel_pstate->base.src) >> 16;
+	if (plane->id == PLANE_CURSOR) {
+		width = intel_pstate->base.crtc_w;
+		height = intel_pstate->base.crtc_h;
+	} else {
+		width = drm_rect_width(&intel_pstate->base.src) >> 16;
+		height = drm_rect_height(&intel_pstate->base.src) >> 16;
+	}
 
 	if (drm_rotation_90_or_270(pstate->rotation))
 		swap(width, height);
@@ -8055,7 +8088,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv)
 	case GEN6_PCODE_TIMEOUT:
 		return -ETIMEDOUT;
 	default:
-		MISSING_CASE(flags)
+		MISSING_CASE(flags);
 		return 0;
 	}
 }
@@ -8355,6 +8388,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 			     const i915_reg_t reg)
 {
 	u32 lower, upper, tmp;
+	int loop = 2;
 
 	/* The register accessed do not need forcewake. We borrow
 	 * uncore lock to prevent concurrent access to range reg.
@@ -8383,7 +8417,7 @@ static u64 vlv_residency_raw(struct drm_i915_private *dev_priv,
 		I915_WRITE_FW(VLV_COUNTER_CONTROL,
 			      _MASKED_BIT_ENABLE(VLV_COUNT_RANGE_HIGH));
 		upper = I915_READ_FW(reg);
-	} while (upper != tmp);
+	} while (upper != tmp && --loop);
 
 	/* Everywhere else we always use VLV_COUNTER_CONTROL with the
 	 * VLV_COUNT_RANGE_HIGH bit set - so it is safe to leave it set
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index d9b8d17c3fc6..66a2b8b83972 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -49,13 +49,7 @@ static int __intel_ring_space(int head, int tail, int size)
 
 void intel_ring_update_space(struct intel_ring *ring)
 {
-	if (ring->last_retired_head != -1) {
-		ring->head = ring->last_retired_head;
-		ring->last_retired_head = -1;
-	}
-
-	ring->space = __intel_ring_space(ring->head & HEAD_ADDR,
-					 ring->tail, ring->size);
+	ring->space = __intel_ring_space(ring->head, ring->tail, ring->size);
 }
 
 static int
@@ -618,12 +612,8 @@ static void reset_ring_common(struct intel_engine_cs *engine,
 		}
 
 		/* If the rq hung, jump to its breadcrumb and skip the batch */
-		if (request->fence.error == -EIO) {
-			struct intel_ring *ring = request->ring;
-
-			ring->head = request->postfix;
-			ring->last_retired_head = -1;
-		}
+		if (request->fence.error == -EIO)
+			request->ring->head = request->postfix;
 	} else {
 		engine->legacy_active_context = NULL;
 	}
@@ -784,7 +774,7 @@ static void i9xx_submit_request(struct drm_i915_gem_request *request)
 
 	i915_gem_request_submit(request);
 
-	GEM_BUG_ON(!IS_ALIGNED(request->tail, 8));
+	assert_ring_tail_valid(request->ring, request->tail);
 	I915_WRITE_TAIL(request->engine, request->tail);
 }
 
@@ -796,7 +786,7 @@ static void i9xx_emit_breadcrumb(struct drm_i915_gem_request *req, u32 *cs)
 	*cs++ = MI_USER_INTERRUPT;
 
 	req->tail = intel_ring_offset(req, cs);
-	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
+	assert_ring_tail_valid(req->ring, req->tail);
 }
 
 static const int i9xx_emit_breadcrumb_sz = 4;
@@ -835,7 +825,7 @@ static void gen8_render_emit_breadcrumb(struct drm_i915_gem_request *req,
 	*cs++ = MI_NOOP;
 
 	req->tail = intel_ring_offset(req, cs);
-	GEM_BUG_ON(!IS_ALIGNED(req->tail, 8));
+	assert_ring_tail_valid(req->ring, req->tail);
 }
 
 static const int gen8_render_emit_breadcrumb_sz = 8;
@@ -1392,7 +1382,6 @@ intel_engine_create_ring(struct intel_engine_cs *engine, int size)
 	if (IS_I830(engine->i915) || IS_I845G(engine->i915))
 		ring->effective_size -= 2 * CACHELINE_BYTES;
 
-	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);
 
 	vma = intel_ring_create_vma(engine->i915, size);
@@ -1451,6 +1440,8 @@ static int intel_ring_context_pin(struct intel_engine_cs *engine,
 		ret = context_pin(ctx);
 		if (ret)
 			goto error;
+
+		ce->state->obj->mm.dirty = true;
 	}
 
 	/* The kernel context is only used as a placeholder for flushing the
@@ -1571,10 +1562,8 @@ void intel_legacy_submission_resume(struct drm_i915_private *dev_priv)
 	struct intel_engine_cs *engine;
 	enum intel_engine_id id;
 
-	for_each_engine(engine, dev_priv, id) {
+	for_each_engine(engine, dev_priv, id)
 		engine->buffer->head = engine->buffer->tail;
-		engine->buffer->last_retired_head = -1;
-	}
 }
 
 static int ring_request_alloc(struct drm_i915_gem_request *request)
@@ -2128,7 +2117,7 @@ int intel_init_render_ring_buffer(struct intel_engine_cs *engine)
 
 			num_rings =
 				hweight32(INTEL_INFO(dev_priv)->ring_mask) - 1;
-			engine->emit_breadcrumb_sz += num_rings * 6;
+			engine->emit_breadcrumb_sz += num_rings * 8;
 		}
 	} else if (INTEL_GEN(dev_priv) >= 6) {
 		engine->init_context = intel_rcs_ctx_init;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 847aea554464..a82a0807f64d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -149,16 +149,6 @@ struct intel_ring {
 	int space;
 	int size;
 	int effective_size;
-
-	/** We track the position of the requests in the ring buffer, and
-	 * when each is retired we increment last_retired_head as the GPU
-	 * must have finished processing the request and so we know we
-	 * can advance the ringbuffer up to that position.
-	 *
-	 * last_retired_head is set to -1 after the value is consumed so
-	 * we can detect new retirements.
-	 */
-	u32 last_retired_head;
 };
 
 struct i915_gem_context;
@@ -442,18 +432,10 @@ struct intel_engine_cs {
 	u32 (*get_cmd_length_mask)(u32 cmd_header);
 };
 
-static inline unsigned
+static inline unsigned int
 intel_engine_flag(const struct intel_engine_cs *engine)
 {
-	return 1 << engine->id;
-}
-
-static inline void
-intel_flush_status_page(struct intel_engine_cs *engine, int reg)
-{
-	mb();
-	clflush(&engine->status_page.page_addr[reg]);
-	mb();
+	return BIT(engine->id);
 }
 
 static inline u32
@@ -464,14 +446,22 @@ intel_read_status_page(struct intel_engine_cs *engine, int reg)
 }
 
 static inline void
-intel_write_status_page(struct intel_engine_cs *engine,
-			int reg, u32 value)
+intel_write_status_page(struct intel_engine_cs *engine, int reg, u32 value)
 {
-	mb();
-	clflush(&engine->status_page.page_addr[reg]);
-	engine->status_page.page_addr[reg] = value;
-	clflush(&engine->status_page.page_addr[reg]);
-	mb();
+	/* Writing into the status page should be done sparingly. Since
+	 * we do when we are uncertain of the device state, we take a bit
+	 * of extra paranoia to try and ensure that the HWS takes the value
+	 * we give and that it doesn't end up trapped inside the CPU!
+	 */
+	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
+		mb();
+		clflush(&engine->status_page.page_addr[reg]);
+		engine->status_page.page_addr[reg] = value;
+		clflush(&engine->status_page.page_addr[reg]);
+		mb();
+	} else {
+		WRITE_ONCE(engine->status_page.page_addr[reg], value);
+	}
 }
 
 /*
@@ -525,12 +515,29 @@ intel_ring_advance(struct drm_i915_gem_request *req, u32 *cs)
 }
 
 static inline u32
-intel_ring_offset(struct drm_i915_gem_request *req, void *addr)
+intel_ring_wrap(const struct intel_ring *ring, u32 pos)
+{
+	return pos & (ring->size - 1);
+}
+
+static inline u32
+intel_ring_offset(const struct drm_i915_gem_request *req, void *addr)
 {
 	/* Don't write ring->size (equivalent to 0) as that hangs some GPUs. */
 	u32 offset = addr - req->ring->vaddr;
 	GEM_BUG_ON(offset > req->ring->size);
-	return offset & (req->ring->size - 1);
+	return intel_ring_wrap(req->ring, offset);
+}
+
+static inline void
+assert_ring_tail_valid(const struct intel_ring *ring, unsigned int tail)
+{
+	/* We could combine these into a single tail operation, but keeping
+	 * them as seperate tests will help identify the cause should one
+	 * ever fire.
+	 */
+	GEM_BUG_ON(!IS_ALIGNED(tail, 8));
+	GEM_BUG_ON(tail >= ring->size);
 }
 
 void intel_ring_update_space(struct intel_ring *ring);
diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c
index 012bc358a33a..f8a375f8dde6 100644
--- a/drivers/gpu/drm/i915/intel_runtime_pm.c
+++ b/drivers/gpu/drm/i915/intel_runtime_pm.c
@@ -2840,8 +2840,10 @@ void intel_runtime_pm_get(struct drm_i915_private *dev_priv)
 {
 	struct pci_dev *pdev = dev_priv->drm.pdev;
 	struct device *kdev = &pdev->dev;
+	int ret;
 
-	pm_runtime_get_sync(kdev);
+	ret = pm_runtime_get_sync(kdev);
+	WARN_ONCE(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 
 	atomic_inc(&dev_priv->pm.wakeref_count);
 	assert_rpm_wakelock_held(dev_priv);
@@ -2871,7 +2873,8 @@ bool intel_runtime_pm_get_if_in_use(struct drm_i915_private *dev_priv)
 		 * function, since the power state is undefined. This applies
 		 * atm to the late/early system suspend/resume handlers.
 		 */
-		WARN_ON_ONCE(ret < 0);
+		WARN_ONCE(ret < 0,
+			  "pm_runtime_get_if_in_use() failed: %d\n", ret);
 		if (ret <= 0)
 			return false;
 	}
@@ -2955,8 +2958,11 @@ void intel_runtime_pm_enable(struct drm_i915_private *dev_priv)
 	 * platforms without RPM support.
 	 */
 	if (!HAS_RUNTIME_PM(dev_priv)) {
+		int ret;
+
 		pm_runtime_dont_use_autosuspend(kdev);
-		pm_runtime_get_sync(kdev);
+		ret = pm_runtime_get_sync(kdev);
+		WARN(ret < 0, "pm_runtime_get_sync() failed: %d\n", ret);
 	} else {
 		pm_runtime_use_autosuspend(kdev);
 	}
diff --git a/drivers/gpu/drm/i915/intel_sprite.c b/drivers/gpu/drm/i915/intel_sprite.c
index b931d0bd7a64..f7d431427115 100644
--- a/drivers/gpu/drm/i915/intel_sprite.c
+++ b/drivers/gpu/drm/i915/intel_sprite.c
@@ -217,7 +217,7 @@ skl_update_plane(struct drm_plane *drm_plane,
 	struct drm_framebuffer *fb = plane_state->base.fb;
 	enum plane_id plane_id = intel_plane->id;
 	enum pipe pipe = intel_plane->pipe;
-	u32 plane_ctl;
+	u32 plane_ctl = plane_state->ctl;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
 	u32 surf_addr = plane_state->main.offset;
 	unsigned int rotation = plane_state->base.rotation;
@@ -232,24 +232,6 @@ skl_update_plane(struct drm_plane *drm_plane,
 	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
 	unsigned long irqflags;
 
-	plane_ctl = PLANE_CTL_ENABLE;
-
-	if (!IS_GEMINILAKE(dev_priv)) {
-		plane_ctl |=
-			PLANE_CTL_PIPE_GAMMA_ENABLE |
-			PLANE_CTL_PIPE_CSC_ENABLE |
-			PLANE_CTL_PLANE_GAMMA_DISABLE;
-	}
-
-	plane_ctl |= skl_plane_ctl_format(fb->format->format);
-	plane_ctl |= skl_plane_ctl_tiling(fb->modifier);
-	plane_ctl |= skl_plane_ctl_rotation(rotation);
-
-	if (key->flags & I915_SET_COLORKEY_DESTINATION)
-		plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION;
-	else if (key->flags & I915_SET_COLORKEY_SOURCE)
-		plane_ctl |= PLANE_CTL_KEY_ENABLE_SOURCE;
-
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -361,32 +343,15 @@ chv_update_csc(struct intel_plane *intel_plane, uint32_t format)
 	I915_WRITE_FW(SPCSCCROCLAMP(plane_id), SPCSC_OMAX(1023) | SPCSC_OMIN(0));
 }
 
-static void
-vlv_update_plane(struct drm_plane *dplane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 vlv_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = dplane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(dplane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	enum pipe pipe = intel_plane->pipe;
-	enum plane_id plane_id = intel_plane->id;
-	u32 sprctl;
-	u32 sprsurf_offset, linear_offset;
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 sprctl;
 
-	sprctl = SP_ENABLE;
+	sprctl = SP_ENABLE | SP_GAMMA_ENABLE;
 
 	switch (fb->format->format) {
 	case DRM_FORMAT_YUYV:
@@ -423,20 +388,10 @@ vlv_update_plane(struct drm_plane *dplane,
 		sprctl |= SP_FORMAT_RGBA8888;
 		break;
 	default:
-		/*
-		 * If we get here one of the upper layers failed to filter
-		 * out the unsupported plane formats
-		 */
-		BUG();
-		break;
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}
 
-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	sprctl |= SP_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SP_TILED;
 
@@ -449,22 +404,36 @@ vlv_update_plane(struct drm_plane *dplane,
 	if (key->flags & I915_SET_COLORKEY_SOURCE)
 		sprctl |= SP_SOURCE_KEY;
 
+	return sprctl;
+}
+
+static void
+vlv_update_plane(struct drm_plane *dplane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = dplane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(dplane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	enum pipe pipe = intel_plane->pipe;
+	enum plane_id plane_id = intel_plane->id;
+	u32 sprctl = plane_state->ctl;
+	u32 sprsurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
-	src_w--;
-	src_h--;
 	crtc_w--;
 	crtc_h--;
 
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	if (rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	} else if (rotation & DRM_REFLECT_X) {
-		x += src_w;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -516,31 +485,23 @@ vlv_disable_plane(struct drm_plane *dplane, struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void
-ivb_update_plane(struct drm_plane *plane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 ivb_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	enum pipe pipe = intel_plane->pipe;
-	u32 sprctl, sprscale = 0;
-	u32 sprsurf_offset, linear_offset;
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 sprctl;
+
+	sprctl = SPRITE_ENABLE | SPRITE_GAMMA_ENABLE;
+
+	if (IS_IVYBRIDGE(dev_priv))
+		sprctl |= SPRITE_TRICKLE_FEED_DISABLE;
 
-	sprctl = SPRITE_ENABLE;
+	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
+		sprctl |= SPRITE_PIPE_CSC_ENABLE;
 
 	switch (fb->format->format) {
 	case DRM_FORMAT_XBGR8888:
@@ -562,34 +523,48 @@ ivb_update_plane(struct drm_plane *plane,
 		sprctl |= SPRITE_FORMAT_YUV422 | SPRITE_YUV_ORDER_VYUY;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}
 
-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	sprctl |= SPRITE_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		sprctl |= SPRITE_TILED;
 
 	if (rotation & DRM_ROTATE_180)
 		sprctl |= SPRITE_ROTATE_180;
 
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		sprctl &= ~SPRITE_TRICKLE_FEED_DISABLE;
-	else
-		sprctl |= SPRITE_TRICKLE_FEED_DISABLE;
-
-	if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv))
-		sprctl |= SPRITE_PIPE_CSC_ENABLE;
-
 	if (key->flags & I915_SET_COLORKEY_DESTINATION)
 		sprctl |= SPRITE_DEST_KEY;
 	else if (key->flags & I915_SET_COLORKEY_SOURCE)
 		sprctl |= SPRITE_SOURCE_KEY;
 
+	return sprctl;
+}
+
+static void
+ivb_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	enum pipe pipe = intel_plane->pipe;
+	u32 sprctl = plane_state->ctl, sprscale = 0;
+	u32 sprsurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
@@ -599,16 +574,6 @@ ivb_update_plane(struct drm_plane *plane,
 	if (crtc_w != src_w || crtc_h != src_h)
 		sprscale = SPRITE_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	sprsurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	/* HSW+ does this automagically in hardware */
-	if (!IS_HASWELL(dev_priv) && !IS_BROADWELL(dev_priv) &&
-	    rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -664,31 +629,20 @@ ivb_disable_plane(struct drm_plane *plane, struct drm_crtc *crtc)
 	spin_unlock_irqrestore(&dev_priv->uncore.lock, irqflags);
 }
 
-static void
-ilk_update_plane(struct drm_plane *plane,
-		 const struct intel_crtc_state *crtc_state,
-		 const struct intel_plane_state *plane_state)
+static u32 ilk_sprite_ctl(const struct intel_crtc_state *crtc_state,
+			  const struct intel_plane_state *plane_state)
 {
-	struct drm_device *dev = plane->dev;
-	struct drm_i915_private *dev_priv = to_i915(dev);
-	struct intel_plane *intel_plane = to_intel_plane(plane);
-	struct drm_framebuffer *fb = plane_state->base.fb;
-	int pipe = intel_plane->pipe;
-	u32 dvscntr, dvsscale;
-	u32 dvssurf_offset, linear_offset;
+	struct drm_i915_private *dev_priv =
+		to_i915(plane_state->base.plane->dev);
+	const struct drm_framebuffer *fb = plane_state->base.fb;
 	unsigned int rotation = plane_state->base.rotation;
 	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
-	int crtc_x = plane_state->base.dst.x1;
-	int crtc_y = plane_state->base.dst.y1;
-	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
-	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
-	uint32_t x = plane_state->base.src.x1 >> 16;
-	uint32_t y = plane_state->base.src.y1 >> 16;
-	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
-	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
-	unsigned long irqflags;
+	u32 dvscntr;
 
-	dvscntr = DVS_ENABLE;
+	dvscntr = DVS_ENABLE | DVS_GAMMA_ENABLE;
+
+	if (IS_GEN6(dev_priv))
+		dvscntr |= DVS_TRICKLE_FEED_DISABLE;
 
 	switch (fb->format->format) {
 	case DRM_FORMAT_XBGR8888:
@@ -710,47 +664,57 @@ ilk_update_plane(struct drm_plane *plane,
 		dvscntr |= DVS_FORMAT_YUV422 | DVS_YUV_ORDER_VYUY;
 		break;
 	default:
-		BUG();
+		MISSING_CASE(fb->format->format);
+		return 0;
 	}
 
-	/*
-	 * Enable gamma to match primary/cursor plane behaviour.
-	 * FIXME should be user controllable via propertiesa.
-	 */
-	dvscntr |= DVS_GAMMA_ENABLE;
-
 	if (fb->modifier == I915_FORMAT_MOD_X_TILED)
 		dvscntr |= DVS_TILED;
 
 	if (rotation & DRM_ROTATE_180)
 		dvscntr |= DVS_ROTATE_180;
 
-	if (IS_GEN6(dev_priv))
-		dvscntr |= DVS_TRICKLE_FEED_DISABLE; /* must disable */
-
 	if (key->flags & I915_SET_COLORKEY_DESTINATION)
 		dvscntr |= DVS_DEST_KEY;
 	else if (key->flags & I915_SET_COLORKEY_SOURCE)
 		dvscntr |= DVS_SOURCE_KEY;
 
+	return dvscntr;
+}
+
+static void
+ilk_update_plane(struct drm_plane *plane,
+		 const struct intel_crtc_state *crtc_state,
+		 const struct intel_plane_state *plane_state)
+{
+	struct drm_device *dev = plane->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct intel_plane *intel_plane = to_intel_plane(plane);
+	struct drm_framebuffer *fb = plane_state->base.fb;
+	int pipe = intel_plane->pipe;
+	u32 dvscntr = plane_state->ctl, dvsscale = 0;
+	u32 dvssurf_offset = plane_state->main.offset;
+	u32 linear_offset;
+	const struct drm_intel_sprite_colorkey *key = &plane_state->ckey;
+	int crtc_x = plane_state->base.dst.x1;
+	int crtc_y = plane_state->base.dst.y1;
+	uint32_t crtc_w = drm_rect_width(&plane_state->base.dst);
+	uint32_t crtc_h = drm_rect_height(&plane_state->base.dst);
+	uint32_t x = plane_state->main.x;
+	uint32_t y = plane_state->main.y;
+	uint32_t src_w = drm_rect_width(&plane_state->base.src) >> 16;
+	uint32_t src_h = drm_rect_height(&plane_state->base.src) >> 16;
+	unsigned long irqflags;
+
 	/* Sizes are 0 based */
 	src_w--;
 	src_h--;
 	crtc_w--;
 	crtc_h--;
 
-	dvsscale = 0;
 	if (crtc_w != src_w || crtc_h != src_h)
 		dvsscale = DVS_SCALE_ENABLE | (src_w << 16) | src_h;
 
-	intel_add_fb_offsets(&x, &y, plane_state, 0);
-	dvssurf_offset = intel_compute_tile_offset(&x, &y, plane_state, 0);
-
-	if (rotation & DRM_ROTATE_180) {
-		x += src_w;
-		y += src_h;
-	}
-
 	linear_offset = intel_fb_xy_to_linear(x, y, plane_state, 0);
 
 	spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
@@ -981,6 +945,26 @@ intel_check_sprite_plane(struct drm_plane *plane,
 		ret = skl_check_plane_surface(state);
 		if (ret)
 			return ret;
+
+		state->ctl = skl_plane_ctl(crtc_state, state);
+	} else if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv)) {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = vlv_sprite_ctl(crtc_state, state);
+	} else if (INTEL_GEN(dev_priv) >= 7) {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = ivb_sprite_ctl(crtc_state, state);
+	} else {
+		ret = i9xx_check_plane_surface(state);
+		if (ret)
+			return ret;
+
+		state->ctl = ilk_sprite_ctl(crtc_state, state);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 6ed1a3ce47b7..e077c2a9e694 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1315,8 +1315,10 @@ static void intel_tv_find_better_format(struct drm_connector *connector)
  * Currently this always returns CONNECTOR_STATUS_UNKNOWN, as we need to be sure
  * we have a pipe programmed in order to probe the TV.
  */
-static enum drm_connector_status
-intel_tv_detect(struct drm_connector *connector, bool force)
+static int
+intel_tv_detect(struct drm_connector *connector,
+		struct drm_modeset_acquire_ctx *ctx,
+		bool force)
 {
 	struct drm_display_mode mode;
 	struct intel_tv *intel_tv = intel_attached_tv(connector);
@@ -1331,21 +1333,20 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 
 	if (force) {
 		struct intel_load_detect_pipe tmp;
-		struct drm_modeset_acquire_ctx ctx;
+		int ret;
 
-		drm_modeset_acquire_init(&ctx, 0);
+		ret = intel_get_load_detect_pipe(connector, &mode, &tmp, ctx);
+		if (ret < 0)
+			return ret;
 
-		if (intel_get_load_detect_pipe(connector, &mode, &tmp, &ctx)) {
+		if (ret > 0) {
 			type = intel_tv_detect_type(intel_tv, connector);
-			intel_release_load_detect_pipe(connector, &tmp, &ctx);
+			intel_release_load_detect_pipe(connector, &tmp, ctx);
 			status = type < 0 ?
 				connector_status_disconnected :
 				connector_status_connected;
 		} else
 			status = connector_status_unknown;
-
-		drm_modeset_drop_locks(&ctx);
-		drm_modeset_acquire_fini(&ctx);
 	} else
 		return connector->status;
 
@@ -1516,7 +1517,6 @@ out:
 
 static const struct drm_connector_funcs intel_tv_connector_funcs = {
 	.dpms = drm_atomic_helper_connector_dpms,
-	.detect = intel_tv_detect,
 	.late_register = intel_connector_register,
 	.early_unregister = intel_connector_unregister,
 	.destroy = intel_tv_destroy,
@@ -1528,6 +1528,7 @@ static const struct drm_connector_funcs intel_tv_connector_funcs = {
 };
 
 static const struct drm_connector_helper_funcs intel_tv_connector_helper_funcs = {
+	.detect_ctx = intel_tv_detect,
 	.mode_valid = intel_tv_mode_valid,
 	.get_modes = intel_tv_get_modes,
 };
diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c
index d15a7d9d4eb0..c117424f1f50 100644
--- a/drivers/gpu/drm/i915/intel_uc.c
+++ b/drivers/gpu/drm/i915/intel_uc.c
@@ -26,6 +26,19 @@
 #include "intel_uc.h"
 #include <linux/firmware.h>
 
+/* Cleans up uC firmware by releasing the firmware GEM obj.
+ */
+static void __intel_uc_fw_fini(struct intel_uc_fw *uc_fw)
+{
+	struct drm_i915_gem_object *obj;
+
+	obj = fetch_and_zero(&uc_fw->obj);
+	if (obj)
+		i915_gem_object_put(obj);
+
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_NONE;
+}
+
 /* Reset GuC providing us with fresh state for both GuC and HuC.
  */
 static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv)
@@ -83,23 +96,166 @@ void intel_uc_sanitize_options(struct drm_i915_private *dev_priv)
 
 void intel_uc_init_early(struct drm_i915_private *dev_priv)
 {
-	mutex_init(&dev_priv->guc.send_mutex);
+	struct intel_guc *guc = &dev_priv->guc;
+
+	mutex_init(&guc->send_mutex);
+	guc->send = intel_guc_send_mmio;
+}
+
+static void fetch_uc_fw(struct drm_i915_private *dev_priv,
+			struct intel_uc_fw *uc_fw)
+{
+	struct pci_dev *pdev = dev_priv->drm.pdev;
+	struct drm_i915_gem_object *obj;
+	const struct firmware *fw = NULL;
+	struct uc_css_header *css;
+	size_t size;
+	int err;
+
+	if (!uc_fw->path)
+		return;
+
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING;
+
+	DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n",
+			 intel_uc_fw_status_repr(uc_fw->fetch_status));
+
+	err = request_firmware(&fw, uc_fw->path, &pdev->dev);
+	if (err)
+		goto fail;
+	if (!fw)
+		goto fail;
+
+	DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n",
+			 uc_fw->path, fw);
+
+	/* Check the size of the blob before examining buffer contents */
+	if (fw->size < sizeof(struct uc_css_header)) {
+		DRM_NOTE("Firmware header is missing\n");
+		goto fail;
+	}
+
+	css = (struct uc_css_header *)fw->data;
+
+	/* Firmware bits always start from header */
+	uc_fw->header_offset = 0;
+	uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
+			      css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
+
+	if (uc_fw->header_size != sizeof(struct uc_css_header)) {
+		DRM_NOTE("CSS header definition mismatch\n");
+		goto fail;
+	}
+
+	/* then, uCode */
+	uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
+	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
+
+	/* now RSA */
+	if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
+		DRM_NOTE("RSA key size is bad\n");
+		goto fail;
+	}
+	uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size;
+	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+
+	/* At least, it should have header, uCode and RSA. Size of all three. */
+	size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size;
+	if (fw->size < size) {
+		DRM_NOTE("Missing firmware components\n");
+		goto fail;
+	}
+
+	/*
+	 * The GuC firmware image has the version number embedded at a
+	 * well-known offset within the firmware blob; note that major / minor
+	 * version are TWO bytes each (i.e. u16), although all pointers and
+	 * offsets are defined in terms of bytes (u8).
+	 */
+	switch (uc_fw->type) {
+	case INTEL_UC_FW_TYPE_GUC:
+		/* Header and uCode will be loaded to WOPCM. Size of the two. */
+		size = uc_fw->header_size + uc_fw->ucode_size;
+
+		/* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
+		if (size > intel_guc_wopcm_size(dev_priv)) {
+			DRM_ERROR("Firmware is too large to fit in WOPCM\n");
+			goto fail;
+		}
+		uc_fw->major_ver_found = css->guc.sw_version >> 16;
+		uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF;
+		break;
+
+	case INTEL_UC_FW_TYPE_HUC:
+		uc_fw->major_ver_found = css->huc.sw_version >> 16;
+		uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF;
+		break;
+
+	default:
+		DRM_ERROR("Unknown firmware type %d\n", uc_fw->type);
+		err = -ENOEXEC;
+		goto fail;
+	}
+
+	if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
+		DRM_NOTE("Skipping %s firmware version check\n",
+			 intel_uc_fw_type_repr(uc_fw->type));
+	} else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
+		   uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
+		DRM_NOTE("%s firmware version %d.%d, required %d.%d\n",
+			 intel_uc_fw_type_repr(uc_fw->type),
+			 uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			 uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+		err = -ENOEXEC;
+		goto fail;
+	}
+
+	DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
+			 uc_fw->major_ver_found, uc_fw->minor_ver_found,
+			 uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
+
+	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
+	if (IS_ERR(obj)) {
+		err = PTR_ERR(obj);
+		goto fail;
+	}
+
+	uc_fw->obj = obj;
+	uc_fw->size = fw->size;
+
+	DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n",
+			 uc_fw->obj);
+
+	release_firmware(fw);
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
+	return;
+
+fail:
+	DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n",
+		 uc_fw->path, err);
+	DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n",
+			 err, fw, uc_fw->obj);
+
+	release_firmware(fw);		/* OK even if fw is NULL */
+	uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL;
 }
 
 void intel_uc_init_fw(struct drm_i915_private *dev_priv)
 {
-	if (dev_priv->huc.fw.path)
-		intel_uc_prepare_fw(dev_priv, &dev_priv->huc.fw);
+	fetch_uc_fw(dev_priv, &dev_priv->huc.fw);
+	fetch_uc_fw(dev_priv, &dev_priv->guc.fw);
+}
 
-	if (dev_priv->guc.fw.path)
-		intel_uc_prepare_fw(dev_priv, &dev_priv->guc.fw);
+void intel_uc_fini_fw(struct drm_i915_private *dev_priv)
+{
+	__intel_uc_fw_fini(&dev_priv->guc.fw);
+	__intel_uc_fw_fini(&dev_priv->huc.fw);
 }
 
 int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 {
 	int ret, attempts;
 
-	/* GuC not enabled, nothing to do */
 	if (!i915.enable_guc_loading)
 		return 0;
 
@@ -109,9 +265,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	i915_ggtt_enable_guc(dev_priv);
 
 	if (i915.enable_guc_submission) {
+		/*
+		 * This is stuff we need to have available at fw load time
+		 * if we are planning to enable submission later
+		 */
 		ret = i915_guc_submission_init(dev_priv);
 		if (ret)
-			goto err;
+			goto err_guc;
 	}
 
 	/* WaEnableuKernelHeaderValidFix:skl */
@@ -150,7 +310,7 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 
 		ret = i915_guc_submission_enable(dev_priv);
 		if (ret)
-			goto err_submission;
+			goto err_interrupts;
 	}
 
 	return 0;
@@ -164,11 +324,12 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv)
 	 * nonfatal error (i.e. it doesn't prevent driver load, but
 	 * marks the GPU as wedged until reset).
 	 */
+err_interrupts:
+	gen9_disable_guc_interrupts(dev_priv);
 err_submission:
 	if (i915.enable_guc_submission)
 		i915_guc_submission_fini(dev_priv);
-
-err:
+err_guc:
 	i915_ggtt_disable_guc(dev_priv);
 
 	DRM_ERROR("GuC init failed\n");
@@ -185,11 +346,24 @@ err:
 	return ret;
 }
 
+void intel_uc_fini_hw(struct drm_i915_private *dev_priv)
+{
+	if (!i915.enable_guc_loading)
+		return;
+
+	if (i915.enable_guc_submission) {
+		i915_guc_submission_disable(dev_priv);
+		gen9_disable_guc_interrupts(dev_priv);
+		i915_guc_submission_fini(dev_priv);
+	}
+	i915_ggtt_disable_guc(dev_priv);
+}
+
 /*
  * Read GuC command/status register (SOFT_SCRATCH_0)
  * Return true if it contains a response rather than a command
  */
-static bool intel_guc_recv(struct intel_guc *guc, u32 *status)
+static bool guc_recv(struct intel_guc *guc, u32 *status)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 
@@ -198,7 +372,10 @@ static bool intel_guc_recv(struct intel_guc *guc, u32 *status)
 	return INTEL_GUC_RECV_IS_RESPONSE(val);
 }
 
-int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
+/*
+ * This function implements the MMIO based host to GuC interface.
+ */
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len)
 {
 	struct drm_i915_private *dev_priv = guc_to_i915(guc);
 	u32 status;
@@ -209,7 +386,7 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 		return -EINVAL;
 
 	mutex_lock(&guc->send_mutex);
-	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_get(dev_priv, FORCEWAKE_BLITTER);
 
 	dev_priv->guc.action_count += 1;
 	dev_priv->guc.action_cmd = action[0];
@@ -226,9 +403,9 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 	 * up to that length of time, then switch to a slower sleep-wait loop.
 	 * No inte_guc_send command should ever take longer than 10ms.
 	 */
-	ret = wait_for_us(intel_guc_recv(guc, &status), 10);
+	ret = wait_for_us(guc_recv(guc, &status), 10);
 	if (ret)
-		ret = wait_for(intel_guc_recv(guc, &status), 10);
+		ret = wait_for(guc_recv(guc, &status), 10);
 	if (status != INTEL_GUC_STATUS_SUCCESS) {
 		/*
 		 * Either the GuC explicitly returned an error (which
@@ -247,7 +424,7 @@ int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
 	}
 	dev_priv->guc.action_status = status;
 
-	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL);
+	intel_uncore_forcewake_put(dev_priv, FORCEWAKE_BLITTER);
 	mutex_unlock(&guc->send_mutex);
 
 	return ret;
@@ -268,136 +445,3 @@ int intel_guc_sample_forcewake(struct intel_guc *guc)
 
 	return intel_guc_send(guc, action, ARRAY_SIZE(action));
 }
-
-void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
-			 struct intel_uc_fw *uc_fw)
-{
-	struct pci_dev *pdev = dev_priv->drm.pdev;
-	struct drm_i915_gem_object *obj;
-	const struct firmware *fw = NULL;
-	struct uc_css_header *css;
-	size_t size;
-	int err;
-
-	uc_fw->fetch_status = INTEL_UC_FIRMWARE_PENDING;
-
-	DRM_DEBUG_DRIVER("before requesting firmware: uC fw fetch status %s\n",
-			 intel_uc_fw_status_repr(uc_fw->fetch_status));
-
-	err = request_firmware(&fw, uc_fw->path, &pdev->dev);
-	if (err)
-		goto fail;
-	if (!fw)
-		goto fail;
-
-	DRM_DEBUG_DRIVER("fetch uC fw from %s succeeded, fw %p\n",
-		uc_fw->path, fw);
-
-	/* Check the size of the blob before examining buffer contents */
-	if (fw->size < sizeof(struct uc_css_header)) {
-		DRM_NOTE("Firmware header is missing\n");
-		goto fail;
-	}
-
-	css = (struct uc_css_header *)fw->data;
-
-	/* Firmware bits always start from header */
-	uc_fw->header_offset = 0;
-	uc_fw->header_size = (css->header_size_dw - css->modulus_size_dw -
-		css->key_size_dw - css->exponent_size_dw) * sizeof(u32);
-
-	if (uc_fw->header_size != sizeof(struct uc_css_header)) {
-		DRM_NOTE("CSS header definition mismatch\n");
-		goto fail;
-	}
-
-	/* then, uCode */
-	uc_fw->ucode_offset = uc_fw->header_offset + uc_fw->header_size;
-	uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
-
-	/* now RSA */
-	if (css->key_size_dw != UOS_RSA_SCRATCH_MAX_COUNT) {
-		DRM_NOTE("RSA key size is bad\n");
-		goto fail;
-	}
-	uc_fw->rsa_offset = uc_fw->ucode_offset + uc_fw->ucode_size;
-	uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
-
-	/* At least, it should have header, uCode and RSA. Size of all three. */
-	size = uc_fw->header_size + uc_fw->ucode_size + uc_fw->rsa_size;
-	if (fw->size < size) {
-		DRM_NOTE("Missing firmware components\n");
-		goto fail;
-	}
-
-	/*
-	 * The GuC firmware image has the version number embedded at a
-	 * well-known offset within the firmware blob; note that major / minor
-	 * version are TWO bytes each (i.e. u16), although all pointers and
-	 * offsets are defined in terms of bytes (u8).
-	 */
-	switch (uc_fw->type) {
-	case INTEL_UC_FW_TYPE_GUC:
-		/* Header and uCode will be loaded to WOPCM. Size of the two. */
-		size = uc_fw->header_size + uc_fw->ucode_size;
-
-		/* Top 32k of WOPCM is reserved (8K stack + 24k RC6 context). */
-		if (size > intel_guc_wopcm_size(dev_priv)) {
-			DRM_ERROR("Firmware is too large to fit in WOPCM\n");
-			goto fail;
-		}
-		uc_fw->major_ver_found = css->guc.sw_version >> 16;
-		uc_fw->minor_ver_found = css->guc.sw_version & 0xFFFF;
-		break;
-
-	case INTEL_UC_FW_TYPE_HUC:
-		uc_fw->major_ver_found = css->huc.sw_version >> 16;
-		uc_fw->minor_ver_found = css->huc.sw_version & 0xFFFF;
-		break;
-
-	default:
-		DRM_ERROR("Unknown firmware type %d\n", uc_fw->type);
-		err = -ENOEXEC;
-		goto fail;
-	}
-
-	if (uc_fw->major_ver_wanted == 0 && uc_fw->minor_ver_wanted == 0) {
-		DRM_NOTE("Skipping uC firmware version check\n");
-	} else if (uc_fw->major_ver_found != uc_fw->major_ver_wanted ||
-		   uc_fw->minor_ver_found < uc_fw->minor_ver_wanted) {
-		DRM_NOTE("uC firmware version %d.%d, required %d.%d\n",
-			uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
-		err = -ENOEXEC;
-		goto fail;
-	}
-
-	DRM_DEBUG_DRIVER("firmware version %d.%d OK (minimum %d.%d)\n",
-			uc_fw->major_ver_found, uc_fw->minor_ver_found,
-			uc_fw->major_ver_wanted, uc_fw->minor_ver_wanted);
-
-	obj = i915_gem_object_create_from_data(dev_priv, fw->data, fw->size);
-	if (IS_ERR(obj)) {
-		err = PTR_ERR(obj);
-		goto fail;
-	}
-
-	uc_fw->obj = obj;
-	uc_fw->size = fw->size;
-
-	DRM_DEBUG_DRIVER("uC fw fetch status SUCCESS, obj %p\n",
-			uc_fw->obj);
-
-	release_firmware(fw);
-	uc_fw->fetch_status = INTEL_UC_FIRMWARE_SUCCESS;
-	return;
-
-fail:
-	DRM_WARN("Failed to fetch valid uC firmware from %s (error %d)\n",
-		 uc_fw->path, err);
-	DRM_DEBUG_DRIVER("uC fw fetch status FAIL; err %d, fw %p, obj %p\n",
-		err, fw, uc_fw->obj);
-
-	release_firmware(fw);		/* OK even if fw is NULL */
-	uc_fw->fetch_status = INTEL_UC_FIRMWARE_FAIL;
-}
diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h
index a35ededfaa40..4b7f73aeddac 100644
--- a/drivers/gpu/drm/i915/intel_uc.h
+++ b/drivers/gpu/drm/i915/intel_uc.h
@@ -34,7 +34,9 @@ struct drm_i915_gem_request;
 
 /*
  * This structure primarily describes the GEM object shared with the GuC.
- * The GEM object is held for the entire lifetime of our interaction with
+ * The specs sometimes refer to this object as a "GuC context", but we use
+ * the term "client" to avoid confusion with hardware contexts. This
+ * GEM object is held for the entire lifetime of our interaction with
  * the GuC, being allocated before the GuC is loaded with its firmware.
  * Because there's no way to update the address used by the GuC after
  * initialisation, the shared object must stay pinned into the GGTT as
@@ -44,7 +46,7 @@ struct drm_i915_gem_request;
  *
  * The single GEM object described here is actually made up of several
  * separate areas, as far as the GuC is concerned. The first page (kept
- * kmap'd) includes the "process decriptor" which holds sequence data for
+ * kmap'd) includes the "process descriptor" which holds sequence data for
  * the doorbell, and one cacheline which actually *is* the doorbell; a
  * write to this will "ring the doorbell" (i.e. send an interrupt to the
  * GuC). The subsequent  pages of the client object constitute the work
@@ -72,13 +74,12 @@ struct i915_guc_client {
 
 	uint32_t engines;		/* bitmap of (host) engine ids	*/
 	uint32_t priority;
-	uint32_t ctx_index;
+	u32 stage_id;
 	uint32_t proc_desc_offset;
 
-	uint32_t doorbell_offset;
-	uint32_t doorbell_cookie;
-	uint16_t doorbell_id;
-	uint16_t padding[3];		/* Maintain alignment		*/
+	u16 doorbell_id;
+	unsigned long doorbell_offset;
+	u32 doorbell_cookie;
 
 	spinlock_t wq_lock;
 	uint32_t wq_offset;
@@ -100,11 +101,40 @@ enum intel_uc_fw_status {
 	INTEL_UC_FIRMWARE_SUCCESS
 };
 
+/* User-friendly representation of an enum */
+static inline
+const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status)
+{
+	switch (status) {
+	case INTEL_UC_FIRMWARE_FAIL:
+		return "FAIL";
+	case INTEL_UC_FIRMWARE_NONE:
+		return "NONE";
+	case INTEL_UC_FIRMWARE_PENDING:
+		return "PENDING";
+	case INTEL_UC_FIRMWARE_SUCCESS:
+		return "SUCCESS";
+	}
+	return "<invalid>";
+}
+
 enum intel_uc_fw_type {
 	INTEL_UC_FW_TYPE_GUC,
 	INTEL_UC_FW_TYPE_HUC
 };
 
+/* User-friendly representation of an enum */
+static inline const char *intel_uc_fw_type_repr(enum intel_uc_fw_type type)
+{
+	switch (type) {
+	case INTEL_UC_FW_TYPE_GUC:
+		return "GuC";
+	case INTEL_UC_FW_TYPE_HUC:
+		return "HuC";
+	}
+	return "uC";
+}
+
 /*
  * This structure encapsulates all the data needed during the process
  * of fetching, caching, and loading the firmware image into the GuC.
@@ -133,11 +163,13 @@ struct intel_uc_fw {
 struct intel_guc_log {
 	uint32_t flags;
 	struct i915_vma *vma;
-	void *buf_addr;
-	struct workqueue_struct *flush_wq;
-	struct work_struct flush_work;
-	struct rchan *relay_chan;
-
+	/* The runtime stuff gets created only when GuC logging gets enabled */
+	struct {
+		void *buf_addr;
+		struct workqueue_struct *flush_wq;
+		struct work_struct flush_work;
+		struct rchan *relay_chan;
+	} runtime;
 	/* logging related stats */
 	u32 capture_miss_count;
 	u32 flush_interrupt_count;
@@ -154,12 +186,13 @@ struct intel_guc {
 	bool interrupts_enabled;
 
 	struct i915_vma *ads_vma;
-	struct i915_vma *ctx_pool_vma;
-	struct ida ctx_ids;
+	struct i915_vma *stage_desc_pool;
+	void *stage_desc_pool_vaddr;
+	struct ida stage_ids;
 
 	struct i915_guc_client *execbuf_client;
 
-	DECLARE_BITMAP(doorbell_bitmap, GUC_MAX_DOORBELLS);
+	DECLARE_BITMAP(doorbell_bitmap, GUC_NUM_DOORBELLS);
 	uint32_t db_cacheline;		/* Cyclic counter mod pagesize	*/
 
 	/* Action status & statistics */
@@ -174,6 +207,9 @@ struct intel_guc {
 
 	/* To serialize the intel_guc_send actions */
 	struct mutex send_mutex;
+
+	/* GuC's FW specific send function */
+	int (*send)(struct intel_guc *guc, const u32 *data, u32 len);
 };
 
 struct intel_huc {
@@ -187,17 +223,19 @@ struct intel_huc {
 void intel_uc_sanitize_options(struct drm_i915_private *dev_priv);
 void intel_uc_init_early(struct drm_i915_private *dev_priv);
 void intel_uc_init_fw(struct drm_i915_private *dev_priv);
+void intel_uc_fini_fw(struct drm_i915_private *dev_priv);
 int intel_uc_init_hw(struct drm_i915_private *dev_priv);
-void intel_uc_prepare_fw(struct drm_i915_private *dev_priv,
-			 struct intel_uc_fw *uc_fw);
-int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len);
+void intel_uc_fini_hw(struct drm_i915_private *dev_priv);
 int intel_guc_sample_forcewake(struct intel_guc *guc);
+int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len);
+static inline int intel_guc_send(struct intel_guc *guc, const u32 *action, u32 len)
+{
+	return guc->send(guc, action, len);
+}
 
 /* intel_guc_loader.c */
 int intel_guc_select_fw(struct intel_guc *guc);
 int intel_guc_init_hw(struct intel_guc *guc);
-void intel_guc_fini(struct drm_i915_private *dev_priv);
-const char *intel_uc_fw_status_repr(enum intel_uc_fw_status status);
 int intel_guc_suspend(struct drm_i915_private *dev_priv);
 int intel_guc_resume(struct drm_i915_private *dev_priv);
 u32 intel_guc_wopcm_size(struct drm_i915_private *dev_priv);
@@ -212,10 +250,11 @@ void i915_guc_submission_fini(struct drm_i915_private *dev_priv);
 struct i915_vma *intel_guc_allocate_vma(struct intel_guc *guc, u32 size);
 
 /* intel_guc_log.c */
-void intel_guc_log_create(struct intel_guc *guc);
+int intel_guc_log_create(struct intel_guc *guc);
+void intel_guc_log_destroy(struct intel_guc *guc);
+int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 void i915_guc_log_register(struct drm_i915_private *dev_priv);
 void i915_guc_log_unregister(struct drm_i915_private *dev_priv);
-int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val);
 
 static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 {
@@ -227,7 +266,6 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma)
 
 /* intel_huc.c */
 void intel_huc_select_fw(struct intel_huc *huc);
-void intel_huc_fini(struct drm_i915_private  *dev_priv);
 int intel_huc_init_hw(struct intel_huc *huc);
 void intel_guc_auth_huc(struct drm_i915_private *dev_priv);
 
diff --git a/drivers/gpu/drm/i915/intel_uncore.c b/drivers/gpu/drm/i915/intel_uncore.c
index 09f5f02d7901..6d1ea26b2493 100644
--- a/drivers/gpu/drm/i915/intel_uncore.c
+++ b/drivers/gpu/drm/i915/intel_uncore.c
@@ -52,10 +52,10 @@ intel_uncore_forcewake_domain_to_str(const enum forcewake_domain_id id)
 }
 
 static inline void
-fw_domain_reset(const struct intel_uncore_forcewake_domain *d)
+fw_domain_reset(struct drm_i915_private *i915,
+		const struct intel_uncore_forcewake_domain *d)
 {
-	WARN_ON(!i915_mmio_reg_valid(d->reg_set));
-	__raw_i915_write32(d->i915, d->reg_set, d->val_reset);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_reset);
 }
 
 static inline void
@@ -69,9 +69,10 @@ fw_domain_arm_timer(struct intel_uncore_forcewake_domain *d)
 }
 
 static inline void
-fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack_clear(const struct drm_i915_private *i915,
+			 const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) &
+	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
 			     FORCEWAKE_KERNEL) == 0,
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("%s: timed out waiting for forcewake ack to clear.\n",
@@ -79,15 +80,17 @@ fw_domain_wait_ack_clear(const struct intel_uncore_forcewake_domain *d)
 }
 
 static inline void
-fw_domain_get(const struct intel_uncore_forcewake_domain *d)
+fw_domain_get(struct drm_i915_private *i915,
+	      const struct intel_uncore_forcewake_domain *d)
 {
-	__raw_i915_write32(d->i915, d->reg_set, d->val_set);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_set);
 }
 
 static inline void
-fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d)
+fw_domain_wait_ack(const struct drm_i915_private *i915,
+		   const struct intel_uncore_forcewake_domain *d)
 {
-	if (wait_for_atomic((__raw_i915_read32(d->i915, d->reg_ack) &
+	if (wait_for_atomic((__raw_i915_read32(i915, d->reg_ack) &
 			     FORCEWAKE_KERNEL),
 			    FORCEWAKE_ACK_TIMEOUT_MS))
 		DRM_ERROR("%s: timed out waiting for forcewake ack request.\n",
@@ -95,72 +98,59 @@ fw_domain_wait_ack(const struct intel_uncore_forcewake_domain *d)
 }
 
 static inline void
-fw_domain_put(const struct intel_uncore_forcewake_domain *d)
+fw_domain_put(const struct drm_i915_private *i915,
+	      const struct intel_uncore_forcewake_domain *d)
 {
-	__raw_i915_write32(d->i915, d->reg_set, d->val_clear);
-}
-
-static inline void
-fw_domain_posting_read(const struct intel_uncore_forcewake_domain *d)
-{
-	/* something from same cacheline, but not from the set register */
-	if (i915_mmio_reg_valid(d->reg_post))
-		__raw_posting_read(d->i915, d->reg_post);
+	__raw_i915_write32(i915, d->reg_set, i915->uncore.fw_clear);
 }
 
 static void
-fw_domains_get(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
+fw_domains_get(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
 
-	for_each_fw_domain_masked(d, fw_domains, dev_priv) {
-		fw_domain_wait_ack_clear(d);
-		fw_domain_get(d);
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
+
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp) {
+		fw_domain_wait_ack_clear(i915, d);
+		fw_domain_get(i915, d);
 	}
 
-	for_each_fw_domain_masked(d, fw_domains, dev_priv)
-		fw_domain_wait_ack(d);
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_wait_ack(i915, d);
 
-	dev_priv->uncore.fw_domains_active |= fw_domains;
+	i915->uncore.fw_domains_active |= fw_domains;
 }
 
 static void
-fw_domains_put(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
+fw_domains_put(struct drm_i915_private *i915, enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
 
-	for_each_fw_domain_masked(d, fw_domains, dev_priv) {
-		fw_domain_put(d);
-		fw_domain_posting_read(d);
-	}
-
-	dev_priv->uncore.fw_domains_active &= ~fw_domains;
-}
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
 
-static void
-fw_domains_posting_read(struct drm_i915_private *dev_priv)
-{
-	struct intel_uncore_forcewake_domain *d;
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_put(i915, d);
 
-	/* No need to do for all, just do for first found */
-	for_each_fw_domain(d, dev_priv) {
-		fw_domain_posting_read(d);
-		break;
-	}
+	i915->uncore.fw_domains_active &= ~fw_domains;
 }
 
 static void
-fw_domains_reset(struct drm_i915_private *dev_priv, enum forcewake_domains fw_domains)
+fw_domains_reset(struct drm_i915_private *i915,
+		 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *d;
+	unsigned int tmp;
 
-	if (dev_priv->uncore.fw_domains == 0)
+	if (!fw_domains)
 		return;
 
-	for_each_fw_domain_masked(d, fw_domains, dev_priv)
-		fw_domain_reset(d);
+	GEM_BUG_ON(fw_domains & ~i915->uncore.fw_domains);
 
-	fw_domains_posting_read(dev_priv);
+	for_each_fw_domain_masked(d, fw_domains, i915, tmp)
+		fw_domain_reset(i915, d);
 }
 
 static void __gen6_gt_wait_for_thread_c0(struct drm_i915_private *dev_priv)
@@ -236,7 +226,8 @@ intel_uncore_fw_release_timer(struct hrtimer *timer)
 {
 	struct intel_uncore_forcewake_domain *domain =
 	       container_of(timer, struct intel_uncore_forcewake_domain, timer);
-	struct drm_i915_private *dev_priv = domain->i915;
+	struct drm_i915_private *dev_priv =
+		container_of(domain, struct drm_i915_private, uncore.fw_domain[domain->id]);
 	unsigned long irqflags;
 
 	assert_rpm_device_not_suspended(dev_priv);
@@ -266,9 +257,11 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
 	 * timers are run before holding.
 	 */
 	while (1) {
+		unsigned int tmp;
+
 		active_domains = 0;
 
-		for_each_fw_domain(domain, dev_priv) {
+		for_each_fw_domain(domain, dev_priv, tmp) {
 			if (hrtimer_cancel(&domain->timer) == 0)
 				continue;
 
@@ -277,7 +270,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
 
 		spin_lock_irqsave(&dev_priv->uncore.lock, irqflags);
 
-		for_each_fw_domain(domain, dev_priv) {
+		for_each_fw_domain(domain, dev_priv, tmp) {
 			if (hrtimer_active(&domain->timer))
 				active_domains |= domain->mask;
 		}
@@ -300,7 +293,7 @@ static void intel_uncore_forcewake_reset(struct drm_i915_private *dev_priv,
 	if (fw)
 		dev_priv->uncore.funcs.force_wake_put(dev_priv, fw);
 
-	fw_domains_reset(dev_priv, FORCEWAKE_ALL);
+	fw_domains_reset(dev_priv, dev_priv->uncore.fw_domains);
 
 	if (restore) { /* If reset with a user forcewake, try to restore */
 		if (fw)
@@ -457,13 +450,13 @@ static void __intel_uncore_forcewake_get(struct drm_i915_private *dev_priv,
 					 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;
 
 	fw_domains &= dev_priv->uncore.fw_domains;
 
-	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp)
 		if (domain->wake_count++)
 			fw_domains &= ~domain->mask;
-	}
 
 	if (fw_domains)
 		dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@@ -520,10 +513,11 @@ static void __intel_uncore_forcewake_put(struct drm_i915_private *dev_priv,
 					 enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;
 
 	fw_domains &= dev_priv->uncore.fw_domains;
 
-	for_each_fw_domain_masked(domain, fw_domains, dev_priv) {
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp) {
 		if (WARN_ON(domain->wake_count == 0))
 			continue;
 
@@ -928,8 +922,11 @@ static noinline void ___force_wake_auto(struct drm_i915_private *dev_priv,
 					enum forcewake_domains fw_domains)
 {
 	struct intel_uncore_forcewake_domain *domain;
+	unsigned int tmp;
+
+	GEM_BUG_ON(fw_domains & ~dev_priv->uncore.fw_domains);
 
-	for_each_fw_domain_masked(domain, fw_domains, dev_priv)
+	for_each_fw_domain_masked(domain, fw_domains, dev_priv, tmp)
 		fw_domain_arm_timer(domain);
 
 	dev_priv->uncore.funcs.force_wake_get(dev_priv, fw_domains);
@@ -1141,41 +1138,27 @@ static void fw_domain_init(struct drm_i915_private *dev_priv,
 
 	WARN_ON(d->wake_count);
 
+	WARN_ON(!i915_mmio_reg_valid(reg_set));
+	WARN_ON(!i915_mmio_reg_valid(reg_ack));
+
 	d->wake_count = 0;
 	d->reg_set = reg_set;
 	d->reg_ack = reg_ack;
 
-	if (IS_GEN6(dev_priv)) {
-		d->val_reset = 0;
-		d->val_set = FORCEWAKE_KERNEL;
-		d->val_clear = 0;
-	} else {
-		/* WaRsClearFWBitsAtReset:bdw,skl */
-		d->val_reset = _MASKED_BIT_DISABLE(0xffff);
-		d->val_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL);
-		d->val_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL);
-	}
-
-	if (IS_VALLEYVIEW(dev_priv) || IS_CHERRYVIEW(dev_priv))
-		d->reg_post = FORCEWAKE_ACK_VLV;
-	else if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv) || IS_GEN8(dev_priv))
-		d->reg_post = ECOBUS;
-
-	d->i915 = dev_priv;
 	d->id = domain_id;
 
 	BUILD_BUG_ON(FORCEWAKE_RENDER != (1 << FW_DOMAIN_ID_RENDER));
 	BUILD_BUG_ON(FORCEWAKE_BLITTER != (1 << FW_DOMAIN_ID_BLITTER));
 	BUILD_BUG_ON(FORCEWAKE_MEDIA != (1 << FW_DOMAIN_ID_MEDIA));
 
-	d->mask = 1 << domain_id;
+	d->mask = BIT(domain_id);
 
 	hrtimer_init(&d->timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	d->timer.function = intel_uncore_fw_release_timer;
 
-	dev_priv->uncore.fw_domains |= (1 << domain_id);
+	dev_priv->uncore.fw_domains |= BIT(domain_id);
 
-	fw_domain_reset(d);
+	fw_domain_reset(dev_priv, d);
 }
 
 static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
@@ -1183,6 +1166,17 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 	if (INTEL_GEN(dev_priv) <= 5 || intel_vgpu_active(dev_priv))
 		return;
 
+	if (IS_GEN6(dev_priv)) {
+		dev_priv->uncore.fw_reset = 0;
+		dev_priv->uncore.fw_set = FORCEWAKE_KERNEL;
+		dev_priv->uncore.fw_clear = 0;
+	} else {
+		/* WaRsClearFWBitsAtReset:bdw,skl */
+		dev_priv->uncore.fw_reset = _MASKED_BIT_DISABLE(0xffff);
+		dev_priv->uncore.fw_set = _MASKED_BIT_ENABLE(FORCEWAKE_KERNEL);
+		dev_priv->uncore.fw_clear = _MASKED_BIT_DISABLE(FORCEWAKE_KERNEL);
+	}
+
 	if (IS_GEN9(dev_priv)) {
 		dev_priv->uncore.funcs.force_wake_get = fw_domains_get;
 		dev_priv->uncore.funcs.force_wake_put = fw_domains_put;
@@ -1246,9 +1240,9 @@ static void intel_uncore_fw_domains_init(struct drm_i915_private *dev_priv)
 			       FORCEWAKE_MT, FORCEWAKE_MT_ACK);
 
 		spin_lock_irq(&dev_priv->uncore.lock);
-		fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_ALL);
+		fw_domains_get_with_thread_status(dev_priv, FORCEWAKE_RENDER);
 		ecobus = __raw_i915_read32(dev_priv, ECOBUS);
-		fw_domains_put_with_fifo(dev_priv, FORCEWAKE_ALL);
+		fw_domains_put_with_fifo(dev_priv, FORCEWAKE_RENDER);
 		spin_unlock_irq(&dev_priv->uncore.lock);
 
 		if (!(ecobus & FORCEWAKE_MT_ENABLE)) {
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_request.c b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
index 926b24c117d6..98b7aac41eec 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_request.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_request.c
@@ -291,8 +291,6 @@ static int begin_live_test(struct live_test *t,
 		return err;
 	}
 
-	i915_gem_retire_requests(i915);
-
 	i915->gpu_error.missed_irq_rings = 0;
 	t->reset_count = i915_reset_count(&i915->gpu_error);
 
@@ -303,7 +301,9 @@ static int end_live_test(struct live_test *t)
 {
 	struct drm_i915_private *i915 = t->i915;
 
-	if (wait_for(intel_engines_are_idle(i915), 1)) {
+	i915_gem_retire_requests(i915);
+
+	if (wait_for(intel_engines_are_idle(i915), 10)) {
 		pr_err("%s(%s): GPU not idle\n", t->func, t->name);
 		return -EIO;
 	}
diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
index 6ec7c731a267..aa31d6c0cdfb 100644
--- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
+++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c
@@ -235,7 +235,6 @@ static void hang_fini(struct hang *h)
 	i915_gem_object_put(h->hws);
 
 	i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED);
-	i915_gem_retire_requests(h->i915);
 }
 
 static int igt_hang_sanitycheck(void *arg)
diff --git a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
index 99da8f4ef497..302f7d103635 100644
--- a/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
+++ b/drivers/gpu/drm/i915/selftests/mock_dmabuf.c
@@ -129,10 +129,10 @@ static const struct dma_buf_ops mock_dmabuf_ops =  {
 	.map_dma_buf = mock_map_dma_buf,
 	.unmap_dma_buf = mock_unmap_dma_buf,
 	.release = mock_dmabuf_release,
-	.kmap = mock_dmabuf_kmap,
-	.kmap_atomic = mock_dmabuf_kmap_atomic,
-	.kunmap = mock_dmabuf_kunmap,
-	.kunmap_atomic = mock_dmabuf_kunmap_atomic,
+	.map = mock_dmabuf_kmap,
+	.map_atomic = mock_dmabuf_kmap_atomic,
+	.unmap = mock_dmabuf_kunmap,
+	.unmap_atomic = mock_dmabuf_kunmap_atomic,
 	.mmap = mock_dmabuf_mmap,
 	.vmap = mock_dmabuf_vmap,
 	.vunmap = mock_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/i915/selftests/mock_drm.c b/drivers/gpu/drm/i915/selftests/mock_drm.c
index 113dec05c7dc..09c704153456 100644
--- a/drivers/gpu/drm/i915/selftests/mock_drm.c
+++ b/drivers/gpu/drm/i915/selftests/mock_drm.c
@@ -24,31 +24,50 @@
 
 #include "mock_drm.h"
 
-static inline struct inode fake_inode(struct drm_i915_private *i915)
-{
-	return (struct inode){ .i_rdev = i915->drm.primary->index };
-}
-
 struct drm_file *mock_file(struct drm_i915_private *i915)
 {
-	struct inode inode = fake_inode(i915);
-	struct file filp = {};
+	struct file *filp;
+	struct inode *inode;
 	struct drm_file *file;
 	int err;
 
-	err = drm_open(&inode, &filp);
-	if (unlikely(err))
-		return ERR_PTR(err);
+	inode = kzalloc(sizeof(*inode), GFP_KERNEL);
+	if (!inode) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	inode->i_rdev = i915->drm.primary->index;
 
-	file = filp.private_data;
+	filp = kzalloc(sizeof(*filp), GFP_KERNEL);
+	if (!filp) {
+		err = -ENOMEM;
+		goto err_inode;
+	}
+
+	err = drm_open(inode, filp);
+	if (err)
+		goto err_filp;
+
+	file = filp->private_data;
+	memset(&file->filp, POISON_INUSE, sizeof(file->filp));
 	file->authenticated = true;
+
+	kfree(filp);
+	kfree(inode);
 	return file;
+
+err_filp:
+	kfree(filp);
+err_inode:
+	kfree(inode);
+err:
+	return ERR_PTR(err);
 }
 
 void mock_file_free(struct drm_i915_private *i915, struct drm_file *file)
 {
-	struct inode inode = fake_inode(i915);
 	struct file filp = { .private_data = file };
 
-	drm_release(&inode, &filp);
+	drm_release(NULL, &filp);
 }
diff --git a/drivers/gpu/drm/i915/selftests/mock_engine.c b/drivers/gpu/drm/i915/selftests/mock_engine.c
index 8d5ba037064c..0ad624a1db90 100644
--- a/drivers/gpu/drm/i915/selftests/mock_engine.c
+++ b/drivers/gpu/drm/i915/selftests/mock_engine.c
@@ -118,7 +118,6 @@ static struct intel_ring *mock_ring(struct intel_engine_cs *engine)
 	ring->vaddr = (void *)(ring + 1);
 
 	INIT_LIST_HEAD(&ring->request_list);
-	ring->last_retired_head = -1;
 	intel_ring_update_space(ring);
 
 	return ring;
diff --git a/drivers/gpu/drm/i915/selftests/mock_request.c b/drivers/gpu/drm/i915/selftests/mock_request.c
index 0e8d2e7f8c70..8097e3693ec4 100644
--- a/drivers/gpu/drm/i915/selftests/mock_request.c
+++ b/drivers/gpu/drm/i915/selftests/mock_request.c
@@ -35,7 +35,7 @@ mock_request(struct intel_engine_cs *engine,
 
 	/* NB the i915->requests slab cache is enlarged to fit mock_request */
 	request = i915_gem_request_alloc(engine, context);
-	if (!request)
+	if (IS_ERR(request))
 		return NULL;
 
 	mock = container_of(request, typeof(*mock), base);
diff --git a/drivers/gpu/drm/i915/selftests/scatterlist.c b/drivers/gpu/drm/i915/selftests/scatterlist.c
index eb2cda8e2b9f..1cc5d2931753 100644
--- a/drivers/gpu/drm/i915/selftests/scatterlist.c
+++ b/drivers/gpu/drm/i915/selftests/scatterlist.c
@@ -189,6 +189,13 @@ static unsigned int random(unsigned long n,
 	return 1 + (prandom_u32_state(rnd) % 1024);
 }
 
+static inline bool page_contiguous(struct page *first,
+				   struct page *last,
+				   unsigned long npages)
+{
+	return first + npages == last;
+}
+
 static int alloc_table(struct pfn_table *pt,
 		       unsigned long count, unsigned long max,
 		       npages_fn_t npages_fn,
@@ -216,7 +223,9 @@ static int alloc_table(struct pfn_table *pt,
 		unsigned long npages = npages_fn(n, count, rnd);
 
 		/* Nobody expects the Sparse Memmap! */
-		if (pfn_to_page(pfn + npages) != pfn_to_page(pfn) + npages) {
+		if (!page_contiguous(pfn_to_page(pfn),
+				     pfn_to_page(pfn + npages),
+				     npages)) {
 			sg_free_table(&pt->st);
 			return -ENOSPC;
 		}
diff --git a/drivers/gpu/drm/imx/Kconfig b/drivers/gpu/drm/imx/Kconfig
index f2c9ae822149..c9e439c82241 100644
--- a/drivers/gpu/drm/imx/Kconfig
+++ b/drivers/gpu/drm/imx/Kconfig
@@ -31,13 +31,6 @@ config DRM_IMX_LDB
 	  Choose this to enable the internal LVDS Display Bridge (LDB)
 	  found on i.MX53 and i.MX6 processors.
 
-config DRM_IMX_IPUV3
-	tristate
-	depends on DRM_IMX
-	depends on IMX_IPUV3_CORE
-	default y if DRM_IMX=y
-	default m if DRM_IMX=m
-
 config DRM_IMX_HDMI
 	tristate "Freescale i.MX DRM HDMI"
 	select DRM_DW_HDMI
diff --git a/drivers/gpu/drm/imx/Makefile b/drivers/gpu/drm/imx/Makefile
index f3ecd8903d97..16ecef33e008 100644
--- a/drivers/gpu/drm/imx/Makefile
+++ b/drivers/gpu/drm/imx/Makefile
@@ -1,5 +1,5 @@
 
-imxdrm-objs := imx-drm-core.o
+imxdrm-objs := imx-drm-core.o ipuv3-crtc.o ipuv3-plane.o
 
 obj-$(CONFIG_DRM_IMX) += imxdrm.o
 
@@ -7,6 +7,5 @@ obj-$(CONFIG_DRM_IMX_PARALLEL_DISPLAY) += parallel-display.o
 obj-$(CONFIG_DRM_IMX_TVE) += imx-tve.o
 obj-$(CONFIG_DRM_IMX_LDB) += imx-ldb.o
 
-imx-ipuv3-crtc-objs  := ipuv3-crtc.o ipuv3-plane.o
 obj-$(CONFIG_DRM_IMX_IPUV3)	+= imx-ipuv3-crtc.o
 obj-$(CONFIG_DRM_IMX_HDMI) += dw_hdmi-imx.o
diff --git a/drivers/gpu/drm/imx/imx-drm-core.c b/drivers/gpu/drm/imx/imx-drm-core.c
index 1888bf3920fc..50add2f9e250 100644
--- a/drivers/gpu/drm/imx/imx-drm-core.c
+++ b/drivers/gpu/drm/imx/imx-drm-core.c
@@ -422,7 +422,23 @@ static struct platform_driver imx_drm_pdrv = {
 		.of_match_table = imx_drm_dt_ids,
 	},
 };
-module_platform_driver(imx_drm_pdrv);
+
+static struct platform_driver * const drivers[] = {
+	&imx_drm_pdrv,
+	&ipu_drm_driver,
+};
+
+static int __init imx_drm_init(void)
+{
+	return platform_register_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_init(imx_drm_init);
+
+static void __exit imx_drm_exit(void)
+{
+	platform_unregister_drivers(drivers, ARRAY_SIZE(drivers));
+}
+module_exit(imx_drm_exit);
 
 MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
 MODULE_DESCRIPTION("i.MX drm driver core");
diff --git a/drivers/gpu/drm/imx/imx-drm.h b/drivers/gpu/drm/imx/imx-drm.h
index 295434b199db..f6dd64be9cd5 100644
--- a/drivers/gpu/drm/imx/imx-drm.h
+++ b/drivers/gpu/drm/imx/imx-drm.h
@@ -29,6 +29,8 @@ int imx_drm_init_drm(struct platform_device *pdev,
 		int preferred_bpp);
 int imx_drm_exit_drm(void);
 
+extern struct platform_driver ipu_drm_driver;
+
 void imx_drm_mode_config_init(struct drm_device *drm);
 
 struct drm_gem_cma_object *imx_drm_fb_get_obj(struct drm_framebuffer *fb);
diff --git a/drivers/gpu/drm/imx/imx-ldb.c b/drivers/gpu/drm/imx/imx-ldb.c
index 88cd11d30134..8fb801fab039 100644
--- a/drivers/gpu/drm/imx/imx-ldb.c
+++ b/drivers/gpu/drm/imx/imx-ldb.c
@@ -647,7 +647,6 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 
 	for_each_child_of_node(np, child) {
 		struct imx_ldb_channel *channel;
-		struct device_node *ep;
 		int bus_format;
 
 		ret = of_property_read_u32(child, "reg", &i);
@@ -671,27 +670,11 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
 		 * The output port is port@4 with an external 4-port mux or
 		 * port@2 with the internal 2-port mux.
 		 */
-		ep = of_graph_get_endpoint_by_regs(child,
-						   imx_ldb->lvds_mux ? 4 : 2,
-						   -1);
-		if (ep) {
-			struct device_node *remote;
-
-			remote = of_graph_get_remote_port_parent(ep);
-			of_node_put(ep);
-			if (remote) {
-				channel->panel = of_drm_find_panel(remote);
-				channel->bridge = of_drm_find_bridge(remote);
-			} else
-				return -EPROBE_DEFER;
-			of_node_put(remote);
-
-			if (!channel->panel && !channel->bridge) {
-				dev_err(dev, "panel/bridge not found: %s\n",
-					remote->full_name);
-				return -EPROBE_DEFER;
-			}
-		}
+		ret = drm_of_find_panel_or_bridge(child,
+						  imx_ldb->lvds_mux ? 4 : 2, 0,
+						  &channel->panel, &channel->bridge);
+		if (ret)
+			return ret;
 
 		/* panel ddc only if there is no bridge */
 		if (!channel->bridge) {
diff --git a/drivers/gpu/drm/imx/ipuv3-crtc.c b/drivers/gpu/drm/imx/ipuv3-crtc.c
index dab9d50ffd8c..5456c15d962c 100644
--- a/drivers/gpu/drm/imx/ipuv3-crtc.c
+++ b/drivers/gpu/drm/imx/ipuv3-crtc.c
@@ -465,16 +465,10 @@ static int ipu_drm_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static struct platform_driver ipu_drm_driver = {
+struct platform_driver ipu_drm_driver = {
 	.driver = {
 		.name = "imx-ipuv3-crtc",
 	},
 	.probe = ipu_drm_probe,
 	.remove = ipu_drm_remove,
 };
-module_platform_driver(ipu_drm_driver);
-
-MODULE_AUTHOR("Sascha Hauer <s.hauer@pengutronix.de>");
-MODULE_DESCRIPTION(DRIVER_DESC);
-MODULE_LICENSE("GPL");
-MODULE_ALIAS("platform:imx-ipuv3-crtc");
diff --git a/drivers/gpu/drm/imx/parallel-display.c b/drivers/gpu/drm/imx/parallel-display.c
index d5c06fd89f90..636031a30e17 100644
--- a/drivers/gpu/drm/imx/parallel-display.c
+++ b/drivers/gpu/drm/imx/parallel-display.c
@@ -19,10 +19,10 @@
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <linux/videodev2.h>
 #include <video/of_display_timing.h>
-#include <linux/of_graph.h>
 
 #include "imx-drm.h"
 
@@ -208,7 +208,6 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = data;
 	struct device_node *np = dev->of_node;
-	struct device_node *ep;
 	const u8 *edidp;
 	struct imx_parallel_display *imxpd;
 	int ret;
@@ -237,36 +236,9 @@ static int imx_pd_bind(struct device *dev, struct device *master, void *data)
 	imxpd->bus_format = bus_format;
 
 	/* port@1 is the output port */
-	ep = of_graph_get_endpoint_by_regs(np, 1, -1);
-	if (ep) {
-		struct device_node *remote;
-
-		remote = of_graph_get_remote_port_parent(ep);
-		if (!remote) {
-			dev_warn(dev, "endpoint %s not connected\n",
-				 ep->full_name);
-			of_node_put(ep);
-			return -ENODEV;
-		}
-		of_node_put(ep);
-
-		imxpd->panel = of_drm_find_panel(remote);
-		if (imxpd->panel) {
-			dev_dbg(dev, "found panel %s\n", remote->full_name);
-		} else {
-			imxpd->bridge = of_drm_find_bridge(remote);
-			if (imxpd->bridge)
-				dev_dbg(dev, "found bridge %s\n",
-					remote->full_name);
-		}
-		if (!imxpd->panel && !imxpd->bridge) {
-			dev_dbg(dev, "waiting for panel or bridge %s\n",
-				remote->full_name);
-			of_node_put(remote);
-			return -EPROBE_DEFER;
-		}
-		of_node_put(remote);
-	}
+	ret = drm_of_find_panel_or_bridge(np, 1, 0, &imxpd->panel, &imxpd->bridge);
+	if (ret)
+		return ret;
 
 	imxpd->dev = dev;
 
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
index c70310206ac5..a14d7d64d7b1 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_ovl.c
@@ -35,18 +35,28 @@
 #define DISP_REG_OVL_PITCH(n)			(0x0044 + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_CTRL(n)		(0x00c0 + 0x20 * (n))
 #define DISP_REG_OVL_RDMA_GMC(n)		(0x00c8 + 0x20 * (n))
-#define DISP_REG_OVL_ADDR(n)			(0x0f40 + 0x20 * (n))
+#define DISP_REG_OVL_ADDR_MT2701		0x0040
+#define DISP_REG_OVL_ADDR_MT8173		0x0f40
+#define DISP_REG_OVL_ADDR(ovl, n)		((ovl)->data->addr + 0x20 * (n))
 
 #define	OVL_RDMA_MEM_GMC	0x40402020
 
 #define OVL_CON_BYTE_SWAP	BIT(24)
-#define OVL_CON_CLRFMT_RGB565	(0 << 12)
-#define OVL_CON_CLRFMT_RGB888	(1 << 12)
+#define OVL_CON_CLRFMT_RGB	(1 << 12)
 #define OVL_CON_CLRFMT_RGBA8888	(2 << 12)
 #define OVL_CON_CLRFMT_ARGB8888	(3 << 12)
+#define OVL_CON_CLRFMT_RGB565(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
+					0 : OVL_CON_CLRFMT_RGB)
+#define OVL_CON_CLRFMT_RGB888(ovl)	((ovl)->data->fmt_rgb565_is_0 ? \
+					OVL_CON_CLRFMT_RGB : 0)
 #define	OVL_CON_AEN		BIT(8)
 #define	OVL_CON_ALPHA		0xff
 
+struct mtk_disp_ovl_data {
+	unsigned int addr;
+	bool fmt_rgb565_is_0;
+};
+
 /**
  * struct mtk_disp_ovl - DISP_OVL driver structure
  * @ddp_comp - structure containing type enum and hardware resources
@@ -55,8 +65,14 @@
 struct mtk_disp_ovl {
 	struct mtk_ddp_comp		ddp_comp;
 	struct drm_crtc			*crtc;
+	const struct mtk_disp_ovl_data	*data;
 };
 
+static inline struct mtk_disp_ovl *comp_to_ovl(struct mtk_ddp_comp *comp)
+{
+	return container_of(comp, struct mtk_disp_ovl, ddp_comp);
+}
+
 static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id)
 {
 	struct mtk_disp_ovl *priv = dev_id;
@@ -76,20 +92,18 @@ static irqreturn_t mtk_disp_ovl_irq_handler(int irq, void *dev_id)
 static void mtk_ovl_enable_vblank(struct mtk_ddp_comp *comp,
 				  struct drm_crtc *crtc)
 {
-	struct mtk_disp_ovl *priv = container_of(comp, struct mtk_disp_ovl,
-						 ddp_comp);
+	struct mtk_disp_ovl *ovl = comp_to_ovl(comp);
 
-	priv->crtc = crtc;
+	ovl->crtc = crtc;
 	writel(0x0, comp->regs + DISP_REG_OVL_INTSTA);
 	writel_relaxed(OVL_FME_CPL_INT, comp->regs + DISP_REG_OVL_INTEN);
 }
 
 static void mtk_ovl_disable_vblank(struct mtk_ddp_comp *comp)
 {
-	struct mtk_disp_ovl *priv = container_of(comp, struct mtk_disp_ovl,
-						 ddp_comp);
+	struct mtk_disp_ovl *ovl = comp_to_ovl(comp);
 
-	priv->crtc = NULL;
+	ovl->crtc = NULL;
 	writel_relaxed(0x0, comp->regs + DISP_REG_OVL_INTEN);
 }
 
@@ -138,18 +152,18 @@ static void mtk_ovl_layer_off(struct mtk_ddp_comp *comp, unsigned int idx)
 	writel(0x0, comp->regs + DISP_REG_OVL_RDMA_CTRL(idx));
 }
 
-static unsigned int ovl_fmt_convert(unsigned int fmt)
+static unsigned int ovl_fmt_convert(struct mtk_disp_ovl *ovl, unsigned int fmt)
 {
 	switch (fmt) {
 	default:
 	case DRM_FORMAT_RGB565:
-		return OVL_CON_CLRFMT_RGB565;
+		return OVL_CON_CLRFMT_RGB565(ovl);
 	case DRM_FORMAT_BGR565:
-		return OVL_CON_CLRFMT_RGB565 | OVL_CON_BYTE_SWAP;
+		return OVL_CON_CLRFMT_RGB565(ovl) | OVL_CON_BYTE_SWAP;
 	case DRM_FORMAT_RGB888:
-		return OVL_CON_CLRFMT_RGB888;
+		return OVL_CON_CLRFMT_RGB888(ovl);
 	case DRM_FORMAT_BGR888:
-		return OVL_CON_CLRFMT_RGB888 | OVL_CON_BYTE_SWAP;
+		return OVL_CON_CLRFMT_RGB888(ovl) | OVL_CON_BYTE_SWAP;
 	case DRM_FORMAT_RGBX8888:
 	case DRM_FORMAT_RGBA8888:
 		return OVL_CON_CLRFMT_ARGB8888;
@@ -168,6 +182,7 @@ static unsigned int ovl_fmt_convert(unsigned int fmt)
 static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 				 struct mtk_plane_state *state)
 {
+	struct mtk_disp_ovl *ovl = comp_to_ovl(comp);
 	struct mtk_plane_pending_state *pending = &state->pending;
 	unsigned int addr = pending->addr;
 	unsigned int pitch = pending->pitch & 0xffff;
@@ -179,7 +194,7 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 	if (!pending->enable)
 		mtk_ovl_layer_off(comp, idx);
 
-	con = ovl_fmt_convert(fmt);
+	con = ovl_fmt_convert(ovl, fmt);
 	if (idx != 0)
 		con |= OVL_CON_AEN | OVL_CON_ALPHA;
 
@@ -187,7 +202,7 @@ static void mtk_ovl_layer_config(struct mtk_ddp_comp *comp, unsigned int idx,
 	writel_relaxed(pitch, comp->regs + DISP_REG_OVL_PITCH(idx));
 	writel_relaxed(src_size, comp->regs + DISP_REG_OVL_SRC_SIZE(idx));
 	writel_relaxed(offset, comp->regs + DISP_REG_OVL_OFFSET(idx));
-	writel_relaxed(addr, comp->regs + DISP_REG_OVL_ADDR(idx));
+	writel_relaxed(addr, comp->regs + DISP_REG_OVL_ADDR(ovl, idx));
 
 	if (pending->enable)
 		mtk_ovl_layer_on(comp, idx);
@@ -264,6 +279,8 @@ static int mtk_disp_ovl_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	priv->data = of_device_get_match_data(dev);
+
 	platform_set_drvdata(pdev, priv);
 
 	ret = devm_request_irq(dev, irq, mtk_disp_ovl_irq_handler,
@@ -287,8 +304,21 @@ static int mtk_disp_ovl_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct mtk_disp_ovl_data mt2701_ovl_driver_data = {
+	.addr = DISP_REG_OVL_ADDR_MT2701,
+	.fmt_rgb565_is_0 = false,
+};
+
+static const struct mtk_disp_ovl_data mt8173_ovl_driver_data = {
+	.addr = DISP_REG_OVL_ADDR_MT8173,
+	.fmt_rgb565_is_0 = true,
+};
+
 static const struct of_device_id mtk_disp_ovl_driver_dt_match[] = {
-	{ .compatible = "mediatek,mt8173-disp-ovl", },
+	{ .compatible = "mediatek,mt2701-disp-ovl",
+	  .data = &mt2701_ovl_driver_data},
+	{ .compatible = "mediatek,mt8173-disp-ovl",
+	  .data = &mt8173_ovl_driver_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_disp_ovl_driver_dt_match);
diff --git a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
index 0df05f95b916..b68a51376f83 100644
--- a/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
+++ b/drivers/gpu/drm/mediatek/mtk_disp_rdma.c
@@ -38,6 +38,11 @@
 #define RDMA_FIFO_UNDERFLOW_EN				BIT(31)
 #define RDMA_FIFO_PSEUDO_SIZE(bytes)			(((bytes) / 16) << 16)
 #define RDMA_OUTPUT_VALID_FIFO_THRESHOLD(bytes)		((bytes) / 16)
+#define RDMA_FIFO_SIZE(rdma)			((rdma)->data->fifo_size)
+
+struct mtk_disp_rdma_data {
+	unsigned int fifo_size;
+};
 
 /**
  * struct mtk_disp_rdma - DISP_RDMA driver structure
@@ -47,8 +52,14 @@
 struct mtk_disp_rdma {
 	struct mtk_ddp_comp		ddp_comp;
 	struct drm_crtc			*crtc;
+	const struct mtk_disp_rdma_data	*data;
 };
 
+static inline struct mtk_disp_rdma *comp_to_rdma(struct mtk_ddp_comp *comp)
+{
+	return container_of(comp, struct mtk_disp_rdma, ddp_comp);
+}
+
 static irqreturn_t mtk_disp_rdma_irq_handler(int irq, void *dev_id)
 {
 	struct mtk_disp_rdma *priv = dev_id;
@@ -77,20 +88,18 @@ static void rdma_update_bits(struct mtk_ddp_comp *comp, unsigned int reg,
 static void mtk_rdma_enable_vblank(struct mtk_ddp_comp *comp,
 				   struct drm_crtc *crtc)
 {
-	struct mtk_disp_rdma *priv = container_of(comp, struct mtk_disp_rdma,
-						  ddp_comp);
+	struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
 
-	priv->crtc = crtc;
+	rdma->crtc = crtc;
 	rdma_update_bits(comp, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT,
 			 RDMA_FRAME_END_INT);
 }
 
 static void mtk_rdma_disable_vblank(struct mtk_ddp_comp *comp)
 {
-	struct mtk_disp_rdma *priv = container_of(comp, struct mtk_disp_rdma,
-						  ddp_comp);
+	struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
 
-	priv->crtc = NULL;
+	rdma->crtc = NULL;
 	rdma_update_bits(comp, DISP_REG_RDMA_INT_ENABLE, RDMA_FRAME_END_INT, 0);
 }
 
@@ -111,6 +120,7 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width,
 {
 	unsigned int threshold;
 	unsigned int reg;
+	struct mtk_disp_rdma *rdma = comp_to_rdma(comp);
 
 	rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_0, 0xfff, width);
 	rdma_update_bits(comp, DISP_REG_RDMA_SIZE_CON_1, 0xfffff, height);
@@ -123,7 +133,7 @@ static void mtk_rdma_config(struct mtk_ddp_comp *comp, unsigned int width,
 	 */
 	threshold = width * height * vrefresh * 4 * 7 / 1000000;
 	reg = RDMA_FIFO_UNDERFLOW_EN |
-	      RDMA_FIFO_PSEUDO_SIZE(SZ_8K) |
+	      RDMA_FIFO_PSEUDO_SIZE(RDMA_FIFO_SIZE(rdma)) |
 	      RDMA_OUTPUT_VALID_FIFO_THRESHOLD(threshold);
 	writel(reg, comp->regs + DISP_REG_RDMA_FIFO_CON);
 }
@@ -208,6 +218,8 @@ static int mtk_disp_rdma_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	priv->data = of_device_get_match_data(dev);
+
 	platform_set_drvdata(pdev, priv);
 
 	ret = component_add(dev, &mtk_disp_rdma_component_ops);
@@ -224,8 +236,19 @@ static int mtk_disp_rdma_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct mtk_disp_rdma_data mt2701_rdma_driver_data = {
+	.fifo_size = SZ_4K,
+};
+
+static const struct mtk_disp_rdma_data mt8173_rdma_driver_data = {
+	.fifo_size = SZ_8K,
+};
+
 static const struct of_device_id mtk_disp_rdma_driver_dt_match[] = {
-	{ .compatible = "mediatek,mt8173-disp-rdma", },
+	{ .compatible = "mediatek,mt2701-disp-rdma",
+	  .data = &mt2701_rdma_driver_data},
+	{ .compatible = "mediatek,mt8173-disp-rdma",
+	  .data = &mt8173_rdma_driver_data},
 	{},
 };
 MODULE_DEVICE_TABLE(of, mtk_disp_rdma_driver_dt_match);
diff --git a/drivers/gpu/drm/mediatek/mtk_dpi.c b/drivers/gpu/drm/mediatek/mtk_dpi.c
index 3bd3bd688d1a..32ca351ecd09 100644
--- a/drivers/gpu/drm/mediatek/mtk_dpi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dpi.c
@@ -661,7 +661,7 @@ static int mtk_dpi_probe(struct platform_device *pdev)
 	struct device *dev = &pdev->dev;
 	struct mtk_dpi *dpi;
 	struct resource *mem;
-	struct device_node *ep, *bridge_node = NULL;
+	struct device_node *bridge_node;
 	int comp_id;
 	int ret;
 
@@ -706,15 +706,9 @@ static int mtk_dpi_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	ep = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (ep) {
-		bridge_node = of_graph_get_remote_port_parent(ep);
-		of_node_put(ep);
-	}
-	if (!bridge_node) {
-		dev_err(dev, "Failed to find bridge node\n");
+	bridge_node = of_graph_get_remote_node(dev->of_node, 0, 0);
+	if (!bridge_node)
 		return -ENODEV;
-	}
 
 	dev_info(dev, "Found bridge node: %s\n", bridge_node->full_name);
 
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
index 69982f5a6198..6b08774e5501 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_crtc.c
@@ -327,6 +327,42 @@ static void mtk_crtc_ddp_hw_fini(struct mtk_drm_crtc *mtk_crtc)
 	pm_runtime_put(drm->dev);
 }
 
+static void mtk_crtc_ddp_config(struct drm_crtc *crtc)
+{
+	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+	struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state);
+	struct mtk_ddp_comp *ovl = mtk_crtc->ddp_comp[0];
+	unsigned int i;
+
+	/*
+	 * TODO: instead of updating the registers here, we should prepare
+	 * working registers in atomic_commit and let the hardware command
+	 * queue update module registers on vblank.
+	 */
+	if (state->pending_config) {
+		mtk_ddp_comp_config(ovl, state->pending_width,
+				    state->pending_height,
+				    state->pending_vrefresh, 0);
+
+		state->pending_config = false;
+	}
+
+	if (mtk_crtc->pending_planes) {
+		for (i = 0; i < OVL_LAYER_NR; i++) {
+			struct drm_plane *plane = &mtk_crtc->planes[i];
+			struct mtk_plane_state *plane_state;
+
+			plane_state = to_mtk_plane_state(plane->state);
+
+			if (plane_state->pending.config) {
+				mtk_ddp_comp_layer_config(ovl, i, plane_state);
+				plane_state->pending.config = false;
+			}
+		}
+		mtk_crtc->pending_planes = false;
+	}
+}
+
 static void mtk_drm_crtc_enable(struct drm_crtc *crtc)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
@@ -403,6 +439,7 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc,
 				      struct drm_crtc_state *old_crtc_state)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
+	struct mtk_drm_private *priv = crtc->dev->dev_private;
 	unsigned int pending_planes = 0;
 	int i;
 
@@ -424,6 +461,12 @@ static void mtk_drm_crtc_atomic_flush(struct drm_crtc *crtc,
 	if (crtc->state->color_mgmt_changed)
 		for (i = 0; i < mtk_crtc->ddp_comp_nr; i++)
 			mtk_ddp_gamma_set(mtk_crtc->ddp_comp[i], crtc->state);
+
+	if (priv->data->shadow_register) {
+		mtk_disp_mutex_acquire(mtk_crtc->mutex);
+		mtk_crtc_ddp_config(crtc);
+		mtk_disp_mutex_release(mtk_crtc->mutex);
+	}
 }
 
 static const struct drm_crtc_funcs mtk_crtc_funcs = {
@@ -471,36 +514,10 @@ err_cleanup_crtc:
 void mtk_crtc_ddp_irq(struct drm_crtc *crtc, struct mtk_ddp_comp *ovl)
 {
 	struct mtk_drm_crtc *mtk_crtc = to_mtk_crtc(crtc);
-	struct mtk_crtc_state *state = to_mtk_crtc_state(mtk_crtc->base.state);
-	unsigned int i;
+	struct mtk_drm_private *priv = crtc->dev->dev_private;
 
-	/*
-	 * TODO: instead of updating the registers here, we should prepare
-	 * working registers in atomic_commit and let the hardware command
-	 * queue update module registers on vblank.
-	 */
-	if (state->pending_config) {
-		mtk_ddp_comp_config(ovl, state->pending_width,
-				    state->pending_height,
-				    state->pending_vrefresh, 0);
-
-		state->pending_config = false;
-	}
-
-	if (mtk_crtc->pending_planes) {
-		for (i = 0; i < OVL_LAYER_NR; i++) {
-			struct drm_plane *plane = &mtk_crtc->planes[i];
-			struct mtk_plane_state *plane_state;
-
-			plane_state = to_mtk_plane_state(plane->state);
-
-			if (plane_state->pending.config) {
-				mtk_ddp_comp_layer_config(ovl, i, plane_state);
-				plane_state->pending.config = false;
-			}
-		}
-		mtk_crtc->pending_planes = false;
-	}
+	if (!priv->data->shadow_register)
+		mtk_crtc_ddp_config(crtc);
 
 	mtk_drm_finish_page_flip(mtk_crtc);
 }
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
index 17ba9355a49c..8130f3dab661 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.c
@@ -12,6 +12,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/iopoll.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
@@ -31,26 +32,40 @@
 #define DISP_REG_CONFIG_DISP_RDMA1_MOUT_EN	0x0c8
 #define DISP_REG_CONFIG_MMSYS_CG_CON0		0x100
 
+#define DISP_REG_CONFIG_DISP_OVL_MOUT_EN	0x030
+#define DISP_REG_CONFIG_OUT_SEL			0x04c
+#define DISP_REG_CONFIG_DSI_SEL			0x050
+
 #define DISP_REG_MUTEX_EN(n)	(0x20 + 0x20 * (n))
+#define DISP_REG_MUTEX(n)	(0x24 + 0x20 * (n))
 #define DISP_REG_MUTEX_RST(n)	(0x28 + 0x20 * (n))
 #define DISP_REG_MUTEX_MOD(n)	(0x2c + 0x20 * (n))
 #define DISP_REG_MUTEX_SOF(n)	(0x30 + 0x20 * (n))
 
-#define MUTEX_MOD_DISP_OVL0		BIT(11)
-#define MUTEX_MOD_DISP_OVL1		BIT(12)
-#define MUTEX_MOD_DISP_RDMA0		BIT(13)
-#define MUTEX_MOD_DISP_RDMA1		BIT(14)
-#define MUTEX_MOD_DISP_RDMA2		BIT(15)
-#define MUTEX_MOD_DISP_WDMA0		BIT(16)
-#define MUTEX_MOD_DISP_WDMA1		BIT(17)
-#define MUTEX_MOD_DISP_COLOR0		BIT(18)
-#define MUTEX_MOD_DISP_COLOR1		BIT(19)
-#define MUTEX_MOD_DISP_AAL		BIT(20)
-#define MUTEX_MOD_DISP_GAMMA		BIT(21)
-#define MUTEX_MOD_DISP_UFOE		BIT(22)
-#define MUTEX_MOD_DISP_PWM0		BIT(23)
-#define MUTEX_MOD_DISP_PWM1		BIT(24)
-#define MUTEX_MOD_DISP_OD		BIT(25)
+#define INT_MUTEX				BIT(1)
+
+#define MT8173_MUTEX_MOD_DISP_OVL0		BIT(11)
+#define MT8173_MUTEX_MOD_DISP_OVL1		BIT(12)
+#define MT8173_MUTEX_MOD_DISP_RDMA0		BIT(13)
+#define MT8173_MUTEX_MOD_DISP_RDMA1		BIT(14)
+#define MT8173_MUTEX_MOD_DISP_RDMA2		BIT(15)
+#define MT8173_MUTEX_MOD_DISP_WDMA0		BIT(16)
+#define MT8173_MUTEX_MOD_DISP_WDMA1		BIT(17)
+#define MT8173_MUTEX_MOD_DISP_COLOR0		BIT(18)
+#define MT8173_MUTEX_MOD_DISP_COLOR1		BIT(19)
+#define MT8173_MUTEX_MOD_DISP_AAL		BIT(20)
+#define MT8173_MUTEX_MOD_DISP_GAMMA		BIT(21)
+#define MT8173_MUTEX_MOD_DISP_UFOE		BIT(22)
+#define MT8173_MUTEX_MOD_DISP_PWM0		BIT(23)
+#define MT8173_MUTEX_MOD_DISP_PWM1		BIT(24)
+#define MT8173_MUTEX_MOD_DISP_OD		BIT(25)
+
+#define MT2701_MUTEX_MOD_DISP_OVL		BIT(3)
+#define MT2701_MUTEX_MOD_DISP_WDMA		BIT(6)
+#define MT2701_MUTEX_MOD_DISP_COLOR		BIT(7)
+#define MT2701_MUTEX_MOD_DISP_BLS		BIT(9)
+#define MT2701_MUTEX_MOD_DISP_RDMA0		BIT(10)
+#define MT2701_MUTEX_MOD_DISP_RDMA1		BIT(12)
 
 #define MUTEX_SOF_SINGLE_MODE		0
 #define MUTEX_SOF_DSI0			1
@@ -67,6 +82,10 @@
 #define DPI0_SEL_IN_RDMA1		0x1
 #define COLOR1_SEL_IN_OVL1		0x1
 
+#define OVL_MOUT_EN_RDMA		0x1
+#define BLS_TO_DSI_RDMA1_TO_DPI1	0x8
+#define DSI_SEL_IN_BLS			0x0
+
 struct mtk_disp_mutex {
 	int id;
 	bool claimed;
@@ -77,24 +96,34 @@ struct mtk_ddp {
 	struct clk			*clk;
 	void __iomem			*regs;
 	struct mtk_disp_mutex		mutex[10];
+	const unsigned int		*mutex_mod;
+};
+
+static const unsigned int mt2701_mutex_mod[DDP_COMPONENT_ID_MAX] = {
+	[DDP_COMPONENT_BLS] = MT2701_MUTEX_MOD_DISP_BLS,
+	[DDP_COMPONENT_COLOR0] = MT2701_MUTEX_MOD_DISP_COLOR,
+	[DDP_COMPONENT_OVL0] = MT2701_MUTEX_MOD_DISP_OVL,
+	[DDP_COMPONENT_RDMA0] = MT2701_MUTEX_MOD_DISP_RDMA0,
+	[DDP_COMPONENT_RDMA1] = MT2701_MUTEX_MOD_DISP_RDMA1,
+	[DDP_COMPONENT_WDMA0] = MT2701_MUTEX_MOD_DISP_WDMA,
 };
 
-static const unsigned int mutex_mod[DDP_COMPONENT_ID_MAX] = {
-	[DDP_COMPONENT_AAL] = MUTEX_MOD_DISP_AAL,
-	[DDP_COMPONENT_COLOR0] = MUTEX_MOD_DISP_COLOR0,
-	[DDP_COMPONENT_COLOR1] = MUTEX_MOD_DISP_COLOR1,
-	[DDP_COMPONENT_GAMMA] = MUTEX_MOD_DISP_GAMMA,
-	[DDP_COMPONENT_OD] = MUTEX_MOD_DISP_OD,
-	[DDP_COMPONENT_OVL0] = MUTEX_MOD_DISP_OVL0,
-	[DDP_COMPONENT_OVL1] = MUTEX_MOD_DISP_OVL1,
-	[DDP_COMPONENT_PWM0] = MUTEX_MOD_DISP_PWM0,
-	[DDP_COMPONENT_PWM1] = MUTEX_MOD_DISP_PWM1,
-	[DDP_COMPONENT_RDMA0] = MUTEX_MOD_DISP_RDMA0,
-	[DDP_COMPONENT_RDMA1] = MUTEX_MOD_DISP_RDMA1,
-	[DDP_COMPONENT_RDMA2] = MUTEX_MOD_DISP_RDMA2,
-	[DDP_COMPONENT_UFOE] = MUTEX_MOD_DISP_UFOE,
-	[DDP_COMPONENT_WDMA0] = MUTEX_MOD_DISP_WDMA0,
-	[DDP_COMPONENT_WDMA1] = MUTEX_MOD_DISP_WDMA1,
+static const unsigned int mt8173_mutex_mod[DDP_COMPONENT_ID_MAX] = {
+	[DDP_COMPONENT_AAL] = MT8173_MUTEX_MOD_DISP_AAL,
+	[DDP_COMPONENT_COLOR0] = MT8173_MUTEX_MOD_DISP_COLOR0,
+	[DDP_COMPONENT_COLOR1] = MT8173_MUTEX_MOD_DISP_COLOR1,
+	[DDP_COMPONENT_GAMMA] = MT8173_MUTEX_MOD_DISP_GAMMA,
+	[DDP_COMPONENT_OD] = MT8173_MUTEX_MOD_DISP_OD,
+	[DDP_COMPONENT_OVL0] = MT8173_MUTEX_MOD_DISP_OVL0,
+	[DDP_COMPONENT_OVL1] = MT8173_MUTEX_MOD_DISP_OVL1,
+	[DDP_COMPONENT_PWM0] = MT8173_MUTEX_MOD_DISP_PWM0,
+	[DDP_COMPONENT_PWM1] = MT8173_MUTEX_MOD_DISP_PWM1,
+	[DDP_COMPONENT_RDMA0] = MT8173_MUTEX_MOD_DISP_RDMA0,
+	[DDP_COMPONENT_RDMA1] = MT8173_MUTEX_MOD_DISP_RDMA1,
+	[DDP_COMPONENT_RDMA2] = MT8173_MUTEX_MOD_DISP_RDMA2,
+	[DDP_COMPONENT_UFOE] = MT8173_MUTEX_MOD_DISP_UFOE,
+	[DDP_COMPONENT_WDMA0] = MT8173_MUTEX_MOD_DISP_WDMA0,
+	[DDP_COMPONENT_WDMA1] = MT8173_MUTEX_MOD_DISP_WDMA1,
 };
 
 static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur,
@@ -106,6 +135,9 @@ static unsigned int mtk_ddp_mout_en(enum mtk_ddp_comp_id cur,
 	if (cur == DDP_COMPONENT_OVL0 && next == DDP_COMPONENT_COLOR0) {
 		*addr = DISP_REG_CONFIG_DISP_OVL0_MOUT_EN;
 		value = OVL0_MOUT_EN_COLOR0;
+	} else if (cur == DDP_COMPONENT_OVL0 && next == DDP_COMPONENT_RDMA0) {
+		*addr = DISP_REG_CONFIG_DISP_OVL_MOUT_EN;
+		value = OVL_MOUT_EN_RDMA;
 	} else if (cur == DDP_COMPONENT_OD && next == DDP_COMPONENT_RDMA0) {
 		*addr = DISP_REG_CONFIG_DISP_OD_MOUT_EN;
 		value = OD_MOUT_EN_RDMA0;
@@ -143,6 +175,9 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur,
 	} else if (cur == DDP_COMPONENT_OVL1 && next == DDP_COMPONENT_COLOR1) {
 		*addr = DISP_REG_CONFIG_DISP_COLOR1_SEL_IN;
 		value = COLOR1_SEL_IN_OVL1;
+	} else if (cur == DDP_COMPONENT_BLS && next == DDP_COMPONENT_DSI0) {
+		*addr = DISP_REG_CONFIG_DSI_SEL;
+		value = DSI_SEL_IN_BLS;
 	} else {
 		value = 0;
 	}
@@ -150,6 +185,15 @@ static unsigned int mtk_ddp_sel_in(enum mtk_ddp_comp_id cur,
 	return value;
 }
 
+static void mtk_ddp_sout_sel(void __iomem *config_regs,
+			     enum mtk_ddp_comp_id cur,
+			     enum mtk_ddp_comp_id next)
+{
+	if (cur == DDP_COMPONENT_BLS && next == DDP_COMPONENT_DSI0)
+		writel_relaxed(BLS_TO_DSI_RDMA1_TO_DPI1,
+			       config_regs + DISP_REG_CONFIG_OUT_SEL);
+}
+
 void mtk_ddp_add_comp_to_path(void __iomem *config_regs,
 			      enum mtk_ddp_comp_id cur,
 			      enum mtk_ddp_comp_id next)
@@ -162,6 +206,8 @@ void mtk_ddp_add_comp_to_path(void __iomem *config_regs,
 		writel_relaxed(reg, config_regs + addr);
 	}
 
+	mtk_ddp_sout_sel(config_regs, cur, next);
+
 	value = mtk_ddp_sel_in(cur, next, &addr);
 	if (value) {
 		reg = readl_relaxed(config_regs + addr) | value;
@@ -247,7 +293,7 @@ void mtk_disp_mutex_add_comp(struct mtk_disp_mutex *mutex,
 		break;
 	default:
 		reg = readl_relaxed(ddp->regs + DISP_REG_MUTEX_MOD(mutex->id));
-		reg |= mutex_mod[id];
+		reg |= ddp->mutex_mod[id];
 		writel_relaxed(reg, ddp->regs + DISP_REG_MUTEX_MOD(mutex->id));
 		return;
 	}
@@ -273,7 +319,7 @@ void mtk_disp_mutex_remove_comp(struct mtk_disp_mutex *mutex,
 		break;
 	default:
 		reg = readl_relaxed(ddp->regs + DISP_REG_MUTEX_MOD(mutex->id));
-		reg &= ~mutex_mod[id];
+		reg &= ~(ddp->mutex_mod[id]);
 		writel_relaxed(reg, ddp->regs + DISP_REG_MUTEX_MOD(mutex->id));
 		break;
 	}
@@ -299,6 +345,27 @@ void mtk_disp_mutex_disable(struct mtk_disp_mutex *mutex)
 	writel(0, ddp->regs + DISP_REG_MUTEX_EN(mutex->id));
 }
 
+void mtk_disp_mutex_acquire(struct mtk_disp_mutex *mutex)
+{
+	struct mtk_ddp *ddp = container_of(mutex, struct mtk_ddp,
+					   mutex[mutex->id]);
+	u32 tmp;
+
+	writel(1, ddp->regs + DISP_REG_MUTEX_EN(mutex->id));
+	writel(1, ddp->regs + DISP_REG_MUTEX(mutex->id));
+	if (readl_poll_timeout_atomic(ddp->regs + DISP_REG_MUTEX(mutex->id),
+				      tmp, tmp & INT_MUTEX, 1, 10000))
+		pr_err("could not acquire mutex %d\n", mutex->id);
+}
+
+void mtk_disp_mutex_release(struct mtk_disp_mutex *mutex)
+{
+	struct mtk_ddp *ddp = container_of(mutex, struct mtk_ddp,
+					   mutex[mutex->id]);
+
+	writel(0, ddp->regs + DISP_REG_MUTEX(mutex->id));
+}
+
 static int mtk_ddp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
@@ -326,6 +393,8 @@ static int mtk_ddp_probe(struct platform_device *pdev)
 		return PTR_ERR(ddp->regs);
 	}
 
+	ddp->mutex_mod = of_device_get_match_data(dev);
+
 	platform_set_drvdata(pdev, ddp);
 
 	return 0;
@@ -337,7 +406,8 @@ static int mtk_ddp_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id ddp_driver_dt_match[] = {
-	{ .compatible = "mediatek,mt8173-disp-mutex" },
+	{ .compatible = "mediatek,mt2701-disp-mutex", .data = mt2701_mutex_mod},
+	{ .compatible = "mediatek,mt8173-disp-mutex", .data = mt8173_mutex_mod},
 	{},
 };
 MODULE_DEVICE_TABLE(of, ddp_driver_dt_match);
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp.h
index 92c11752ff65..f9a799168077 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp.h
@@ -37,5 +37,7 @@ void mtk_disp_mutex_remove_comp(struct mtk_disp_mutex *mutex,
 				enum mtk_ddp_comp_id id);
 void mtk_disp_mutex_unprepare(struct mtk_disp_mutex *mutex);
 void mtk_disp_mutex_put(struct mtk_disp_mutex *mutex);
+void mtk_disp_mutex_acquire(struct mtk_disp_mutex *mutex);
+void mtk_disp_mutex_release(struct mtk_disp_mutex *mutex);
 
 #endif /* MTK_DRM_DDP_H */
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
index 48cc01fd20c7..8b52416b6e41 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.c
@@ -39,9 +39,11 @@
 #define DISP_REG_UFO_START			0x0000
 
 #define DISP_COLOR_CFG_MAIN			0x0400
-#define DISP_COLOR_START			0x0c00
-#define DISP_COLOR_WIDTH			0x0c50
-#define DISP_COLOR_HEIGHT			0x0c54
+#define DISP_COLOR_START_MT2701			0x0f00
+#define DISP_COLOR_START_MT8173			0x0c00
+#define DISP_COLOR_START(comp)			((comp)->data->color_offset)
+#define DISP_COLOR_WIDTH(comp)			(DISP_COLOR_START(comp) + 0x50)
+#define DISP_COLOR_HEIGHT(comp)			(DISP_COLOR_START(comp) + 0x54)
 
 #define DISP_AAL_EN				0x0000
 #define DISP_AAL_SIZE				0x0030
@@ -80,6 +82,20 @@
 #define DITHER_ADD_LSHIFT_G(x)			(((x) & 0x7) << 4)
 #define DITHER_ADD_RSHIFT_G(x)			(((x) & 0x7) << 0)
 
+struct mtk_disp_color_data {
+	unsigned int color_offset;
+};
+
+struct mtk_disp_color {
+	struct mtk_ddp_comp			ddp_comp;
+	const struct mtk_disp_color_data	*data;
+};
+
+static inline struct mtk_disp_color *comp_to_color(struct mtk_ddp_comp *comp)
+{
+	return container_of(comp, struct mtk_disp_color, ddp_comp);
+}
+
 void mtk_dither_set(struct mtk_ddp_comp *comp, unsigned int bpc,
 		    unsigned int CFG)
 {
@@ -107,15 +123,19 @@ static void mtk_color_config(struct mtk_ddp_comp *comp, unsigned int w,
 			     unsigned int h, unsigned int vrefresh,
 			     unsigned int bpc)
 {
-	writel(w, comp->regs + DISP_COLOR_WIDTH);
-	writel(h, comp->regs + DISP_COLOR_HEIGHT);
+	struct mtk_disp_color *color = comp_to_color(comp);
+
+	writel(w, comp->regs + DISP_COLOR_WIDTH(color));
+	writel(h, comp->regs + DISP_COLOR_HEIGHT(color));
 }
 
 static void mtk_color_start(struct mtk_ddp_comp *comp)
 {
+	struct mtk_disp_color *color = comp_to_color(comp);
+
 	writel(COLOR_BYPASS_ALL | COLOR_SEQ_SEL,
 	       comp->regs + DISP_COLOR_CFG_MAIN);
-	writel(0x1, comp->regs + DISP_COLOR_START);
+	writel(0x1, comp->regs + DISP_COLOR_START(color));
 }
 
 static void mtk_od_config(struct mtk_ddp_comp *comp, unsigned int w,
@@ -236,6 +256,7 @@ static const char * const mtk_ddp_comp_stem[MTK_DDP_COMP_TYPE_MAX] = {
 	[MTK_DISP_PWM] = "pwm",
 	[MTK_DISP_MUTEX] = "mutex",
 	[MTK_DISP_OD] = "od",
+	[MTK_DISP_BLS] = "bls",
 };
 
 struct mtk_ddp_comp_match {
@@ -246,6 +267,7 @@ struct mtk_ddp_comp_match {
 
 static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_AAL]	= { MTK_DISP_AAL,	0, &ddp_aal },
+	[DDP_COMPONENT_BLS]	= { MTK_DISP_BLS,	0, NULL },
 	[DDP_COMPONENT_COLOR0]	= { MTK_DISP_COLOR,	0, &ddp_color },
 	[DDP_COMPONENT_COLOR1]	= { MTK_DISP_COLOR,	1, &ddp_color },
 	[DDP_COMPONENT_DPI0]	= { MTK_DPI,		0, NULL },
@@ -264,6 +286,22 @@ static const struct mtk_ddp_comp_match mtk_ddp_matches[DDP_COMPONENT_ID_MAX] = {
 	[DDP_COMPONENT_WDMA1]	= { MTK_DISP_WDMA,	1, NULL },
 };
 
+static const struct mtk_disp_color_data mt2701_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT2701,
+};
+
+static const struct mtk_disp_color_data mt8173_color_driver_data = {
+	.color_offset = DISP_COLOR_START_MT8173,
+};
+
+static const struct of_device_id mtk_disp_color_driver_dt_match[] = {
+	{ .compatible = "mediatek,mt2701-disp-color",
+	  .data = &mt2701_color_driver_data},
+	{ .compatible = "mediatek,mt8173-disp-color",
+	  .data = &mt8173_color_driver_data},
+	{},
+};
+
 int mtk_ddp_comp_get_id(struct device_node *node,
 			enum mtk_ddp_comp_type comp_type)
 {
@@ -286,14 +324,29 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 	enum mtk_ddp_comp_type type;
 	struct device_node *larb_node;
 	struct platform_device *larb_pdev;
+	const struct of_device_id *match;
+	struct mtk_disp_color *color;
 
 	if (comp_id < 0 || comp_id >= DDP_COMPONENT_ID_MAX)
 		return -EINVAL;
 
+	type = mtk_ddp_matches[comp_id].type;
+	if (type == MTK_DISP_COLOR) {
+		devm_kfree(dev, comp);
+		color = devm_kzalloc(dev, sizeof(*color), GFP_KERNEL);
+		if (!color)
+			return -ENOMEM;
+
+		match = of_match_node(mtk_disp_color_driver_dt_match, node);
+		color->data = match->data;
+		comp = &color->ddp_comp;
+	}
+
 	comp->id = comp_id;
 	comp->funcs = funcs ?: mtk_ddp_matches[comp_id].funcs;
 
-	if (comp_id == DDP_COMPONENT_DPI0 ||
+	if (comp_id == DDP_COMPONENT_BLS ||
+	    comp_id == DDP_COMPONENT_DPI0 ||
 	    comp_id == DDP_COMPONENT_DSI0 ||
 	    comp_id == DDP_COMPONENT_PWM0) {
 		comp->regs = NULL;
@@ -308,8 +361,6 @@ int mtk_ddp_comp_init(struct device *dev, struct device_node *node,
 	if (IS_ERR(comp->clk))
 		comp->clk = NULL;
 
-	type = mtk_ddp_matches[comp_id].type;
-
 	/* Only DMA capable components need the LARB property */
 	comp->larb_dev = NULL;
 	if (type != MTK_DISP_OVL &&
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
index 22a33ee451c4..0828cf8bf85c 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_ddp_comp.h
@@ -36,11 +36,13 @@ enum mtk_ddp_comp_type {
 	MTK_DISP_PWM,
 	MTK_DISP_MUTEX,
 	MTK_DISP_OD,
+	MTK_DISP_BLS,
 	MTK_DDP_COMP_TYPE_MAX,
 };
 
 enum mtk_ddp_comp_id {
 	DDP_COMPONENT_AAL,
+	DDP_COMPONENT_BLS,
 	DDP_COMPONENT_COLOR0,
 	DDP_COMPONENT_COLOR1,
 	DDP_COMPONENT_DPI0,
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.c b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
index f5a1fd9b3ecc..f6c8ec4c7dbc 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.c
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.c
@@ -128,7 +128,20 @@ static const struct drm_mode_config_funcs mtk_drm_mode_config_funcs = {
 	.atomic_commit = mtk_atomic_commit,
 };
 
-static const enum mtk_ddp_comp_id mtk_ddp_main[] = {
+static const enum mtk_ddp_comp_id mt2701_mtk_ddp_main[] = {
+	DDP_COMPONENT_OVL0,
+	DDP_COMPONENT_RDMA0,
+	DDP_COMPONENT_COLOR0,
+	DDP_COMPONENT_BLS,
+	DDP_COMPONENT_DSI0,
+};
+
+static const enum mtk_ddp_comp_id mt2701_mtk_ddp_ext[] = {
+	DDP_COMPONENT_RDMA1,
+	DDP_COMPONENT_DPI0,
+};
+
+static const enum mtk_ddp_comp_id mt8173_mtk_ddp_main[] = {
 	DDP_COMPONENT_OVL0,
 	DDP_COMPONENT_COLOR0,
 	DDP_COMPONENT_AAL,
@@ -139,7 +152,7 @@ static const enum mtk_ddp_comp_id mtk_ddp_main[] = {
 	DDP_COMPONENT_PWM0,
 };
 
-static const enum mtk_ddp_comp_id mtk_ddp_ext[] = {
+static const enum mtk_ddp_comp_id mt8173_mtk_ddp_ext[] = {
 	DDP_COMPONENT_OVL1,
 	DDP_COMPONENT_COLOR1,
 	DDP_COMPONENT_GAMMA,
@@ -147,6 +160,21 @@ static const enum mtk_ddp_comp_id mtk_ddp_ext[] = {
 	DDP_COMPONENT_DPI0,
 };
 
+static const struct mtk_mmsys_driver_data mt2701_mmsys_driver_data = {
+	.main_path = mt2701_mtk_ddp_main,
+	.main_len = ARRAY_SIZE(mt2701_mtk_ddp_main),
+	.ext_path = mt2701_mtk_ddp_ext,
+	.ext_len = ARRAY_SIZE(mt2701_mtk_ddp_ext),
+	.shadow_register = true,
+};
+
+static const struct mtk_mmsys_driver_data mt8173_mmsys_driver_data = {
+	.main_path = mt8173_mtk_ddp_main,
+	.main_len = ARRAY_SIZE(mt8173_mtk_ddp_main),
+	.ext_path = mt8173_mtk_ddp_ext,
+	.ext_len = ARRAY_SIZE(mt8173_mtk_ddp_ext),
+};
+
 static int mtk_drm_kms_init(struct drm_device *drm)
 {
 	struct mtk_drm_private *private = drm->dev_private;
@@ -189,17 +217,19 @@ static int mtk_drm_kms_init(struct drm_device *drm)
 	 * and each statically assigned to a crtc:
 	 * OVL0 -> COLOR0 -> AAL -> OD -> RDMA0 -> UFOE -> DSI0 ...
 	 */
-	ret = mtk_drm_crtc_create(drm, mtk_ddp_main, ARRAY_SIZE(mtk_ddp_main));
+	ret = mtk_drm_crtc_create(drm, private->data->main_path,
+				  private->data->main_len);
 	if (ret < 0)
 		goto err_component_unbind;
 	/* ... and OVL1 -> COLOR1 -> GAMMA -> RDMA1 -> DPI0. */
-	ret = mtk_drm_crtc_create(drm, mtk_ddp_ext, ARRAY_SIZE(mtk_ddp_ext));
+	ret = mtk_drm_crtc_create(drm, private->data->ext_path,
+				  private->data->ext_len);
 	if (ret < 0)
 		goto err_component_unbind;
 
 	/* Use OVL device for all DMA memory allocations */
-	np = private->comp_node[mtk_ddp_main[0]] ?:
-	     private->comp_node[mtk_ddp_ext[0]];
+	np = private->comp_node[private->data->main_path[0]] ?:
+	     private->comp_node[private->data->ext_path[0]];
 	pdev = of_find_device_by_node(np);
 	if (!pdev) {
 		ret = -ENODEV;
@@ -328,16 +358,22 @@ static const struct component_master_ops mtk_drm_ops = {
 };
 
 static const struct of_device_id mtk_ddp_comp_dt_ids[] = {
+	{ .compatible = "mediatek,mt2701-disp-ovl",   .data = (void *)MTK_DISP_OVL },
 	{ .compatible = "mediatek,mt8173-disp-ovl",   .data = (void *)MTK_DISP_OVL },
+	{ .compatible = "mediatek,mt2701-disp-rdma",  .data = (void *)MTK_DISP_RDMA },
 	{ .compatible = "mediatek,mt8173-disp-rdma",  .data = (void *)MTK_DISP_RDMA },
 	{ .compatible = "mediatek,mt8173-disp-wdma",  .data = (void *)MTK_DISP_WDMA },
+	{ .compatible = "mediatek,mt2701-disp-color", .data = (void *)MTK_DISP_COLOR },
 	{ .compatible = "mediatek,mt8173-disp-color", .data = (void *)MTK_DISP_COLOR },
 	{ .compatible = "mediatek,mt8173-disp-aal",   .data = (void *)MTK_DISP_AAL},
 	{ .compatible = "mediatek,mt8173-disp-gamma", .data = (void *)MTK_DISP_GAMMA, },
 	{ .compatible = "mediatek,mt8173-disp-ufoe",  .data = (void *)MTK_DISP_UFOE },
+	{ .compatible = "mediatek,mt2701-dsi",	      .data = (void *)MTK_DSI },
 	{ .compatible = "mediatek,mt8173-dsi",        .data = (void *)MTK_DSI },
 	{ .compatible = "mediatek,mt8173-dpi",        .data = (void *)MTK_DPI },
+	{ .compatible = "mediatek,mt2701-disp-mutex", .data = (void *)MTK_DISP_MUTEX },
 	{ .compatible = "mediatek,mt8173-disp-mutex", .data = (void *)MTK_DISP_MUTEX },
+	{ .compatible = "mediatek,mt2701-disp-pwm",   .data = (void *)MTK_DISP_BLS },
 	{ .compatible = "mediatek,mt8173-disp-pwm",   .data = (void *)MTK_DISP_PWM },
 	{ .compatible = "mediatek,mt8173-disp-od",    .data = (void *)MTK_DISP_OD },
 	{ }
@@ -359,6 +395,7 @@ static int mtk_drm_probe(struct platform_device *pdev)
 
 	mutex_init(&private->commit.lock);
 	INIT_WORK(&private->commit.work, mtk_atomic_work);
+	private->data = of_device_get_match_data(dev);
 
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	private->config_regs = devm_ioremap_resource(dev, mem);
@@ -510,7 +547,10 @@ static SIMPLE_DEV_PM_OPS(mtk_drm_pm_ops, mtk_drm_sys_suspend,
 			 mtk_drm_sys_resume);
 
 static const struct of_device_id mtk_drm_of_ids[] = {
-	{ .compatible = "mediatek,mt8173-mmsys", },
+	{ .compatible = "mediatek,mt2701-mmsys",
+	  .data = &mt2701_mmsys_driver_data},
+	{ .compatible = "mediatek,mt8173-mmsys",
+	  .data = &mt8173_mmsys_driver_data},
 	{ }
 };
 
diff --git a/drivers/gpu/drm/mediatek/mtk_drm_drv.h b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
index df322a7a5fcb..aef8747d810b 100644
--- a/drivers/gpu/drm/mediatek/mtk_drm_drv.h
+++ b/drivers/gpu/drm/mediatek/mtk_drm_drv.h
@@ -28,6 +28,14 @@ struct drm_fb_helper;
 struct drm_property;
 struct regmap;
 
+struct mtk_mmsys_driver_data {
+	const enum mtk_ddp_comp_id *main_path;
+	unsigned int main_len;
+	const enum mtk_ddp_comp_id *ext_path;
+	unsigned int ext_len;
+	bool shadow_register;
+};
+
 struct mtk_drm_private {
 	struct drm_device *drm;
 	struct device *dma_dev;
@@ -39,6 +47,7 @@ struct mtk_drm_private {
 	void __iomem *config_regs;
 	struct device_node *comp_node[DDP_COMPONENT_ID_MAX];
 	struct mtk_ddp_comp *ddp_comp[DDP_COMPONENT_ID_MAX];
+	const struct mtk_mmsys_driver_data *data;
 
 	struct {
 		struct drm_atomic_state *state;
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index dd71cbb1a622..808b995a990f 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -16,22 +16,31 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_mipi_dsi.h>
 #include <drm/drm_panel.h>
+#include <drm/drm_of.h>
 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/irq.h>
 #include <linux/of.h>
 #include <linux/of_platform.h>
-#include <linux/of_graph.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
+#include <video/mipi_display.h>
 #include <video/videomode.h>
 
 #include "mtk_drm_ddp_comp.h"
 
-#define DSI_VIDEO_FIFO_DEPTH	(1920 / 4)
-#define DSI_HOST_FIFO_DEPTH	64
-
 #define DSI_START		0x00
 
+#define DSI_INTEN		0x08
+
+#define DSI_INTSTA		0x0c
+#define LPRX_RD_RDY_INT_FLAG		BIT(0)
+#define CMD_DONE_INT_FLAG		BIT(1)
+#define TE_RDY_INT_FLAG			BIT(2)
+#define VM_DONE_INT_FLAG		BIT(3)
+#define EXT_TE_RDY_INT_FLAG		BIT(4)
+#define DSI_BUSY			BIT(31)
+
 #define DSI_CON_CTRL		0x10
 #define DSI_RESET			BIT(0)
 #define DSI_EN				BIT(1)
@@ -46,7 +55,7 @@
 #define MIX_MODE			BIT(17)
 
 #define DSI_TXRX_CTRL		0x18
-#define VC_NUM				(2 << 0)
+#define VC_NUM				BIT(1)
 #define LANE_NUM			(0xf << 2)
 #define DIS_EOT				BIT(6)
 #define NULL_EN				BIT(7)
@@ -72,8 +81,19 @@
 #define DSI_HBP_WC		0x54
 #define DSI_HFP_WC		0x58
 
+#define DSI_CMDQ_SIZE		0x60
+#define CMDQ_SIZE			0x3f
+
 #define DSI_HSTX_CKL_WC		0x64
 
+#define DSI_RX_DATA0		0x74
+#define DSI_RX_DATA1		0x78
+#define DSI_RX_DATA2		0x7c
+#define DSI_RX_DATA3		0x80
+
+#define DSI_RACK		0x84
+#define RACK				BIT(0)
+
 #define DSI_PHY_LCCON		0x104
 #define LC_HS_TX_EN			BIT(0)
 #define LC_ULPM_EN			BIT(1)
@@ -106,6 +126,19 @@
 #define CLK_HS_POST			(0xff << 8)
 #define CLK_HS_EXIT			(0xff << 16)
 
+#define DSI_VM_CMD_CON		0x130
+#define VM_CMD_EN			BIT(0)
+#define TS_VFP_EN			BIT(5)
+
+#define DSI_CMDQ0		0x180
+#define CONFIG				(0xff << 0)
+#define SHORT_PACKET			0
+#define LONG_PACKET			2
+#define BTA				BIT(2)
+#define DATA_ID				(0xff << 8)
+#define DATA_0				(0xff << 16)
+#define DATA_1				(0xff << 24)
+
 #define T_LPX		5
 #define T_HS_PREP	6
 #define T_HS_TRAIL	8
@@ -114,6 +147,12 @@
 
 #define NS_TO_CYCLE(n, c)    ((n) / (c) + (((n) % (c)) ? 1 : 0))
 
+#define MTK_DSI_HOST_IS_READ(type) \
+	((type == MIPI_DSI_GENERIC_READ_REQUEST_0_PARAM) || \
+	(type == MIPI_DSI_GENERIC_READ_REQUEST_1_PARAM) || \
+	(type == MIPI_DSI_GENERIC_READ_REQUEST_2_PARAM) || \
+	(type == MIPI_DSI_DCS_READ))
+
 struct phy;
 
 struct mtk_dsi {
@@ -140,6 +179,8 @@ struct mtk_dsi {
 	struct videomode vm;
 	int refcount;
 	bool enabled;
+	u32 irq_data;
+	wait_queue_head_t irq_wait_queue;
 };
 
 static inline struct mtk_dsi *encoder_to_dsi(struct drm_encoder *e)
@@ -164,7 +205,7 @@ static void mtk_dsi_mask(struct mtk_dsi *dsi, u32 offset, u32 mask, u32 data)
 	writel((temp & ~mask) | (data & mask), dsi->regs + offset);
 }
 
-static void dsi_phy_timconfig(struct mtk_dsi *dsi)
+static void mtk_dsi_phy_timconfig(struct mtk_dsi *dsi)
 {
 	u32 timcon0, timcon1, timcon2, timcon3;
 	u32 ui, cycle_time;
@@ -196,118 +237,39 @@ static void mtk_dsi_disable(struct mtk_dsi *dsi)
 	mtk_dsi_mask(dsi, DSI_CON_CTRL, DSI_EN, 0);
 }
 
-static void mtk_dsi_reset(struct mtk_dsi *dsi)
+static void mtk_dsi_reset_engine(struct mtk_dsi *dsi)
 {
 	mtk_dsi_mask(dsi, DSI_CON_CTRL, DSI_RESET, DSI_RESET);
 	mtk_dsi_mask(dsi, DSI_CON_CTRL, DSI_RESET, 0);
 }
 
-static int mtk_dsi_poweron(struct mtk_dsi *dsi)
-{
-	struct device *dev = dsi->dev;
-	int ret;
-	u64 pixel_clock, total_bits;
-	u32 htotal, htotal_bits, bit_per_pixel, overhead_cycles, overhead_bits;
-
-	if (++dsi->refcount != 1)
-		return 0;
-
-	switch (dsi->format) {
-	case MIPI_DSI_FMT_RGB565:
-		bit_per_pixel = 16;
-		break;
-	case MIPI_DSI_FMT_RGB666_PACKED:
-		bit_per_pixel = 18;
-		break;
-	case MIPI_DSI_FMT_RGB666:
-	case MIPI_DSI_FMT_RGB888:
-	default:
-		bit_per_pixel = 24;
-		break;
-	}
-
-	/**
-	 * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
-	 * htotal_time = htotal * byte_per_pixel / num_lanes
-	 * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
-	 * mipi_ratio = (htotal_time + overhead_time) / htotal_time
-	 * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
-	 */
-	pixel_clock = dsi->vm.pixelclock * 1000;
-	htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
-			dsi->vm.hsync_len;
-	htotal_bits = htotal * bit_per_pixel;
-
-	overhead_cycles = T_LPX + T_HS_PREP + T_HS_ZERO + T_HS_TRAIL +
-			T_HS_EXIT;
-	overhead_bits = overhead_cycles * dsi->lanes * 8;
-	total_bits = htotal_bits + overhead_bits;
-
-	dsi->data_rate = DIV_ROUND_UP_ULL(pixel_clock * total_bits,
-					  htotal * dsi->lanes);
-
-	ret = clk_set_rate(dsi->hs_clk, dsi->data_rate);
-	if (ret < 0) {
-		dev_err(dev, "Failed to set data rate: %d\n", ret);
-		goto err_refcount;
-	}
-
-	phy_power_on(dsi->phy);
-
-	ret = clk_prepare_enable(dsi->engine_clk);
-	if (ret < 0) {
-		dev_err(dev, "Failed to enable engine clock: %d\n", ret);
-		goto err_phy_power_off;
-	}
-
-	ret = clk_prepare_enable(dsi->digital_clk);
-	if (ret < 0) {
-		dev_err(dev, "Failed to enable digital clock: %d\n", ret);
-		goto err_disable_engine_clk;
-	}
-
-	mtk_dsi_enable(dsi);
-	mtk_dsi_reset(dsi);
-	dsi_phy_timconfig(dsi);
-
-	return 0;
-
-err_disable_engine_clk:
-	clk_disable_unprepare(dsi->engine_clk);
-err_phy_power_off:
-	phy_power_off(dsi->phy);
-err_refcount:
-	dsi->refcount--;
-	return ret;
-}
-
-static void dsi_clk_ulp_mode_enter(struct mtk_dsi *dsi)
+static void mtk_dsi_clk_ulp_mode_enter(struct mtk_dsi *dsi)
 {
 	mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_HS_TX_EN, 0);
 	mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_ULPM_EN, 0);
 }
 
-static void dsi_clk_ulp_mode_leave(struct mtk_dsi *dsi)
+static void mtk_dsi_clk_ulp_mode_leave(struct mtk_dsi *dsi)
 {
 	mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_ULPM_EN, 0);
 	mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_WAKEUP_EN, LC_WAKEUP_EN);
 	mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_WAKEUP_EN, 0);
 }
 
-static void dsi_lane0_ulp_mode_enter(struct mtk_dsi *dsi)
+static void mtk_dsi_lane0_ulp_mode_enter(struct mtk_dsi *dsi)
 {
 	mtk_dsi_mask(dsi, DSI_PHY_LD0CON, LD0_HS_TX_EN, 0);
 	mtk_dsi_mask(dsi, DSI_PHY_LD0CON, LD0_ULPM_EN, 0);
 }
 
-static void dsi_lane0_ulp_mode_leave(struct mtk_dsi *dsi)
+static void mtk_dsi_lane0_ulp_mode_leave(struct mtk_dsi *dsi)
 {
 	mtk_dsi_mask(dsi, DSI_PHY_LD0CON, LD0_ULPM_EN, 0);
 	mtk_dsi_mask(dsi, DSI_PHY_LD0CON, LD0_WAKEUP_EN, LD0_WAKEUP_EN);
 	mtk_dsi_mask(dsi, DSI_PHY_LD0CON, LD0_WAKEUP_EN, 0);
 }
 
-static bool dsi_clk_hs_state(struct mtk_dsi *dsi)
+static bool mtk_dsi_clk_hs_state(struct mtk_dsi *dsi)
 {
 	u32 tmp_reg1;
 
@@ -315,30 +277,37 @@ static bool dsi_clk_hs_state(struct mtk_dsi *dsi)
 	return ((tmp_reg1 & LC_HS_TX_EN) == 1) ? true : false;
 }
 
-static void dsi_clk_hs_mode(struct mtk_dsi *dsi, bool enter)
+static void mtk_dsi_clk_hs_mode(struct mtk_dsi *dsi, bool enter)
 {
-	if (enter && !dsi_clk_hs_state(dsi))
+	if (enter && !mtk_dsi_clk_hs_state(dsi))
 		mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_HS_TX_EN, LC_HS_TX_EN);
-	else if (!enter && dsi_clk_hs_state(dsi))
+	else if (!enter && mtk_dsi_clk_hs_state(dsi))
 		mtk_dsi_mask(dsi, DSI_PHY_LCCON, LC_HS_TX_EN, 0);
 }
 
-static void dsi_set_mode(struct mtk_dsi *dsi)
+static void mtk_dsi_set_mode(struct mtk_dsi *dsi)
 {
 	u32 vid_mode = CMD_MODE;
 
 	if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO) {
-		vid_mode = SYNC_PULSE_MODE;
-
-		if ((dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST) &&
-		    !(dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE))
+		if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_BURST)
 			vid_mode = BURST_MODE;
+		else if (dsi->mode_flags & MIPI_DSI_MODE_VIDEO_SYNC_PULSE)
+			vid_mode = SYNC_PULSE_MODE;
+		else
+			vid_mode = SYNC_EVENT_MODE;
 	}
 
 	writel(vid_mode, dsi->regs + DSI_MODE_CTRL);
 }
 
-static void dsi_ps_control_vact(struct mtk_dsi *dsi)
+static void mtk_dsi_set_vm_cmd(struct mtk_dsi *dsi)
+{
+	mtk_dsi_mask(dsi, DSI_VM_CMD_CON, VM_CMD_EN, VM_CMD_EN);
+	mtk_dsi_mask(dsi, DSI_VM_CMD_CON, TS_VFP_EN, TS_VFP_EN);
+}
+
+static void mtk_dsi_ps_control_vact(struct mtk_dsi *dsi)
 {
 	struct videomode *vm = &dsi->vm;
 	u32 dsi_buf_bpp, ps_wc;
@@ -372,7 +341,7 @@ static void dsi_ps_control_vact(struct mtk_dsi *dsi)
 	writel(ps_wc, dsi->regs + DSI_HSTX_CKL_WC);
 }
 
-static void dsi_rxtx_control(struct mtk_dsi *dsi)
+static void mtk_dsi_rxtx_control(struct mtk_dsi *dsi)
 {
 	u32 tmp_reg;
 
@@ -394,12 +363,15 @@ static void dsi_rxtx_control(struct mtk_dsi *dsi)
 		break;
 	}
 
+	tmp_reg |= (dsi->mode_flags & MIPI_DSI_CLOCK_NON_CONTINUOUS) << 6;
+	tmp_reg |= (dsi->mode_flags & MIPI_DSI_MODE_EOT_PACKET) >> 3;
+
 	writel(tmp_reg, dsi->regs + DSI_TXRX_CTRL);
 }
 
-static void dsi_ps_control(struct mtk_dsi *dsi)
+static void mtk_dsi_ps_control(struct mtk_dsi *dsi)
 {
-	unsigned int dsi_tmp_buf_bpp;
+	u32 dsi_tmp_buf_bpp;
 	u32 tmp_reg;
 
 	switch (dsi->format) {
@@ -429,12 +401,12 @@ static void dsi_ps_control(struct mtk_dsi *dsi)
 	writel(tmp_reg, dsi->regs + DSI_PSCTRL);
 }
 
-static void dsi_config_vdo_timing(struct mtk_dsi *dsi)
+static void mtk_dsi_config_vdo_timing(struct mtk_dsi *dsi)
 {
-	unsigned int horizontal_sync_active_byte;
-	unsigned int horizontal_backporch_byte;
-	unsigned int horizontal_frontporch_byte;
-	unsigned int dsi_tmp_buf_bpp;
+	u32 horizontal_sync_active_byte;
+	u32 horizontal_backporch_byte;
+	u32 horizontal_frontporch_byte;
+	u32 dsi_tmp_buf_bpp;
 
 	struct videomode *vm = &dsi->vm;
 
@@ -463,7 +435,7 @@ static void dsi_config_vdo_timing(struct mtk_dsi *dsi)
 	writel(horizontal_backporch_byte, dsi->regs + DSI_HBP_WC);
 	writel(horizontal_frontporch_byte, dsi->regs + DSI_HFP_WC);
 
-	dsi_ps_control(dsi);
+	mtk_dsi_ps_control(dsi);
 }
 
 static void mtk_dsi_start(struct mtk_dsi *dsi)
@@ -472,6 +444,184 @@ static void mtk_dsi_start(struct mtk_dsi *dsi)
 	writel(1, dsi->regs + DSI_START);
 }
 
+static void mtk_dsi_stop(struct mtk_dsi *dsi)
+{
+	writel(0, dsi->regs + DSI_START);
+}
+
+static void mtk_dsi_set_cmd_mode(struct mtk_dsi *dsi)
+{
+	writel(CMD_MODE, dsi->regs + DSI_MODE_CTRL);
+}
+
+static void mtk_dsi_set_interrupt_enable(struct mtk_dsi *dsi)
+{
+	u32 inten = LPRX_RD_RDY_INT_FLAG | CMD_DONE_INT_FLAG | VM_DONE_INT_FLAG;
+
+	writel(inten, dsi->regs + DSI_INTEN);
+}
+
+static void mtk_dsi_irq_data_set(struct mtk_dsi *dsi, u32 irq_bit)
+{
+	dsi->irq_data |= irq_bit;
+}
+
+static void mtk_dsi_irq_data_clear(struct mtk_dsi *dsi, u32 irq_bit)
+{
+	dsi->irq_data &= ~irq_bit;
+}
+
+static s32 mtk_dsi_wait_for_irq_done(struct mtk_dsi *dsi, u32 irq_flag,
+				     unsigned int timeout)
+{
+	s32 ret = 0;
+	unsigned long jiffies = msecs_to_jiffies(timeout);
+
+	ret = wait_event_interruptible_timeout(dsi->irq_wait_queue,
+					       dsi->irq_data & irq_flag,
+					       jiffies);
+	if (ret == 0) {
+		DRM_WARN("Wait DSI IRQ(0x%08x) Timeout\n", irq_flag);
+
+		mtk_dsi_enable(dsi);
+		mtk_dsi_reset_engine(dsi);
+	}
+
+	return ret;
+}
+
+static irqreturn_t mtk_dsi_irq(int irq, void *dev_id)
+{
+	struct mtk_dsi *dsi = dev_id;
+	u32 status, tmp;
+	u32 flag = LPRX_RD_RDY_INT_FLAG | CMD_DONE_INT_FLAG | VM_DONE_INT_FLAG;
+
+	status = readl(dsi->regs + DSI_INTSTA) & flag;
+
+	if (status) {
+		do {
+			mtk_dsi_mask(dsi, DSI_RACK, RACK, RACK);
+			tmp = readl(dsi->regs + DSI_INTSTA);
+		} while (tmp & DSI_BUSY);
+
+		mtk_dsi_mask(dsi, DSI_INTSTA, status, 0);
+		mtk_dsi_irq_data_set(dsi, status);
+		wake_up_interruptible(&dsi->irq_wait_queue);
+	}
+
+	return IRQ_HANDLED;
+}
+
+static s32 mtk_dsi_switch_to_cmd_mode(struct mtk_dsi *dsi, u8 irq_flag, u32 t)
+{
+	mtk_dsi_irq_data_clear(dsi, irq_flag);
+	mtk_dsi_set_cmd_mode(dsi);
+
+	if (!mtk_dsi_wait_for_irq_done(dsi, irq_flag, t)) {
+		DRM_ERROR("failed to switch cmd mode\n");
+		return -ETIME;
+	} else {
+		return 0;
+	}
+}
+
+static int mtk_dsi_poweron(struct mtk_dsi *dsi)
+{
+	struct device *dev = dsi->dev;
+	int ret;
+	u64 pixel_clock, total_bits;
+	u32 htotal, htotal_bits, bit_per_pixel, overhead_cycles, overhead_bits;
+
+	if (++dsi->refcount != 1)
+		return 0;
+
+	switch (dsi->format) {
+	case MIPI_DSI_FMT_RGB565:
+		bit_per_pixel = 16;
+		break;
+	case MIPI_DSI_FMT_RGB666_PACKED:
+		bit_per_pixel = 18;
+		break;
+	case MIPI_DSI_FMT_RGB666:
+	case MIPI_DSI_FMT_RGB888:
+	default:
+		bit_per_pixel = 24;
+		break;
+	}
+
+	/**
+	 * vm.pixelclock is in kHz, pixel_clock unit is Hz, so multiply by 1000
+	 * htotal_time = htotal * byte_per_pixel / num_lanes
+	 * overhead_time = lpx + hs_prepare + hs_zero + hs_trail + hs_exit
+	 * mipi_ratio = (htotal_time + overhead_time) / htotal_time
+	 * data_rate = pixel_clock * bit_per_pixel * mipi_ratio / num_lanes;
+	 */
+	pixel_clock = dsi->vm.pixelclock * 1000;
+	htotal = dsi->vm.hactive + dsi->vm.hback_porch + dsi->vm.hfront_porch +
+			dsi->vm.hsync_len;
+	htotal_bits = htotal * bit_per_pixel;
+
+	overhead_cycles = T_LPX + T_HS_PREP + T_HS_ZERO + T_HS_TRAIL +
+			T_HS_EXIT;
+	overhead_bits = overhead_cycles * dsi->lanes * 8;
+	total_bits = htotal_bits + overhead_bits;
+
+	dsi->data_rate = DIV_ROUND_UP_ULL(pixel_clock * total_bits,
+					  htotal * dsi->lanes);
+
+	ret = clk_set_rate(dsi->hs_clk, dsi->data_rate);
+	if (ret < 0) {
+		dev_err(dev, "Failed to set data rate: %d\n", ret);
+		goto err_refcount;
+	}
+
+	phy_power_on(dsi->phy);
+
+	ret = clk_prepare_enable(dsi->engine_clk);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable engine clock: %d\n", ret);
+		goto err_phy_power_off;
+	}
+
+	ret = clk_prepare_enable(dsi->digital_clk);
+	if (ret < 0) {
+		dev_err(dev, "Failed to enable digital clock: %d\n", ret);
+		goto err_disable_engine_clk;
+	}
+
+	mtk_dsi_enable(dsi);
+	mtk_dsi_reset_engine(dsi);
+	mtk_dsi_phy_timconfig(dsi);
+
+	mtk_dsi_rxtx_control(dsi);
+	mtk_dsi_ps_control_vact(dsi);
+	mtk_dsi_set_vm_cmd(dsi);
+	mtk_dsi_config_vdo_timing(dsi);
+	mtk_dsi_set_interrupt_enable(dsi);
+
+	mtk_dsi_clk_ulp_mode_leave(dsi);
+	mtk_dsi_lane0_ulp_mode_leave(dsi);
+	mtk_dsi_clk_hs_mode(dsi, 0);
+
+	if (dsi->panel) {
+		if (drm_panel_prepare(dsi->panel)) {
+			DRM_ERROR("failed to prepare the panel\n");
+			goto err_disable_digital_clk;
+		}
+	}
+
+	return 0;
+err_disable_digital_clk:
+	clk_disable_unprepare(dsi->digital_clk);
+err_disable_engine_clk:
+	clk_disable_unprepare(dsi->engine_clk);
+err_phy_power_off:
+	phy_power_off(dsi->phy);
+err_refcount:
+	dsi->refcount--;
+	return ret;
+}
+
 static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
 {
 	if (WARN_ON(dsi->refcount == 0))
@@ -480,8 +630,18 @@ static void mtk_dsi_poweroff(struct mtk_dsi *dsi)
 	if (--dsi->refcount != 0)
 		return;
 
-	dsi_lane0_ulp_mode_enter(dsi);
-	dsi_clk_ulp_mode_enter(dsi);
+	if (!mtk_dsi_switch_to_cmd_mode(dsi, VM_DONE_INT_FLAG, 500)) {
+		if (dsi->panel) {
+			if (drm_panel_unprepare(dsi->panel)) {
+				DRM_ERROR("failed to unprepare the panel\n");
+				return;
+			}
+		}
+	}
+
+	mtk_dsi_reset_engine(dsi);
+	mtk_dsi_lane0_ulp_mode_enter(dsi);
+	mtk_dsi_clk_ulp_mode_enter(dsi);
 
 	mtk_dsi_disable(dsi);
 
@@ -498,35 +658,30 @@ static void mtk_output_dsi_enable(struct mtk_dsi *dsi)
 	if (dsi->enabled)
 		return;
 
-	if (dsi->panel) {
-		if (drm_panel_prepare(dsi->panel)) {
-			DRM_ERROR("failed to setup the panel\n");
-			return;
-		}
-	}
-
 	ret = mtk_dsi_poweron(dsi);
 	if (ret < 0) {
 		DRM_ERROR("failed to power on dsi\n");
 		return;
 	}
 
-	dsi_rxtx_control(dsi);
-
-	dsi_clk_ulp_mode_leave(dsi);
-	dsi_lane0_ulp_mode_leave(dsi);
-	dsi_clk_hs_mode(dsi, 0);
-	dsi_set_mode(dsi);
-
-	dsi_ps_control_vact(dsi);
-	dsi_config_vdo_timing(dsi);
-
-	dsi_set_mode(dsi);
-	dsi_clk_hs_mode(dsi, 1);
+	mtk_dsi_set_mode(dsi);
+	mtk_dsi_clk_hs_mode(dsi, 1);
 
 	mtk_dsi_start(dsi);
 
+	if (dsi->panel) {
+		if (drm_panel_enable(dsi->panel)) {
+			DRM_ERROR("failed to enable the panel\n");
+			goto err_dsi_power_off;
+		}
+	}
+
 	dsi->enabled = true;
+
+	return;
+err_dsi_power_off:
+	mtk_dsi_stop(dsi);
+	mtk_dsi_poweroff(dsi);
 }
 
 static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
@@ -541,6 +696,7 @@ static void mtk_output_dsi_disable(struct mtk_dsi *dsi)
 		}
 	}
 
+	mtk_dsi_stop(dsi);
 	mtk_dsi_poweroff(dsi);
 
 	dsi->enabled = false;
@@ -742,9 +898,149 @@ static int mtk_dsi_host_detach(struct mipi_dsi_host *host,
 	return 0;
 }
 
+static void mtk_dsi_wait_for_idle(struct mtk_dsi *dsi)
+{
+	u32 timeout_ms = 500000; /* total 1s ~ 2s timeout */
+
+	while (timeout_ms--) {
+		if (!(readl(dsi->regs + DSI_INTSTA) & DSI_BUSY))
+			break;
+
+		usleep_range(2, 4);
+	}
+
+	if (timeout_ms == 0) {
+		DRM_WARN("polling dsi wait not busy timeout!\n");
+
+		mtk_dsi_enable(dsi);
+		mtk_dsi_reset_engine(dsi);
+	}
+}
+
+static u32 mtk_dsi_recv_cnt(u8 type, u8 *read_data)
+{
+	switch (type) {
+	case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_1BYTE:
+	case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_1BYTE:
+		return 1;
+	case MIPI_DSI_RX_GENERIC_SHORT_READ_RESPONSE_2BYTE:
+	case MIPI_DSI_RX_DCS_SHORT_READ_RESPONSE_2BYTE:
+		return 2;
+	case MIPI_DSI_RX_GENERIC_LONG_READ_RESPONSE:
+	case MIPI_DSI_RX_DCS_LONG_READ_RESPONSE:
+		return read_data[1] + read_data[2] * 16;
+	case MIPI_DSI_RX_ACKNOWLEDGE_AND_ERROR_REPORT:
+		DRM_INFO("type is 0x02, try again\n");
+		break;
+	default:
+		DRM_INFO("type(0x%x) cannot be non-recognite\n", type);
+		break;
+	}
+
+	return 0;
+}
+
+static void mtk_dsi_cmdq(struct mtk_dsi *dsi, const struct mipi_dsi_msg *msg)
+{
+	const char *tx_buf = msg->tx_buf;
+	u8 config, cmdq_size, cmdq_off, type = msg->type;
+	u32 reg_val, cmdq_mask, i;
+
+	if (MTK_DSI_HOST_IS_READ(type))
+		config = BTA;
+	else
+		config = (msg->tx_len > 2) ? LONG_PACKET : SHORT_PACKET;
+
+	if (msg->tx_len > 2) {
+		cmdq_size = 1 + (msg->tx_len + 3) / 4;
+		cmdq_off = 4;
+		cmdq_mask = CONFIG | DATA_ID | DATA_0 | DATA_1;
+		reg_val = (msg->tx_len << 16) | (type << 8) | config;
+	} else {
+		cmdq_size = 1;
+		cmdq_off = 2;
+		cmdq_mask = CONFIG | DATA_ID;
+		reg_val = (type << 8) | config;
+	}
+
+	for (i = 0; i < msg->tx_len; i++)
+		writeb(tx_buf[i], dsi->regs + DSI_CMDQ0 + cmdq_off + i);
+
+	mtk_dsi_mask(dsi, DSI_CMDQ0, cmdq_mask, reg_val);
+	mtk_dsi_mask(dsi, DSI_CMDQ_SIZE, CMDQ_SIZE, cmdq_size);
+}
+
+static ssize_t mtk_dsi_host_send_cmd(struct mtk_dsi *dsi,
+				     const struct mipi_dsi_msg *msg, u8 flag)
+{
+	mtk_dsi_wait_for_idle(dsi);
+	mtk_dsi_irq_data_clear(dsi, flag);
+	mtk_dsi_cmdq(dsi, msg);
+	mtk_dsi_start(dsi);
+
+	if (!mtk_dsi_wait_for_irq_done(dsi, flag, 2000))
+		return -ETIME;
+	else
+		return 0;
+}
+
+static ssize_t mtk_dsi_host_transfer(struct mipi_dsi_host *host,
+				     const struct mipi_dsi_msg *msg)
+{
+	struct mtk_dsi *dsi = host_to_dsi(host);
+	u32 recv_cnt, i;
+	u8 read_data[16];
+	void *src_addr;
+	u8 irq_flag = CMD_DONE_INT_FLAG;
+
+	if (readl(dsi->regs + DSI_MODE_CTRL) & MODE) {
+		DRM_ERROR("dsi engine is not command mode\n");
+		return -EINVAL;
+	}
+
+	if (MTK_DSI_HOST_IS_READ(msg->type))
+		irq_flag |= LPRX_RD_RDY_INT_FLAG;
+
+	if (mtk_dsi_host_send_cmd(dsi, msg, irq_flag) < 0)
+		return -ETIME;
+
+	if (!MTK_DSI_HOST_IS_READ(msg->type))
+		return 0;
+
+	if (!msg->rx_buf) {
+		DRM_ERROR("dsi receive buffer size may be NULL\n");
+		return -EINVAL;
+	}
+
+	for (i = 0; i < 16; i++)
+		*(read_data + i) = readb(dsi->regs + DSI_RX_DATA0 + i);
+
+	recv_cnt = mtk_dsi_recv_cnt(read_data[0], read_data);
+
+	if (recv_cnt > 2)
+		src_addr = &read_data[4];
+	else
+		src_addr = &read_data[1];
+
+	if (recv_cnt > 10)
+		recv_cnt = 10;
+
+	if (recv_cnt > msg->rx_len)
+		recv_cnt = msg->rx_len;
+
+	if (recv_cnt)
+		memcpy(msg->rx_buf, src_addr, recv_cnt);
+
+	DRM_INFO("dsi get %d byte data from the panel address(0x%x)\n",
+		 recv_cnt, *((u8 *)(msg->tx_buf)));
+
+	return recv_cnt;
+}
+
 static const struct mipi_dsi_host_ops mtk_dsi_ops = {
 	.attach = mtk_dsi_host_attach,
 	.detach = mtk_dsi_host_detach,
+	.transfer = mtk_dsi_host_transfer,
 };
 
 static int mtk_dsi_bind(struct device *dev, struct device *master, void *data)
@@ -801,8 +1097,8 @@ static int mtk_dsi_probe(struct platform_device *pdev)
 {
 	struct mtk_dsi *dsi;
 	struct device *dev = &pdev->dev;
-	struct device_node *remote_node, *endpoint;
 	struct resource *regs;
+	int irq_num;
 	int comp_id;
 	int ret;
 
@@ -813,22 +1109,10 @@ static int mtk_dsi_probe(struct platform_device *pdev)
 	dsi->host.ops = &mtk_dsi_ops;
 	dsi->host.dev = dev;
 
-	endpoint = of_graph_get_next_endpoint(dev->of_node, NULL);
-	if (endpoint) {
-		remote_node = of_graph_get_remote_port_parent(endpoint);
-		if (!remote_node) {
-			dev_err(dev, "No panel connected\n");
-			return -ENODEV;
-		}
-
-		dsi->bridge = of_drm_find_bridge(remote_node);
-		dsi->panel = of_drm_find_panel(remote_node);
-		of_node_put(remote_node);
-		if (!dsi->bridge && !dsi->panel) {
-			dev_info(dev, "Waiting for bridge or panel driver\n");
-			return -EPROBE_DEFER;
-		}
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 0, 0,
+					  &dsi->panel, &dsi->bridge);
+	if (ret)
+		return ret;
 
 	dsi->engine_clk = devm_clk_get(dev, "engine");
 	if (IS_ERR(dsi->engine_clk)) {
@@ -879,6 +1163,22 @@ static int mtk_dsi_probe(struct platform_device *pdev)
 		return ret;
 	}
 
+	irq_num = platform_get_irq(pdev, 0);
+	if (irq_num < 0) {
+		dev_err(&pdev->dev, "failed to request dsi irq resource\n");
+		return -EPROBE_DEFER;
+	}
+
+	irq_set_status_flags(irq_num, IRQ_TYPE_LEVEL_LOW);
+	ret = devm_request_irq(&pdev->dev, irq_num, mtk_dsi_irq,
+			       IRQF_TRIGGER_LOW, dev_name(&pdev->dev), dsi);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request mediatek dsi irq\n");
+		return -EPROBE_DEFER;
+	}
+
+	init_waitqueue_head(&dsi->irq_wait_queue);
+
 	platform_set_drvdata(pdev, dsi);
 
 	return component_add(&pdev->dev, &mtk_dsi_component_ops);
@@ -895,6 +1195,7 @@ static int mtk_dsi_remove(struct platform_device *pdev)
 }
 
 static const struct of_device_id mtk_dsi_of_match[] = {
+	{ .compatible = "mediatek,mt2701-dsi" },
 	{ .compatible = "mediatek,mt8173-dsi" },
 	{ },
 };
diff --git a/drivers/gpu/drm/mediatek/mtk_hdmi.c b/drivers/gpu/drm/mediatek/mtk_hdmi.c
index c26251260b83..41a1c03b0347 100644
--- a/drivers/gpu/drm/mediatek/mtk_hdmi.c
+++ b/drivers/gpu/drm/mediatek/mtk_hdmi.c
@@ -1434,7 +1434,7 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
 {
 	struct device *dev = &pdev->dev;
 	struct device_node *np = dev->of_node;
-	struct device_node *cec_np, *port, *ep, *remote, *i2c_np;
+	struct device_node *cec_np, *remote, *i2c_np;
 	struct platform_device *cec_pdev;
 	struct regmap *regmap;
 	struct resource *mem;
@@ -1486,29 +1486,9 @@ static int mtk_hdmi_dt_parse_pdata(struct mtk_hdmi *hdmi,
 	if (IS_ERR(hdmi->regs))
 		return PTR_ERR(hdmi->regs);
 
-	port = of_graph_get_port_by_id(np, 1);
-	if (!port) {
-		dev_err(dev, "Missing output port node\n");
+	remote = of_graph_get_remote_node(np, 1, 0);
+	if (!remote)
 		return -EINVAL;
-	}
-
-	ep = of_get_child_by_name(port, "endpoint");
-	if (!ep) {
-		dev_err(dev, "Missing endpoint node in port %s\n",
-			port->full_name);
-		of_node_put(port);
-		return -EINVAL;
-	}
-	of_node_put(port);
-
-	remote = of_graph_get_remote_port_parent(ep);
-	if (!remote) {
-		dev_err(dev, "Missing connector/bridge node for endpoint %s\n",
-			ep->full_name);
-		of_node_put(ep);
-		return -EINVAL;
-	}
-	of_node_put(ep);
 
 	if (!of_device_is_compatible(remote, "hdmi-connector")) {
 		hdmi->next_bridge = of_drm_find_bridge(remote);
diff --git a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
index 1c366f8cb2d0..90e913108950 100644
--- a/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
+++ b/drivers/gpu/drm/mediatek/mtk_mipi_tx.c
@@ -16,6 +16,7 @@
 #include <linux/delay.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/of_device.h>
 #include <linux/platform_device.h>
 #include <linux/phy/phy.h>
 
@@ -87,6 +88,9 @@
 
 #define MIPITX_DSI_PLL_CON2	0x58
 
+#define MIPITX_DSI_PLL_TOP	0x64
+#define RG_DSI_MPPLL_PRESERVE		(0xff << 8)
+
 #define MIPITX_DSI_PLL_PWR	0x68
 #define RG_DSI_MPPLL_SDM_PWR_ON		BIT(0)
 #define RG_DSI_MPPLL_SDM_ISO_EN		BIT(1)
@@ -123,10 +127,15 @@
 #define SW_LNT2_HSTX_PRE_OE		BIT(24)
 #define SW_LNT2_HSTX_OE			BIT(25)
 
+struct mtk_mipitx_data {
+	const u32 mppll_preserve;
+};
+
 struct mtk_mipi_tx {
 	struct device *dev;
 	void __iomem *regs;
-	unsigned int data_rate;
+	u32 data_rate;
+	const struct mtk_mipitx_data *driver_data;
 	struct clk_hw pll_hw;
 	struct clk *pll;
 };
@@ -163,7 +172,7 @@ static void mtk_mipi_tx_update_bits(struct mtk_mipi_tx *mipi_tx, u32 offset,
 static int mtk_mipi_tx_pll_prepare(struct clk_hw *hw)
 {
 	struct mtk_mipi_tx *mipi_tx = mtk_mipi_tx_from_clk_hw(hw);
-	unsigned int txdiv, txdiv0, txdiv1;
+	u8 txdiv, txdiv0, txdiv1;
 	u64 pcw;
 
 	dev_dbg(mipi_tx->dev, "prepare: %u Hz\n", mipi_tx->data_rate);
@@ -243,6 +252,10 @@ static int mtk_mipi_tx_pll_prepare(struct clk_hw *hw)
 	mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON1,
 			       RG_DSI_MPPLL_SDM_SSC_EN);
 
+	mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP,
+				RG_DSI_MPPLL_PRESERVE,
+				mipi_tx->driver_data->mppll_preserve);
+
 	return 0;
 }
 
@@ -255,6 +268,9 @@ static void mtk_mipi_tx_pll_unprepare(struct clk_hw *hw)
 	mtk_mipi_tx_clear_bits(mipi_tx, MIPITX_DSI_PLL_CON0,
 			       RG_DSI_MPPLL_PLL_EN);
 
+	mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_TOP,
+				RG_DSI_MPPLL_PRESERVE, 0);
+
 	mtk_mipi_tx_update_bits(mipi_tx, MIPITX_DSI_PLL_PWR,
 				RG_DSI_MPPLL_SDM_ISO_EN |
 				RG_DSI_MPPLL_SDM_PWR_ON,
@@ -310,7 +326,7 @@ static const struct clk_ops mtk_mipi_tx_pll_ops = {
 static int mtk_mipi_tx_power_on_signal(struct phy *phy)
 {
 	struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy);
-	unsigned int reg;
+	u32 reg;
 
 	for (reg = MIPITX_DSI_CLOCK_LANE;
 	     reg <= MIPITX_DSI_DATA_LANE3; reg += 4)
@@ -341,7 +357,7 @@ static int mtk_mipi_tx_power_on(struct phy *phy)
 static void mtk_mipi_tx_power_off_signal(struct phy *phy)
 {
 	struct mtk_mipi_tx *mipi_tx = phy_get_drvdata(phy);
-	unsigned int reg;
+	u32 reg;
 
 	mtk_mipi_tx_set_bits(mipi_tx, MIPITX_DSI_TOP_CON,
 			     RG_DSI_PAD_TIE_LOW_EN);
@@ -391,6 +407,7 @@ static int mtk_mipi_tx_probe(struct platform_device *pdev)
 	if (!mipi_tx)
 		return -ENOMEM;
 
+	mipi_tx->driver_data = of_device_get_match_data(dev);
 	mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	mipi_tx->regs = devm_ioremap_resource(dev, mem);
 	if (IS_ERR(mipi_tx->regs)) {
@@ -448,8 +465,19 @@ static int mtk_mipi_tx_remove(struct platform_device *pdev)
 	return 0;
 }
 
+static const struct mtk_mipitx_data mt2701_mipitx_data = {
+	.mppll_preserve = (3 << 8)
+};
+
+static const struct mtk_mipitx_data mt8173_mipitx_data = {
+	.mppll_preserve = (0 << 8)
+};
+
 static const struct of_device_id mtk_mipi_tx_match[] = {
-	{ .compatible = "mediatek,mt8173-mipi-tx", },
+	{ .compatible = "mediatek,mt2701-mipi-tx",
+	  .data = &mt2701_mipitx_data },
+	{ .compatible = "mediatek,mt8173-mipi-tx",
+	  .data = &mt8173_mipitx_data },
 	{},
 };
 
diff --git a/drivers/gpu/drm/meson/Kconfig b/drivers/gpu/drm/meson/Kconfig
index 99719afcc77f..3ce51d8dfe1c 100644
--- a/drivers/gpu/drm/meson/Kconfig
+++ b/drivers/gpu/drm/meson/Kconfig
@@ -7,3 +7,9 @@ config DRM_MESON
 	select DRM_GEM_CMA_HELPER
 	select VIDEOMODE_HELPERS
 	select REGMAP_MMIO
+
+config DRM_MESON_DW_HDMI
+	tristate "HDMI Synopsys Controller support for Amlogic Meson Display"
+	depends on DRM_MESON
+	default y if DRM_MESON
+	select DRM_DW_HDMI
diff --git a/drivers/gpu/drm/meson/Makefile b/drivers/gpu/drm/meson/Makefile
index 92cf84530f49..c5c4cc362f02 100644
--- a/drivers/gpu/drm/meson/Makefile
+++ b/drivers/gpu/drm/meson/Makefile
@@ -2,3 +2,4 @@ meson-drm-y := meson_drv.o meson_plane.o meson_crtc.o meson_venc_cvbs.o
 meson-drm-y += meson_viu.o meson_vpp.o meson_venc.o meson_vclk.o meson_canvas.o
 
 obj-$(CONFIG_DRM_MESON) += meson-drm.o
+obj-$(CONFIG_DRM_MESON_DW_HDMI) += meson_dw_hdmi.o
diff --git a/drivers/gpu/drm/meson/meson_canvas.c b/drivers/gpu/drm/meson/meson_canvas.c
index 4109e36c297f..08f6073d967e 100644
--- a/drivers/gpu/drm/meson/meson_canvas.c
+++ b/drivers/gpu/drm/meson/meson_canvas.c
@@ -24,7 +24,9 @@
 #include "meson_canvas.h"
 #include "meson_registers.h"
 
-/*
+/**
+ * DOC: Canvas
+ *
  * CANVAS is a memory zone where physical memory frames information
  * are stored for the VIU to scanout.
  */
diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index 0fe49eccda65..c986eb03b9d9 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -82,11 +82,18 @@ static const struct drm_crtc_funcs meson_crtc_funcs = {
 static void meson_crtc_enable(struct drm_crtc *crtc)
 {
 	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
-	struct drm_plane *plane = meson_crtc->priv->primary_plane;
+	struct drm_crtc_state *crtc_state = crtc->state;
 	struct meson_drm *priv = meson_crtc->priv;
 
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!crtc_state) {
+		DRM_ERROR("Invalid crtc_state\n");
+		return;
+	}
+
 	/* Enable VPP Postblend */
-	writel(plane->state->crtc_w,
+	writel(crtc_state->mode.hdisplay,
 	       priv->io_base + _REG(VPP_POSTBLEND_H_SIZE));
 
 	writel_bits_relaxed(VPP_POSTBLEND_ENABLE, VPP_POSTBLEND_ENABLE,
@@ -101,6 +108,7 @@ static void meson_crtc_disable(struct drm_crtc *crtc)
 	struct meson_drm *priv = meson_crtc->priv;
 
 	priv->viu.osd1_enabled = false;
+	priv->viu.osd1_commit = false;
 
 	/* Disable VPP Postblend */
 	writel_bits_relaxed(VPP_POSTBLEND_ENABLE, 0,
@@ -137,8 +145,7 @@ static void meson_crtc_atomic_flush(struct drm_crtc *crtc,
 	struct meson_crtc *meson_crtc = to_meson_crtc(crtc);
 	struct meson_drm *priv = meson_crtc->priv;
 
-	if (priv->viu.osd1_enabled)
-		priv->viu.osd1_commit = true;
+	priv->viu.osd1_commit = true;
 }
 
 static const struct drm_crtc_helper_funcs meson_crtc_helper_funcs = {
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index bc562a07847b..75382f5f0fce 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -24,6 +24,7 @@
 #include <linux/module.h>
 #include <linux/mutex.h>
 #include <linux/platform_device.h>
+#include <linux/component.h>
 #include <linux/of_graph.h>
 
 #include <drm/drmP.h>
@@ -51,13 +52,14 @@
 #define DRIVER_NAME "meson"
 #define DRIVER_DESC "Amlogic Meson DRM driver"
 
-/*
- * Video Processing Unit
+/**
+ * DOC: Video Processing Unit
  *
  * VPU Handles the Global Video Processing, it includes management of the
  * clocks gates, blocks reset lines and power domains.
  *
  * What is missing :
+ *
  * - Full reset of entire video processing HW blocks
  * - Scaling and setup of the VPU clock
  * - Bus clock gates
@@ -150,9 +152,9 @@ static struct regmap_config meson_regmap_config = {
 	.max_register   = 0x1000,
 };
 
-static int meson_drv_probe(struct platform_device *pdev)
+static int meson_drv_bind(struct device *dev)
 {
-	struct device *dev = &pdev->dev;
+	struct platform_device *pdev = to_platform_device(dev);
 	struct meson_drm *priv;
 	struct drm_device *drm;
 	struct resource *res;
@@ -215,6 +217,15 @@ static int meson_drv_probe(struct platform_device *pdev)
 
 	drm_vblank_init(drm, 1);
 	drm_mode_config_init(drm);
+	drm->mode_config.max_width = 3840;
+	drm->mode_config.max_height = 2160;
+	drm->mode_config.funcs = &meson_mode_config_funcs;
+
+	/* Hardware Initialization */
+
+	meson_venc_init(priv);
+	meson_vpp_init(priv);
+	meson_viu_init(priv);
 
 	/* Encoder Initialization */
 
@@ -222,11 +233,11 @@ static int meson_drv_probe(struct platform_device *pdev)
 	if (ret)
 		goto free_drm;
 
-	/* Hardware Initialization */
-
-	meson_venc_init(priv);
-	meson_vpp_init(priv);
-	meson_viu_init(priv);
+	ret = component_bind_all(drm->dev, drm);
+	if (ret) {
+		dev_err(drm->dev, "Couldn't bind all components\n");
+		goto free_drm;
+	}
 
 	ret = meson_plane_create(priv);
 	if (ret)
@@ -241,9 +252,6 @@ static int meson_drv_probe(struct platform_device *pdev)
 		goto free_drm;
 
 	drm_mode_config_reset(drm);
-	drm->mode_config.max_width = 8192;
-	drm->mode_config.max_height = 8192;
-	drm->mode_config.funcs = &meson_mode_config_funcs;
 
 	priv->fbdev = drm_fbdev_cma_init(drm, 32,
 					 drm->mode_config.num_connector);
@@ -268,9 +276,9 @@ free_drm:
 	return ret;
 }
 
-static int meson_drv_remove(struct platform_device *pdev)
+static void meson_drv_unbind(struct device *dev)
 {
-	struct drm_device *drm = dev_get_drvdata(&pdev->dev);
+	struct drm_device *drm = dev_get_drvdata(dev);
 	struct meson_drm *priv = drm->dev_private;
 
 	drm_dev_unregister(drm);
@@ -280,9 +288,88 @@ static int meson_drv_remove(struct platform_device *pdev)
 	drm_vblank_cleanup(drm);
 	drm_dev_unref(drm);
 
-	return 0;
 }
 
+static const struct component_master_ops meson_drv_master_ops = {
+	.bind	= meson_drv_bind,
+	.unbind	= meson_drv_unbind,
+};
+
+static int compare_of(struct device *dev, void *data)
+{
+	DRM_DEBUG_DRIVER("Comparing of node %s with %s\n",
+			 of_node_full_name(dev->of_node),
+			 of_node_full_name(data));
+
+	return dev->of_node == data;
+}
+
+/* Possible connectors nodes to ignore */
+static const struct of_device_id connectors_match[] = {
+	{ .compatible = "composite-video-connector" },
+	{ .compatible = "svideo-connector" },
+	{ .compatible = "hdmi-connector" },
+	{ .compatible = "dvi-connector" },
+	{}
+};
+
+static int meson_probe_remote(struct platform_device *pdev,
+			      struct component_match **match,
+			      struct device_node *parent,
+			      struct device_node *remote)
+{
+	struct device_node *ep, *remote_node;
+	int count = 1;
+
+	/* If node is a connector, return and do not add to match table */
+	if (of_match_node(connectors_match, remote))
+		return 1;
+
+	component_match_add(&pdev->dev, match, compare_of, remote);
+
+	for_each_endpoint_of_node(remote, ep) {
+		remote_node = of_graph_get_remote_port_parent(ep);
+		if (!remote_node ||
+		    remote_node == parent || /* Ignore parent endpoint */
+		    !of_device_is_available(remote_node))
+			continue;
+
+		count += meson_probe_remote(pdev, match, remote, remote_node);
+
+		of_node_put(remote_node);
+	}
+
+	return count;
+}
+
+static int meson_drv_probe(struct platform_device *pdev)
+{
+	struct component_match *match = NULL;
+	struct device_node *np = pdev->dev.of_node;
+	struct device_node *ep, *remote;
+	int count = 0;
+
+	for_each_endpoint_of_node(np, ep) {
+		remote = of_graph_get_remote_port_parent(ep);
+		if (!remote || !of_device_is_available(remote))
+			continue;
+
+		count += meson_probe_remote(pdev, &match, np, remote);
+	}
+
+	/* If some endpoints were found, initialize the nodes */
+	if (count) {
+		dev_info(&pdev->dev, "Queued %d outputs on vpu\n", count);
+
+		return component_master_add_with_match(&pdev->dev,
+						       &meson_drv_master_ops,
+						       match);
+	}
+
+	/* If no output endpoints were available, simply bail out */
+	return 0;
+};
+
 static const struct of_device_id dt_match[] = {
 	{ .compatible = "amlogic,meson-gxbb-vpu" },
 	{ .compatible = "amlogic,meson-gxl-vpu" },
@@ -293,7 +380,6 @@ MODULE_DEVICE_TABLE(of, dt_match);
 
 static struct platform_driver meson_drm_platform_driver = {
 	.probe      = meson_drv_probe,
-	.remove     = meson_drv_remove,
 	.driver     = {
 		.name	= "meson-drm",
 		.of_match_table = dt_match,
diff --git a/drivers/gpu/drm/meson/meson_drv.h b/drivers/gpu/drm/meson/meson_drv.h
index 6195327c51ca..5e8b392b9d1f 100644
--- a/drivers/gpu/drm/meson/meson_drv.h
+++ b/drivers/gpu/drm/meson/meson_drv.h
@@ -47,6 +47,9 @@ struct meson_drm {
 
 	struct {
 		unsigned int current_mode;
+		bool hdmi_repeat;
+		bool venc_repeat;
+		bool hdmi_use_enci;
 	} venc;
 };
 
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
new file mode 100644
index 000000000000..7b86eb7776b3
--- /dev/null
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -0,0 +1,919 @@
+/*
+ * Copyright (C) 2016 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/component.h>
+#include <linux/of_graph.h>
+#include <linux/reset.h>
+#include <linux/clk.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_edid.h>
+#include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic_helper.h>
+#include <drm/bridge/dw_hdmi.h>
+
+#include <uapi/linux/media-bus-format.h>
+#include <uapi/linux/videodev2.h>
+
+#include "meson_drv.h"
+#include "meson_venc.h"
+#include "meson_vclk.h"
+#include "meson_dw_hdmi.h"
+#include "meson_registers.h"
+
+#define DRIVER_NAME "meson-dw-hdmi"
+#define DRIVER_DESC "Amlogic Meson HDMI-TX DRM driver"
+
+/**
+ * DOC: HDMI Output
+ *
+ * HDMI Output is composed of :
+ *
+ * - A Synopsys DesignWare HDMI Controller IP
+ * - A TOP control block controlling the Clocks and PHY
+ * - A custom HDMI PHY in order convert video to TMDS signal
+ *
+ * .. code::
+ *
+ *    ___________________________________
+ *   |            HDMI TOP               |<= HPD
+ *   |___________________________________|
+ *   |                  |                |
+ *   |  Synopsys HDMI   |   HDMI PHY     |=> TMDS
+ *   |    Controller    |________________|
+ *   |___________________________________|<=> DDC
+ *
+ *
+ * The HDMI TOP block only supports HPD sensing.
+ * The Synopsys HDMI Controller interrupt is routed
+ * through the TOP Block interrupt.
+ * Communication to the TOP Block and the Synopsys
+ * HDMI Controller is done a pair of addr+read/write
+ * registers.
+ * The HDMI PHY is configured by registers in the
+ * HHI register block.
+ *
+ * Pixel data arrives in 4:4:4 format from the VENC
+ * block and the VPU HDMI mux selects either the ENCI
+ * encoder for the 576i or 480i formats or the ENCP
+ * encoder for all the other formats including
+ * interlaced HD formats.
+ * The VENC uses a DVI encoder on top of the ENCI
+ * or ENCP encoders to generate DVI timings for the
+ * HDMI controller.
+ *
+ * GXBB, GXL and GXM embeds the Synopsys DesignWare
+ * HDMI TX IP version 2.01a with HDCP and I2C & S/PDIF
+ * audio source interfaces.
+ *
+ * We handle the following features :
+ *
+ * - HPD Rise & Fall interrupt
+ * - HDMI Controller Interrupt
+ * - HDMI PHY Init for 480i to 1080p60
+ * - VENC & HDMI Clock setup for 480i to 1080p60
+ * - VENC Mode setup for 480i to 1080p60
+ *
+ * What is missing :
+ *
+ * - PHY, Clock and Mode setup for 2k && 4k modes
+ * - SDDC Scrambling mode for HDMI 2.0a
+ * - HDCP Setup
+ * - CEC Management
+ */
+
+/* TOP Block Communication Channel */
+#define HDMITX_TOP_ADDR_REG	0x0
+#define HDMITX_TOP_DATA_REG	0x4
+#define HDMITX_TOP_CTRL_REG	0x8
+
+/* Controller Communication Channel */
+#define HDMITX_DWC_ADDR_REG	0x10
+#define HDMITX_DWC_DATA_REG	0x14
+#define HDMITX_DWC_CTRL_REG	0x18
+
+/* HHI Registers */
+#define HHI_MEM_PD_REG0		0x100 /* 0x40 */
+#define HHI_HDMI_CLK_CNTL	0x1cc /* 0x73 */
+#define HHI_HDMI_PHY_CNTL0	0x3a0 /* 0xe8 */
+#define HHI_HDMI_PHY_CNTL1	0x3a4 /* 0xe9 */
+#define HHI_HDMI_PHY_CNTL2	0x3a8 /* 0xea */
+#define HHI_HDMI_PHY_CNTL3	0x3ac /* 0xeb */
+
+static DEFINE_SPINLOCK(reg_lock);
+
+enum meson_venc_source {
+	MESON_VENC_SOURCE_NONE = 0,
+	MESON_VENC_SOURCE_ENCI = 1,
+	MESON_VENC_SOURCE_ENCP = 2,
+};
+
+struct meson_dw_hdmi {
+	struct drm_encoder encoder;
+	struct dw_hdmi_plat_data dw_plat_data;
+	struct meson_drm *priv;
+	struct device *dev;
+	void __iomem *hdmitx;
+	struct reset_control *hdmitx_apb;
+	struct reset_control *hdmitx_ctrl;
+	struct reset_control *hdmitx_phy;
+	struct clk *hdmi_pclk;
+	struct clk *venci_clk;
+	u32 irq_stat;
+};
+#define encoder_to_meson_dw_hdmi(x) \
+	container_of(x, struct meson_dw_hdmi, encoder)
+
+static inline int dw_hdmi_is_compatible(struct meson_dw_hdmi *dw_hdmi,
+					const char *compat)
+{
+	return of_device_is_compatible(dw_hdmi->dev->of_node, compat);
+}
+
+/* PHY (via TOP bridge) and Controller dedicated register interface */
+
+static unsigned int dw_hdmi_top_read(struct meson_dw_hdmi *dw_hdmi,
+				     unsigned int addr)
+{
+	unsigned long flags;
+	unsigned int data;
+
+	spin_lock_irqsave(&reg_lock, flags);
+
+	/* ADDR must be written twice */
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_TOP_ADDR_REG);
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_TOP_ADDR_REG);
+
+	/* Read needs a second DATA read */
+	data = readl(dw_hdmi->hdmitx + HDMITX_TOP_DATA_REG);
+	data = readl(dw_hdmi->hdmitx + HDMITX_TOP_DATA_REG);
+
+	spin_unlock_irqrestore(&reg_lock, flags);
+
+	return data;
+}
+
+static inline void dw_hdmi_top_write(struct meson_dw_hdmi *dw_hdmi,
+				     unsigned int addr, unsigned int data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&reg_lock, flags);
+
+	/* ADDR must be written twice */
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_TOP_ADDR_REG);
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_TOP_ADDR_REG);
+
+	/* Write needs single DATA write */
+	writel(data, dw_hdmi->hdmitx + HDMITX_TOP_DATA_REG);
+
+	spin_unlock_irqrestore(&reg_lock, flags);
+}
+
+/* Helper to change specific bits in PHY registers */
+static inline void dw_hdmi_top_write_bits(struct meson_dw_hdmi *dw_hdmi,
+					  unsigned int addr,
+					  unsigned int mask,
+					  unsigned int val)
+{
+	unsigned int data = dw_hdmi_top_read(dw_hdmi, addr);
+
+	data &= ~mask;
+	data |= val;
+
+	dw_hdmi_top_write(dw_hdmi, addr, data);
+}
+
+static unsigned int dw_hdmi_dwc_read(struct meson_dw_hdmi *dw_hdmi,
+				     unsigned int addr)
+{
+	unsigned long flags;
+	unsigned int data;
+
+	spin_lock_irqsave(&reg_lock, flags);
+
+	/* ADDR must be written twice */
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_DWC_ADDR_REG);
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_DWC_ADDR_REG);
+
+	/* Read needs a second DATA read */
+	data = readl(dw_hdmi->hdmitx + HDMITX_DWC_DATA_REG);
+	data = readl(dw_hdmi->hdmitx + HDMITX_DWC_DATA_REG);
+
+	spin_unlock_irqrestore(&reg_lock, flags);
+
+	return data;
+}
+
+static inline void dw_hdmi_dwc_write(struct meson_dw_hdmi *dw_hdmi,
+				     unsigned int addr, unsigned int data)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&reg_lock, flags);
+
+	/* ADDR must be written twice */
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_DWC_ADDR_REG);
+	writel(addr & 0xffff, dw_hdmi->hdmitx + HDMITX_DWC_ADDR_REG);
+
+	/* Write needs single DATA write */
+	writel(data, dw_hdmi->hdmitx + HDMITX_DWC_DATA_REG);
+
+	spin_unlock_irqrestore(&reg_lock, flags);
+}
+
+/* Helper to change specific bits in controller registers */
+static inline void dw_hdmi_dwc_write_bits(struct meson_dw_hdmi *dw_hdmi,
+					  unsigned int addr,
+					  unsigned int mask,
+					  unsigned int val)
+{
+	unsigned int data = dw_hdmi_dwc_read(dw_hdmi, addr);
+
+	data &= ~mask;
+	data |= val;
+
+	dw_hdmi_dwc_write(dw_hdmi, addr, data);
+}
+
+/* Bridge */
+
+/* Setup PHY bandwidth modes */
+static void meson_hdmi_phy_setup_mode(struct meson_dw_hdmi *dw_hdmi,
+				      struct drm_display_mode *mode)
+{
+	struct meson_drm *priv = dw_hdmi->priv;
+	unsigned int pixel_clock = mode->clock;
+
+	if (dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxl-dw-hdmi") ||
+	    dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxm-dw-hdmi")) {
+		if (pixel_clock >= 371250) {
+			/* 5.94Gbps, 3.7125Gbps */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x333d3282);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x2136315b);
+		} else if (pixel_clock >= 297000) {
+			/* 2.97Gbps */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33303382);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x2036315b);
+		} else if (pixel_clock >= 148500) {
+			/* 1.485Gbps */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33303362);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x2016315b);
+		} else {
+			/* 742.5Mbps, and below */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33604142);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x0016315b);
+		}
+	} else if (dw_hdmi_is_compatible(dw_hdmi,
+					 "amlogic,meson-gxbb-dw-hdmi")) {
+		if (pixel_clock >= 371250) {
+			/* 5.94Gbps, 3.7125Gbps */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33353245);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x2100115b);
+		} else if (pixel_clock >= 297000) {
+			/* 2.97Gbps */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33634283);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0xb000115b);
+		} else {
+			/* 1.485Gbps, and below */
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0x33632122);
+			regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL3, 0x2000115b);
+		}
+	}
+}
+
+static inline void dw_hdmi_phy_reset(struct meson_dw_hdmi *dw_hdmi)
+{
+	struct meson_drm *priv = dw_hdmi->priv;
+
+	/* Enable and software reset */
+	regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, 0xf, 0xf);
+
+	mdelay(2);
+
+	/* Enable and unreset */
+	regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, 0xf, 0xe);
+
+	mdelay(2);
+}
+
+static void dw_hdmi_set_vclk(struct meson_dw_hdmi *dw_hdmi,
+			     struct drm_display_mode *mode)
+{
+	struct meson_drm *priv = dw_hdmi->priv;
+	int vic = drm_match_cea_mode(mode);
+	unsigned int vclk_freq;
+	unsigned int venc_freq;
+	unsigned int hdmi_freq;
+
+	vclk_freq = mode->clock;
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		vclk_freq *= 2;
+
+	venc_freq = vclk_freq;
+	hdmi_freq = vclk_freq;
+
+	if (meson_venc_hdmi_venc_repeat(vic))
+		venc_freq *= 2;
+
+	vclk_freq = max(venc_freq, hdmi_freq);
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		venc_freq /= 2;
+
+	DRM_DEBUG_DRIVER("vclk:%d venc=%d hdmi=%d enci=%d\n",
+		vclk_freq, venc_freq, hdmi_freq,
+		priv->venc.hdmi_use_enci);
+
+	meson_vclk_setup(priv, MESON_VCLK_TARGET_HDMI, vclk_freq,
+			 venc_freq, hdmi_freq, priv->venc.hdmi_use_enci);
+}
+
+static int dw_hdmi_phy_init(struct dw_hdmi *hdmi, void *data,
+			    struct drm_display_mode *mode)
+{
+	struct meson_dw_hdmi *dw_hdmi = (struct meson_dw_hdmi *)data;
+	struct meson_drm *priv = dw_hdmi->priv;
+	unsigned int wr_clk =
+		readl_relaxed(priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	DRM_DEBUG_DRIVER("%d:\"%s\"\n", mode->base.id, mode->name);
+
+	/* Enable clocks */
+	regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL, 0xffff, 0x100);
+
+	/* Bring HDMITX MEM output of power down */
+	regmap_update_bits(priv->hhi, HHI_MEM_PD_REG0, 0xff << 8, 0);
+
+	/* Bring out of reset */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_SW_RESET,  0);
+
+	/* Enable internal pixclk, tmds_clk, spdif_clk, i2s_clk, cecclk */
+	dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL,
+			       0x3, 0x3);
+	dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_CLK_CNTL,
+			       0x3 << 4, 0x3 << 4);
+
+	/* Enable normal output to PHY */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_BIST_CNTL, BIT(12));
+
+	/* TMDS pattern setup (TOFIX pattern for 4k2k scrambling) */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_01, 0x001f001f);
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_23, 0x001f001f);
+
+	/* Load TMDS pattern */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_CNTL, 0x1);
+	msleep(20);
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_TMDS_CLK_PTTN_CNTL, 0x2);
+
+	/* Setup PHY parameters */
+	meson_hdmi_phy_setup_mode(dw_hdmi, mode);
+
+	/* Setup PHY */
+	regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1,
+			   0xffff << 16, 0x0390 << 16);
+
+	/* BIT_INVERT */
+	if (dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxl-dw-hdmi") ||
+	    dw_hdmi_is_compatible(dw_hdmi, "amlogic,meson-gxm-dw-hdmi"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1,
+				   BIT(17), 0);
+	else
+		regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1,
+				   BIT(17), BIT(17));
+
+	/* Disable clock, fifo, fifo_wr */
+	regmap_update_bits(priv->hhi, HHI_HDMI_PHY_CNTL1, 0xf, 0);
+
+	msleep(100);
+
+	/* Reset PHY 3 times in a row */
+	dw_hdmi_phy_reset(dw_hdmi);
+	dw_hdmi_phy_reset(dw_hdmi);
+	dw_hdmi_phy_reset(dw_hdmi);
+
+	/* Temporary Disable VENC video stream */
+	if (priv->venc.hdmi_use_enci)
+		writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
+	else
+		writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));
+
+	/* Temporary Disable HDMI video stream to HDMI-TX */
+	writel_bits_relaxed(0x3, 0,
+			    priv->io_base + _REG(VPU_HDMI_SETTING));
+	writel_bits_relaxed(0xf << 8, 0,
+			    priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	/* Re-Enable VENC video stream */
+	if (priv->venc.hdmi_use_enci)
+		writel_relaxed(1, priv->io_base + _REG(ENCI_VIDEO_EN));
+	else
+		writel_relaxed(1, priv->io_base + _REG(ENCP_VIDEO_EN));
+
+	/* Push back HDMI clock settings */
+	writel_bits_relaxed(0xf << 8, wr_clk & (0xf << 8),
+			    priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	/* Enable and Select HDMI video source for HDMI-TX */
+	if (priv->venc.hdmi_use_enci)
+		writel_bits_relaxed(0x3, MESON_VENC_SOURCE_ENCI,
+				    priv->io_base + _REG(VPU_HDMI_SETTING));
+	else
+		writel_bits_relaxed(0x3, MESON_VENC_SOURCE_ENCP,
+				    priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	return 0;
+}
+
+static void dw_hdmi_phy_disable(struct dw_hdmi *hdmi,
+				void *data)
+{
+	struct meson_dw_hdmi *dw_hdmi = (struct meson_dw_hdmi *)data;
+	struct meson_drm *priv = dw_hdmi->priv;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	regmap_write(priv->hhi, HHI_HDMI_PHY_CNTL0, 0);
+}
+
+static enum drm_connector_status dw_hdmi_read_hpd(struct dw_hdmi *hdmi,
+			     void *data)
+{
+	struct meson_dw_hdmi *dw_hdmi = (struct meson_dw_hdmi *)data;
+
+	return !!dw_hdmi_top_read(dw_hdmi, HDMITX_TOP_STAT0) ?
+		connector_status_connected : connector_status_disconnected;
+}
+
+static void dw_hdmi_setup_hpd(struct dw_hdmi *hdmi,
+			      void *data)
+{
+	struct meson_dw_hdmi *dw_hdmi = (struct meson_dw_hdmi *)data;
+
+	/* Setup HPD Filter */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_HPD_FILTER,
+			  (0xa << 12) | 0xa0);
+
+	/* Clear interrupts */
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_INTR_STAT_CLR,
+			  HDMITX_TOP_INTR_HPD_RISE | HDMITX_TOP_INTR_HPD_FALL);
+
+	/* Unmask interrupts */
+	dw_hdmi_top_write_bits(dw_hdmi, HDMITX_TOP_INTR_MASKN,
+			HDMITX_TOP_INTR_HPD_RISE | HDMITX_TOP_INTR_HPD_FALL,
+			HDMITX_TOP_INTR_HPD_RISE | HDMITX_TOP_INTR_HPD_FALL);
+}
+
+static const struct dw_hdmi_phy_ops meson_dw_hdmi_phy_ops = {
+	.init = dw_hdmi_phy_init,
+	.disable = dw_hdmi_phy_disable,
+	.read_hpd = dw_hdmi_read_hpd,
+	.setup_hpd = dw_hdmi_setup_hpd,
+};
+
+static irqreturn_t dw_hdmi_top_irq(int irq, void *dev_id)
+{
+	struct meson_dw_hdmi *dw_hdmi = dev_id;
+	u32 stat;
+
+	stat = dw_hdmi_top_read(dw_hdmi, HDMITX_TOP_INTR_STAT);
+	dw_hdmi_top_write(dw_hdmi, HDMITX_TOP_INTR_STAT_CLR, stat);
+
+	/* HPD Events, handle in the threaded interrupt handler */
+	if (stat & (HDMITX_TOP_INTR_HPD_RISE | HDMITX_TOP_INTR_HPD_FALL)) {
+		dw_hdmi->irq_stat = stat;
+		return IRQ_WAKE_THREAD;
+	}
+
+	/* HDMI Controller Interrupt */
+	if (stat & 1)
+		return IRQ_NONE;
+
+	/* TOFIX Handle HDCP Interrupts */
+
+	return IRQ_HANDLED;
+}
+
+/* Threaded interrupt handler to manage HPD events */
+static irqreturn_t dw_hdmi_top_thread_irq(int irq, void *dev_id)
+{
+	struct meson_dw_hdmi *dw_hdmi = dev_id;
+	u32 stat = dw_hdmi->irq_stat;
+
+	/* HPD Events */
+	if (stat & (HDMITX_TOP_INTR_HPD_RISE | HDMITX_TOP_INTR_HPD_FALL)) {
+		bool hpd_connected = false;
+
+		if (stat & HDMITX_TOP_INTR_HPD_RISE)
+			hpd_connected = true;
+
+		dw_hdmi_setup_rx_sense(dw_hdmi->dev, hpd_connected,
+				       hpd_connected);
+
+		drm_helper_hpd_irq_event(dw_hdmi->encoder.dev);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* TOFIX Enable support for non-vic modes */
+static enum drm_mode_status dw_hdmi_mode_valid(struct drm_connector *connector,
+					       struct drm_display_mode *mode)
+{
+	unsigned int vclk_freq;
+	unsigned int venc_freq;
+	unsigned int hdmi_freq;
+	int vic = drm_match_cea_mode(mode);
+
+	DRM_DEBUG_DRIVER("Modeline %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x\n",
+		mode->base.id, mode->name, mode->vrefresh, mode->clock,
+		mode->hdisplay, mode->hsync_start,
+		mode->hsync_end, mode->htotal,
+		mode->vdisplay, mode->vsync_start,
+		mode->vsync_end, mode->vtotal, mode->type, mode->flags);
+
+	/* For now, only accept VIC modes */
+	if (!vic)
+		return MODE_BAD;
+
+	/* For now, filter by supported VIC modes */
+	if (!meson_venc_hdmi_supported_vic(vic))
+		return MODE_BAD;
+
+	vclk_freq = mode->clock;
+
+	/* 480i/576i needs global pixel doubling */
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		vclk_freq *= 2;
+
+	venc_freq = vclk_freq;
+	hdmi_freq = vclk_freq;
+
+	/* VENC double pixels for 1080i and 720p modes */
+	if (meson_venc_hdmi_venc_repeat(vic))
+		venc_freq *= 2;
+
+	vclk_freq = max(venc_freq, hdmi_freq);
+
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK)
+		venc_freq /= 2;
+
+	dev_dbg(connector->dev->dev, "%s: vclk:%d venc=%d hdmi=%d\n", __func__,
+		vclk_freq, venc_freq, hdmi_freq);
+
+	/* Finally filter by configurable vclk frequencies */
+	switch (vclk_freq) {
+	case 54000:
+	case 74250:
+	case 148500:
+	case 297000:
+	case 594000:
+		return MODE_OK;
+	}
+
+	return MODE_CLOCK_RANGE;
+}
+
+/* Encoder */
+
+static void meson_venc_hdmi_encoder_destroy(struct drm_encoder *encoder)
+{
+	drm_encoder_cleanup(encoder);
+}
+
+static const struct drm_encoder_funcs meson_venc_hdmi_encoder_funcs = {
+	.destroy        = meson_venc_hdmi_encoder_destroy,
+};
+
+static int meson_venc_hdmi_encoder_atomic_check(struct drm_encoder *encoder,
+					struct drm_crtc_state *crtc_state,
+					struct drm_connector_state *conn_state)
+{
+	return 0;
+}
+
+static void meson_venc_hdmi_encoder_disable(struct drm_encoder *encoder)
+{
+	struct meson_dw_hdmi *dw_hdmi = encoder_to_meson_dw_hdmi(encoder);
+	struct meson_drm *priv = dw_hdmi->priv;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	writel_bits_relaxed(0x3, 0,
+			    priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
+	writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));
+}
+
+static void meson_venc_hdmi_encoder_enable(struct drm_encoder *encoder)
+{
+	struct meson_dw_hdmi *dw_hdmi = encoder_to_meson_dw_hdmi(encoder);
+	struct meson_drm *priv = dw_hdmi->priv;
+
+	DRM_DEBUG_DRIVER("%s\n", priv->venc.hdmi_use_enci ? "VENCI" : "VENCP");
+
+	if (priv->venc.hdmi_use_enci)
+		writel_relaxed(1, priv->io_base + _REG(ENCI_VIDEO_EN));
+	else
+		writel_relaxed(1, priv->io_base + _REG(ENCP_VIDEO_EN));
+}
+
+static void meson_venc_hdmi_encoder_mode_set(struct drm_encoder *encoder,
+				   struct drm_display_mode *mode,
+				   struct drm_display_mode *adjusted_mode)
+{
+	struct meson_dw_hdmi *dw_hdmi = encoder_to_meson_dw_hdmi(encoder);
+	struct meson_drm *priv = dw_hdmi->priv;
+	int vic = drm_match_cea_mode(mode);
+
+	DRM_DEBUG_DRIVER("%d:\"%s\" vic %d\n",
+			 mode->base.id, mode->name, vic);
+
+	/* Should have been filtered */
+	if (!vic)
+		return;
+
+	/* VENC + VENC-DVI Mode setup */
+	meson_venc_hdmi_mode_set(priv, vic, mode);
+
+	/* VCLK Set clock */
+	dw_hdmi_set_vclk(dw_hdmi, mode);
+
+	/* Setup YUV444 to HDMI-TX, no 10bit diphering */
+	writel_relaxed(0, priv->io_base + _REG(VPU_HDMI_FMT_CTRL));
+}
+
+static const struct drm_encoder_helper_funcs
+				meson_venc_hdmi_encoder_helper_funcs = {
+	.atomic_check	= meson_venc_hdmi_encoder_atomic_check,
+	.disable	= meson_venc_hdmi_encoder_disable,
+	.enable		= meson_venc_hdmi_encoder_enable,
+	.mode_set	= meson_venc_hdmi_encoder_mode_set,
+};
+
+/* DW HDMI Regmap */
+
+static int meson_dw_hdmi_reg_read(void *context, unsigned int reg,
+				  unsigned int *result)
+{
+	*result = dw_hdmi_dwc_read(context, reg);
+
+	return 0;
+
+}
+
+static int meson_dw_hdmi_reg_write(void *context, unsigned int reg,
+				   unsigned int val)
+{
+	dw_hdmi_dwc_write(context, reg, val);
+
+	return 0;
+}
+
+static const struct regmap_config meson_dw_hdmi_regmap_config = {
+	.reg_bits = 32,
+	.val_bits = 8,
+	.reg_read = meson_dw_hdmi_reg_read,
+	.reg_write = meson_dw_hdmi_reg_write,
+	.max_register = 0x10000,
+};
+
+static bool meson_hdmi_connector_is_available(struct device *dev)
+{
+	struct device_node *ep, *remote;
+
+	/* HDMI Connector is on the second port, first endpoint */
+	ep = of_graph_get_endpoint_by_regs(dev->of_node, 1, 0);
+	if (!ep)
+		return false;
+
+	/* If the endpoint node exists, consider it enabled */
+	remote = of_graph_get_remote_port(ep);
+	if (remote) {
+		of_node_put(ep);
+		return true;
+	}
+
+	of_node_put(ep);
+	of_node_put(remote);
+
+	return false;
+}
+
+static int meson_dw_hdmi_bind(struct device *dev, struct device *master,
+				void *data)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct meson_dw_hdmi *meson_dw_hdmi;
+	struct drm_device *drm = data;
+	struct meson_drm *priv = drm->dev_private;
+	struct dw_hdmi_plat_data *dw_plat_data;
+	struct drm_encoder *encoder;
+	struct resource *res;
+	int irq;
+	int ret;
+
+	DRM_DEBUG_DRIVER("\n");
+
+	if (!meson_hdmi_connector_is_available(dev)) {
+		dev_info(drm->dev, "HDMI Output connector not available\n");
+		return -ENODEV;
+	}
+
+	meson_dw_hdmi = devm_kzalloc(dev, sizeof(*meson_dw_hdmi),
+				     GFP_KERNEL);
+	if (!meson_dw_hdmi)
+		return -ENOMEM;
+
+	meson_dw_hdmi->priv = priv;
+	meson_dw_hdmi->dev = dev;
+	dw_plat_data = &meson_dw_hdmi->dw_plat_data;
+	encoder = &meson_dw_hdmi->encoder;
+
+	meson_dw_hdmi->hdmitx_apb = devm_reset_control_get_exclusive(dev,
+						"hdmitx_apb");
+	if (IS_ERR(meson_dw_hdmi->hdmitx_apb)) {
+		dev_err(dev, "Failed to get hdmitx_apb reset\n");
+		return PTR_ERR(meson_dw_hdmi->hdmitx_apb);
+	}
+
+	meson_dw_hdmi->hdmitx_ctrl = devm_reset_control_get_exclusive(dev,
+						"hdmitx");
+	if (IS_ERR(meson_dw_hdmi->hdmitx_ctrl)) {
+		dev_err(dev, "Failed to get hdmitx reset\n");
+		return PTR_ERR(meson_dw_hdmi->hdmitx_ctrl);
+	}
+
+	meson_dw_hdmi->hdmitx_phy = devm_reset_control_get_exclusive(dev,
+						"hdmitx_phy");
+	if (IS_ERR(meson_dw_hdmi->hdmitx_phy)) {
+		dev_err(dev, "Failed to get hdmitx_phy reset\n");
+		return PTR_ERR(meson_dw_hdmi->hdmitx_phy);
+	}
+
+	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	meson_dw_hdmi->hdmitx = devm_ioremap_resource(dev, res);
+	if (IS_ERR(meson_dw_hdmi->hdmitx))
+		return PTR_ERR(meson_dw_hdmi->hdmitx);
+
+	meson_dw_hdmi->hdmi_pclk = devm_clk_get(dev, "isfr");
+	if (IS_ERR(meson_dw_hdmi->hdmi_pclk)) {
+		dev_err(dev, "Unable to get HDMI pclk\n");
+		return PTR_ERR(meson_dw_hdmi->hdmi_pclk);
+	}
+	clk_prepare_enable(meson_dw_hdmi->hdmi_pclk);
+
+	meson_dw_hdmi->venci_clk = devm_clk_get(dev, "venci");
+	if (IS_ERR(meson_dw_hdmi->venci_clk)) {
+		dev_err(dev, "Unable to get venci clk\n");
+		return PTR_ERR(meson_dw_hdmi->venci_clk);
+	}
+	clk_prepare_enable(meson_dw_hdmi->venci_clk);
+
+	dw_plat_data->regm = devm_regmap_init(dev, NULL, meson_dw_hdmi,
+					      &meson_dw_hdmi_regmap_config);
+	if (IS_ERR(dw_plat_data->regm))
+		return PTR_ERR(dw_plat_data->regm);
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "Failed to get hdmi top irq\n");
+		return irq;
+	}
+
+	ret = devm_request_threaded_irq(dev, irq, dw_hdmi_top_irq,
+					dw_hdmi_top_thread_irq, IRQF_SHARED,
+					"dw_hdmi_top_irq", meson_dw_hdmi);
+	if (ret) {
+		dev_err(dev, "Failed to request hdmi top irq\n");
+		return ret;
+	}
+
+	/* Encoder */
+
+	drm_encoder_helper_add(encoder, &meson_venc_hdmi_encoder_helper_funcs);
+
+	ret = drm_encoder_init(drm, encoder, &meson_venc_hdmi_encoder_funcs,
+			       DRM_MODE_ENCODER_TMDS, "meson_hdmi");
+	if (ret) {
+		dev_err(priv->dev, "Failed to init HDMI encoder\n");
+		return ret;
+	}
+
+	encoder->possible_crtcs = BIT(0);
+
+	DRM_DEBUG_DRIVER("encoder initialized\n");
+
+	/* Enable clocks */
+	regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL, 0xffff, 0x100);
+
+	/* Bring HDMITX MEM output of power down */
+	regmap_update_bits(priv->hhi, HHI_MEM_PD_REG0, 0xff << 8, 0);
+
+	/* Reset HDMITX APB & TX & PHY */
+	reset_control_reset(meson_dw_hdmi->hdmitx_apb);
+	reset_control_reset(meson_dw_hdmi->hdmitx_ctrl);
+	reset_control_reset(meson_dw_hdmi->hdmitx_phy);
+
+	/* Enable APB3 fail on error */
+	writel_bits_relaxed(BIT(15), BIT(15),
+			    meson_dw_hdmi->hdmitx + HDMITX_TOP_CTRL_REG);
+	writel_bits_relaxed(BIT(15), BIT(15),
+			    meson_dw_hdmi->hdmitx + HDMITX_DWC_CTRL_REG);
+
+	/* Bring out of reset */
+	dw_hdmi_top_write(meson_dw_hdmi, HDMITX_TOP_SW_RESET,  0);
+
+	msleep(20);
+
+	dw_hdmi_top_write(meson_dw_hdmi, HDMITX_TOP_CLK_CNTL, 0xff);
+
+	/* Enable HDMI-TX Interrupt */
+	dw_hdmi_top_write(meson_dw_hdmi, HDMITX_TOP_INTR_STAT_CLR,
+			  HDMITX_TOP_INTR_CORE);
+
+	dw_hdmi_top_write(meson_dw_hdmi, HDMITX_TOP_INTR_MASKN,
+			  HDMITX_TOP_INTR_CORE);
+
+	/* Bridge / Connector */
+
+	dw_plat_data->mode_valid = dw_hdmi_mode_valid;
+	dw_plat_data->phy_ops = &meson_dw_hdmi_phy_ops;
+	dw_plat_data->phy_name = "meson_dw_hdmi_phy";
+	dw_plat_data->phy_data = meson_dw_hdmi;
+	dw_plat_data->input_bus_format = MEDIA_BUS_FMT_YUV8_1X24;
+	dw_plat_data->input_bus_encoding = V4L2_YCBCR_ENC_709;
+
+	ret = dw_hdmi_bind(pdev, encoder, &meson_dw_hdmi->dw_plat_data);
+	if (ret)
+		return ret;
+
+	DRM_DEBUG_DRIVER("HDMI controller initialized\n");
+
+	return 0;
+}
+
+static void meson_dw_hdmi_unbind(struct device *dev, struct device *master,
+				   void *data)
+{
+	dw_hdmi_unbind(dev);
+}
+
+static const struct component_ops meson_dw_hdmi_ops = {
+	.bind	= meson_dw_hdmi_bind,
+	.unbind	= meson_dw_hdmi_unbind,
+};
+
+static int meson_dw_hdmi_probe(struct platform_device *pdev)
+{
+	return component_add(&pdev->dev, &meson_dw_hdmi_ops);
+}
+
+static int meson_dw_hdmi_remove(struct platform_device *pdev)
+{
+	component_del(&pdev->dev, &meson_dw_hdmi_ops);
+
+	return 0;
+}
+
+static const struct of_device_id meson_dw_hdmi_of_table[] = {
+	{ .compatible = "amlogic,meson-gxbb-dw-hdmi" },
+	{ .compatible = "amlogic,meson-gxl-dw-hdmi" },
+	{ .compatible = "amlogic,meson-gxm-dw-hdmi" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, meson_dw_hdmi_of_table);
+
+static struct platform_driver meson_dw_hdmi_platform_driver = {
+	.probe		= meson_dw_hdmi_probe,
+	.remove		= meson_dw_hdmi_remove,
+	.driver		= {
+		.name		= DRIVER_NAME,
+		.of_match_table	= meson_dw_hdmi_of_table,
+	},
+};
+module_platform_driver(meson_dw_hdmi_platform_driver);
+
+MODULE_AUTHOR("Neil Armstrong <narmstrong@baylibre.com>");
+MODULE_DESCRIPTION(DRIVER_DESC);
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.h b/drivers/gpu/drm/meson/meson_dw_hdmi.h
new file mode 100644
index 000000000000..0b81183125e3
--- /dev/null
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.h
@@ -0,0 +1,146 @@
+/*
+ * Copyright (C) 2016 BayLibre, SAS
+ * Author: Neil Armstrong <narmstrong@baylibre.com>
+ * Copyright (C) 2015 Amlogic, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MESON_DW_HDMI_H
+#define __MESON_DW_HDMI_H
+
+/*
+ * Bit 7 RW Reserved. Default 1.
+ * Bit 6 RW Reserved. Default 1.
+ * Bit 5 RW Reserved. Default 1.
+ * Bit 4 RW sw_reset_phyif: PHY interface. 1=Apply reset; 0=Release from reset.
+ *     Default 1.
+ * Bit 3 RW sw_reset_intr: interrupt module. 1=Apply reset;
+ *     0=Release from reset.
+ *     Default 1.
+ * Bit 2 RW sw_reset_mem: KSV/REVOC mem. 1=Apply reset; 0=Release from reset.
+ *     Default 1.
+ * Bit 1 RW sw_reset_rnd: random number interface to HDCP. 1=Apply reset;
+ *     0=Release from reset. Default 1.
+ * Bit 0 RW sw_reset_core: connects to IP's ~irstz. 1=Apply reset;
+ *     0=Release from reset. Default 1.
+ */
+#define HDMITX_TOP_SW_RESET                     (0x000)
+
+/*
+ * Bit 12 RW i2s_ws_inv:1=Invert i2s_ws; 0=No invert. Default 0.
+ * Bit 11 RW i2s_clk_inv: 1=Invert i2s_clk; 0=No invert. Default 0.
+ * Bit 10 RW spdif_clk_inv: 1=Invert spdif_clk; 0=No invert. Default 0.
+ * Bit 9 RW tmds_clk_inv: 1=Invert tmds_clk; 0=No invert. Default 0.
+ * Bit 8 RW pixel_clk_inv: 1=Invert pixel_clk; 0=No invert. Default 0.
+ * Bit 4 RW cec_clk_en: 1=enable cec_clk; 0=disable. Default 0.
+ * Bit 3 RW i2s_clk_en: 1=enable i2s_clk; 0=disable. Default 0.
+ * Bit 2 RW spdif_clk_en: 1=enable spdif_clk; 0=disable. Default 0.
+ * Bit 1 RW tmds_clk_en: 1=enable tmds_clk;  0=disable. Default 0.
+ * Bit 0 RW pixel_clk_en: 1=enable pixel_clk; 0=disable. Default 0.
+ */
+#define HDMITX_TOP_CLK_CNTL                     (0x001)
+
+/*
+ * Bit 11: 0 RW hpd_valid_width: filter out width <= M*1024.    Default 0.
+ * Bit 15:12 RW hpd_glitch_width: filter out glitch <= N.       Default 0.
+ */
+#define HDMITX_TOP_HPD_FILTER                   (0x002)
+
+/*
+ * intr_maskn: MASK_N, one bit per interrupt source.
+ *     1=Enable interrupt source; 0=Disable interrupt source. Default 0.
+ * [  4] hdcp22_rndnum_err
+ * [  3] nonce_rfrsh_rise
+ * [  2] hpd_fall_intr
+ * [  1] hpd_rise_intr
+ * [  0] core_intr
+ */
+#define HDMITX_TOP_INTR_MASKN                   (0x003)
+
+/*
+ * Bit 30: 0 RW intr_stat: For each bit, write 1 to manually set the interrupt
+ *     bit, read back the interrupt status.
+ * Bit    31 R  IP interrupt status
+ * Bit     2 RW hpd_fall
+ * Bit     1 RW hpd_rise
+ * Bit     0 RW IP interrupt
+ */
+#define HDMITX_TOP_INTR_STAT                    (0x004)
+
+/*
+ * [4]	  hdcp22_rndnum_err
+ * [3]	  nonce_rfrsh_rise
+ * [2]	  hpd_fall
+ * [1]	  hpd_rise
+ * [0]	  core_intr_rise
+ */
+#define HDMITX_TOP_INTR_STAT_CLR                (0x005)
+
+#define HDMITX_TOP_INTR_CORE		BIT(0)
+#define HDMITX_TOP_INTR_HPD_RISE	BIT(1)
+#define HDMITX_TOP_INTR_HPD_FALL	BIT(2)
+
+/* Bit 14:12 RW tmds_sel: 3'b000=Output zero; 3'b001=Output normal TMDS data;
+ *     3'b010=Output PRBS data; 3'b100=Output shift pattern. Default 0.
+ * Bit 11: 9 RW shift_pttn_repeat: 0=New pattern every clk cycle; 1=New pattern
+ *     every 2 clk cycles; ...; 7=New pattern every 8 clk cycles. Default 0.
+ * Bit 8 RW shift_pttn_en: 1= Enable shift pattern generator; 0=Disable.
+ *     Default 0.
+ * Bit 4: 3 RW prbs_pttn_mode: 0=PRBS11; 1=PRBS15; 2=PRBS7; 3=PRBS31. Default 0.
+ * Bit 2: 1 RW prbs_pttn_width: 0=idle; 1=output 8-bit pattern;
+ *     2=Output 1-bit pattern; 3=output 10-bit pattern. Default 0.
+ * Bit 0 RW prbs_pttn_en: 1=Enable PRBS generator; 0=Disable. Default 0.
+ */
+#define HDMITX_TOP_BIST_CNTL                    (0x006)
+
+/* Bit 29:20 RW shift_pttn_data[59:50]. Default 0. */
+/* Bit 19:10 RW shift_pttn_data[69:60]. Default 0. */
+/* Bit  9: 0 RW shift_pttn_data[79:70]. Default 0. */
+#define HDMITX_TOP_SHIFT_PTTN_012               (0x007)
+
+/* Bit 29:20 RW shift_pttn_data[29:20]. Default 0. */
+/* Bit 19:10 RW shift_pttn_data[39:30]. Default 0. */
+/* Bit  9: 0 RW shift_pttn_data[49:40]. Default 0. */
+#define HDMITX_TOP_SHIFT_PTTN_345               (0x008)
+
+/* Bit 19:10 RW shift_pttn_data[ 9: 0]. Default 0. */
+/* Bit  9: 0 RW shift_pttn_data[19:10]. Default 0. */
+#define HDMITX_TOP_SHIFT_PTTN_67                (0x009)
+
+/* Bit 25:16 RW tmds_clk_pttn[19:10]. Default 0. */
+/* Bit  9: 0 RW tmds_clk_pttn[ 9: 0]. Default 0. */
+#define HDMITX_TOP_TMDS_CLK_PTTN_01             (0x00A)
+
+/* Bit 25:16 RW tmds_clk_pttn[39:30]. Default 0. */
+/* Bit  9: 0 RW tmds_clk_pttn[29:20]. Default 0. */
+#define HDMITX_TOP_TMDS_CLK_PTTN_23             (0x00B)
+
+/* Bit 1 RW shift_tmds_clk_pttn:1=Enable shifting clk pattern,
+ * used when TMDS CLK rate = TMDS character rate /4. Default 0.
+ * Bit 0 R  Reserved. Default 0.
+ * [	1] shift_tmds_clk_pttn
+ * [	0] load_tmds_clk_pttn
+ */
+#define HDMITX_TOP_TMDS_CLK_PTTN_CNTL           (0x00C)
+
+/* Bit 0 RW revocmem_wr_fail: Read back 1 to indicate Host write REVOC MEM
+ * failure, write 1 to clear the failure flag.  Default 0.
+ */
+#define HDMITX_TOP_REVOCMEM_STAT                (0x00D)
+
+/* Bit     0 R  filtered HPD status. */
+#define HDMITX_TOP_STAT0                        (0x00E)
+
+#endif /* __MESON_DW_HDMI_H */
diff --git a/drivers/gpu/drm/meson/meson_registers.h b/drivers/gpu/drm/meson/meson_registers.h
index 6adf9c13fafa..284738196af9 100644
--- a/drivers/gpu/drm/meson/meson_registers.h
+++ b/drivers/gpu/drm/meson/meson_registers.h
@@ -1319,6 +1319,7 @@
 #define VPU_MISC_CTRL 0x2740
 #define VPU_ISP_GCLK_CTRL0 0x2741
 #define VPU_ISP_GCLK_CTRL1 0x2742
+#define VPU_HDMI_FMT_CTRL 0x2743
 #define VPU_VDIN_ASYNC_HOLD_CTRL 0x2743
 #define VPU_VDISP_ASYNC_HOLD_CTRL 0x2744
 #define VPU_VPUARB2_ASYNC_HOLD_CTRL 0x2745
diff --git a/drivers/gpu/drm/meson/meson_vclk.c b/drivers/gpu/drm/meson/meson_vclk.c
index 252cfd4b19b1..47677047e42d 100644
--- a/drivers/gpu/drm/meson/meson_vclk.c
+++ b/drivers/gpu/drm/meson/meson_vclk.c
@@ -23,13 +23,38 @@
 #include "meson_drv.h"
 #include "meson_vclk.h"
 
-/*
+/**
+ * DOC: Video Clocks
+ *
  * VCLK is the "Pixel Clock" frequency generator from a dedicated PLL.
  * We handle the following encodings :
+ *
  * - CVBS 27MHz generator via the VCLK2 to the VENCI and VDAC blocks
+ * - HDMI Pixel Clocks generation
  *
  * What is missing :
- * - HDMI Pixel Clocks generation
+ *
+ * - Genenate Pixel clocks for 2K/4K 10bit formats
+ *
+ * Clock generator scheme :
+ *
+ * .. code::
+ *
+ *    __________   _________            _____
+ *   |          | |         |          |     |--ENCI
+ *   | HDMI PLL |-| PLL_DIV |--- VCLK--|     |--ENCL
+ *   |__________| |_________| \        | MUX |--ENCP
+ *                             --VCLK2-|     |--VDAC
+ *                                     |_____|--HDMI-TX
+ *
+ * Final clocks can take input for either VCLK or VCLK2, but
+ * VCLK is the preferred path for HDMI clocking and VCLK2 is the
+ * preferred path for CVBS VDAC clocking.
+ *
+ * VCLK and VCLK2 have fixed divided clocks paths for /1, /2, /4, /6 or /12.
+ *
+ * The PLL_DIV can achieve an additional fractional dividing like
+ * 1.5, 3.5, 3.75... to generate special 2K and 4K 10bit clocks.
  */
 
 /* HHI Registers */
@@ -50,11 +75,34 @@
 #define VCLK2_SOFT_RESET	BIT(15)
 #define VCLK2_DIV1_EN		BIT(0)
 #define HHI_VID_CLK_DIV		0x164 /* 0x59 offset in data sheet */
+#define VCLK_DIV_MASK		0xff
+#define VCLK_DIV_EN		BIT(16)
+#define VCLK_DIV_RESET		BIT(17)
+#define CTS_ENCP_SEL_MASK	(0xf << 24)
+#define CTS_ENCP_SEL_SHIFT	24
 #define CTS_ENCI_SEL_MASK	(0xf << 28)
 #define CTS_ENCI_SEL_SHIFT	28
+#define HHI_VID_CLK_CNTL	0x17c /* 0x5f offset in data sheet */
+#define VCLK_EN			BIT(19)
+#define VCLK_SEL_MASK		(0x7 << 16)
+#define VCLK_SEL_SHIFT		16
+#define VCLK_SOFT_RESET		BIT(15)
+#define VCLK_DIV1_EN		BIT(0)
+#define VCLK_DIV2_EN		BIT(1)
+#define VCLK_DIV4_EN		BIT(2)
+#define VCLK_DIV6_EN		BIT(3)
+#define VCLK_DIV12_EN		BIT(4)
 #define HHI_VID_CLK_CNTL2	0x194 /* 0x65 offset in data sheet */
 #define CTS_ENCI_EN		BIT(0)
+#define CTS_ENCP_EN		BIT(2)
 #define CTS_VDAC_EN		BIT(4)
+#define HDMI_TX_PIXEL_EN	BIT(5)
+#define HHI_HDMI_CLK_CNTL	0x1cc /* 0x73 offset in data sheet */
+#define HDMI_TX_PIXEL_SEL_MASK	(0xf << 16)
+#define HDMI_TX_PIXEL_SEL_SHIFT	16
+#define CTS_HDMI_SYS_SEL_MASK	(0x7 << 9)
+#define CTS_HDMI_SYS_DIV_MASK	(0x7f)
+#define CTS_HDMI_SYS_EN		BIT(8)
 
 #define HHI_VDAC_CNTL0		0x2F4 /* 0xbd offset in data sheet */
 #define HHI_VDAC_CNTL1		0x2F8 /* 0xbe offset in data sheet */
@@ -69,6 +117,126 @@
 #define HDMI_PLL_RESET		BIT(28)
 #define HDMI_PLL_LOCK		BIT(31)
 
+/* VID PLL Dividers */
+enum {
+	VID_PLL_DIV_1 = 0,
+	VID_PLL_DIV_2,
+	VID_PLL_DIV_2p5,
+	VID_PLL_DIV_3,
+	VID_PLL_DIV_3p5,
+	VID_PLL_DIV_3p75,
+	VID_PLL_DIV_4,
+	VID_PLL_DIV_5,
+	VID_PLL_DIV_6,
+	VID_PLL_DIV_6p25,
+	VID_PLL_DIV_7,
+	VID_PLL_DIV_7p5,
+	VID_PLL_DIV_12,
+	VID_PLL_DIV_14,
+	VID_PLL_DIV_15,
+};
+
+void meson_vid_pll_set(struct meson_drm *priv, unsigned int div)
+{
+	unsigned int shift_val = 0;
+	unsigned int shift_sel = 0;
+
+	/* Disable vid_pll output clock */
+	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV, VID_PLL_EN, 0);
+	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV, VID_PLL_PRESET, 0);
+
+	switch (div) {
+	case VID_PLL_DIV_2:
+		shift_val = 0x0aaa;
+		shift_sel = 0;
+		break;
+	case VID_PLL_DIV_2p5:
+		shift_val = 0x5294;
+		shift_sel = 2;
+		break;
+	case VID_PLL_DIV_3:
+		shift_val = 0x0db6;
+		shift_sel = 0;
+		break;
+	case VID_PLL_DIV_3p5:
+		shift_val = 0x36cc;
+		shift_sel = 1;
+		break;
+	case VID_PLL_DIV_3p75:
+		shift_val = 0x6666;
+		shift_sel = 2;
+		break;
+	case VID_PLL_DIV_4:
+		shift_val = 0x0ccc;
+		shift_sel = 0;
+		break;
+	case VID_PLL_DIV_5:
+		shift_val = 0x739c;
+		shift_sel = 2;
+		break;
+	case VID_PLL_DIV_6:
+		shift_val = 0x0e38;
+		shift_sel = 0;
+		break;
+	case VID_PLL_DIV_6p25:
+		shift_val = 0x0000;
+		shift_sel = 3;
+		break;
+	case VID_PLL_DIV_7:
+		shift_val = 0x3c78;
+		shift_sel = 1;
+		break;
+	case VID_PLL_DIV_7p5:
+		shift_val = 0x78f0;
+		shift_sel = 2;
+		break;
+	case VID_PLL_DIV_12:
+		shift_val = 0x0fc0;
+		shift_sel = 0;
+		break;
+	case VID_PLL_DIV_14:
+		shift_val = 0x3f80;
+		shift_sel = 1;
+		break;
+	case VID_PLL_DIV_15:
+		shift_val = 0x7f80;
+		shift_sel = 2;
+		break;
+	}
+
+	if (div == VID_PLL_DIV_1)
+		/* Enable vid_pll bypass to HDMI pll */
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   VID_PLL_BYPASS, VID_PLL_BYPASS);
+	else {
+		/* Disable Bypass */
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   VID_PLL_BYPASS, 0);
+		/* Clear sel */
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   3 << 16, 0);
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   VID_PLL_PRESET, 0);
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   0x7fff, 0);
+
+		/* Setup sel and val */
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   3 << 16, shift_sel << 16);
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   VID_PLL_PRESET, VID_PLL_PRESET);
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   0x7fff, shift_val);
+
+		regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				   VID_PLL_PRESET, 0);
+	}
+
+	/* Enable the vid_pll output clock */
+	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
+				VID_PLL_EN, VID_PLL_EN);
+}
+
 /*
  * Setup VCLK2 for 27MHz, and enable clocks for ENCI and VDAC
  *
@@ -110,15 +278,8 @@ static void meson_venci_cvbs_clock_config(struct meson_drm *priv)
 	/* Disable VCLK2 */
 	regmap_update_bits(priv->hhi, HHI_VIID_CLK_CNTL, VCLK2_EN, 0);
 
-	/* Disable vid_pll output clock */
-	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV, VID_PLL_EN, 0);
-	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV, VID_PLL_PRESET, 0);
-	/* Enable vid_pll bypass to HDMI pll */
-	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
-				VID_PLL_BYPASS, VID_PLL_BYPASS);
-	/* Enable the vid_pll output clock */
-	regmap_update_bits(priv->hhi, HHI_VID_PLL_CLK_DIV,
-				VID_PLL_EN, VID_PLL_EN);
+	/* Setup vid_pll to /1 */
+	meson_vid_pll_set(priv, VID_PLL_DIV_1);
 
 	/* Setup the VCLK2 divider value to achieve 27MHz */
 	regmap_update_bits(priv->hhi, HHI_VIID_CLK_DIV,
@@ -159,9 +320,454 @@ static void meson_venci_cvbs_clock_config(struct meson_drm *priv)
 				CTS_VDAC_EN, CTS_VDAC_EN);
 }
 
+
+/* PLL	O1 O2 O3 VP DV     EN TX */
+/* 4320 /4 /4 /1 /5 /1  => /2 /2 */
+#define MESON_VCLK_HDMI_ENCI_54000	1
+/* 4320 /4 /4 /1 /5 /1  => /1 /2 */
+#define MESON_VCLK_HDMI_DDR_54000	2
+/* 2970 /4 /1 /1 /5 /1  => /1 /2 */
+#define MESON_VCLK_HDMI_DDR_148500	3
+/* 2970 /2 /2 /2 /5 /1  => /1 /1 */
+#define MESON_VCLK_HDMI_74250		4
+/* 2970 /1 /2 /2 /5 /1  => /1 /1 */
+#define MESON_VCLK_HDMI_148500		5
+/* 2970 /1 /1 /1 /5 /2  => /1 /1 */
+#define MESON_VCLK_HDMI_297000		6
+/* 5940 /1 /1 /2 /5 /1  => /1 /1 */
+#define MESON_VCLK_HDMI_594000		7
+
+struct meson_vclk_params {
+	unsigned int pll_base_freq;
+	unsigned int pll_od1;
+	unsigned int pll_od2;
+	unsigned int pll_od3;
+	unsigned int vid_pll_div;
+	unsigned int vclk_div;
+} params[] = {
+	[MESON_VCLK_HDMI_ENCI_54000] = {
+		.pll_base_freq = 4320000,
+		.pll_od1 = 4,
+		.pll_od2 = 4,
+		.pll_od3 = 1,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+	[MESON_VCLK_HDMI_DDR_54000] = {
+		.pll_base_freq = 4320000,
+		.pll_od1 = 4,
+		.pll_od2 = 4,
+		.pll_od3 = 1,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+	[MESON_VCLK_HDMI_DDR_148500] = {
+		.pll_base_freq = 2970000,
+		.pll_od1 = 4,
+		.pll_od2 = 1,
+		.pll_od3 = 1,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+	[MESON_VCLK_HDMI_74250] = {
+		.pll_base_freq = 2970000,
+		.pll_od1 = 2,
+		.pll_od2 = 2,
+		.pll_od3 = 2,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+	[MESON_VCLK_HDMI_148500] = {
+		.pll_base_freq = 2970000,
+		.pll_od1 = 1,
+		.pll_od2 = 2,
+		.pll_od3 = 2,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+	[MESON_VCLK_HDMI_297000] = {
+		.pll_base_freq = 2970000,
+		.pll_od1 = 1,
+		.pll_od2 = 1,
+		.pll_od3 = 1,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 2,
+	},
+	[MESON_VCLK_HDMI_594000] = {
+		.pll_base_freq = 5940000,
+		.pll_od1 = 1,
+		.pll_od2 = 1,
+		.pll_od3 = 2,
+		.vid_pll_div = VID_PLL_DIV_5,
+		.vclk_div = 1,
+	},
+};
+
+static inline unsigned int pll_od_to_reg(unsigned int od)
+{
+	switch (od) {
+	case 1:
+		return 0;
+	case 2:
+		return 1;
+	case 4:
+		return 2;
+	case 8:
+		return 3;
+	}
+
+	/* Invalid */
+	return 0;
+}
+
+void meson_hdmi_pll_set(struct meson_drm *priv,
+			unsigned int base,
+			unsigned int od1,
+			unsigned int od2,
+			unsigned int od3)
+{
+	unsigned int val;
+
+	if (meson_vpu_is_compatible(priv, "amlogic,meson-gxbb-vpu")) {
+		switch (base) {
+		case 2970000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x5800023d);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL2, 0x00000000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x0d5c5091);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x801da72c);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x71486980);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x00000e55);
+
+			/* Enable and unreset */
+			regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL,
+						0x7 << 28, 0x4 << 28);
+
+			/* Poll for lock bit */
+			regmap_read_poll_timeout(priv->hhi, HHI_HDMI_PLL_CNTL,
+					val, (val & HDMI_PLL_LOCK), 10, 0);
+
+			/* div_frac */
+			regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL2,
+						0xFFFF,  0x4e00);
+			break;
+
+		case 4320000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x5800025a);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL2, 0x00000000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x0d5c5091);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x801da72c);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x71486980);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x00000e55);
+
+			/* unreset */
+			regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL,
+						BIT(28), 0);
+
+			/* Poll for lock bit */
+			regmap_read_poll_timeout(priv->hhi, HHI_HDMI_PLL_CNTL,
+					val, (val & HDMI_PLL_LOCK), 10, 0);
+			break;
+
+		case 5940000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x5800027b);
+			regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL2,
+						0xFFFF,  0x4c00);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x135c5091);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x801da72c);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x71486980);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x00000e55);
+
+			/* unreset */
+			regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL,
+						BIT(28), 0);
+
+			/* Poll for lock bit */
+			regmap_read_poll_timeout(priv->hhi, HHI_HDMI_PLL_CNTL,
+					val, (val & HDMI_PLL_LOCK), 10, 0);
+			break;
+		};
+	} else if (meson_vpu_is_compatible(priv, "amlogic,meson-gxm-vpu") ||
+		   meson_vpu_is_compatible(priv, "amlogic,meson-gxl-vpu")) {
+		switch (base) {
+		case 2970000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x4000027b);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL2, 0x800cb300);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x860f30c4);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x0c8e0000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x001fa729);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x01a31500);
+			break;
+
+		case 4320000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x400002b4);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL2, 0x800cb000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x860f30c4);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x0c8e0000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x001fa729);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x01a31500);
+			break;
+
+		case 5940000:
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL, 0x400002f7);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL2, 0x800cb200);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL3, 0x860f30c4);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL4, 0x0c8e0000);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL5, 0x001fa729);
+			regmap_write(priv->hhi, HHI_HDMI_PLL_CNTL6, 0x01a31500);
+			break;
+
+		};
+
+		/* Reset PLL */
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL,
+					HDMI_PLL_RESET, HDMI_PLL_RESET);
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL,
+					HDMI_PLL_RESET, 0);
+
+		/* Poll for lock bit */
+		regmap_read_poll_timeout(priv->hhi, HHI_HDMI_PLL_CNTL, val,
+				(val & HDMI_PLL_LOCK), 10, 0);
+	};
+
+	if (meson_vpu_is_compatible(priv, "amlogic,meson-gxbb-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL2,
+				   3 << 16, pll_od_to_reg(od1) << 16);
+	else if (meson_vpu_is_compatible(priv, "amlogic,meson-gxm-vpu") ||
+		 meson_vpu_is_compatible(priv, "amlogic,meson-gxl-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL3,
+				   3 << 21, pll_od_to_reg(od1) << 21);
+
+	if (meson_vpu_is_compatible(priv, "amlogic,meson-gxbb-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL2,
+				   3 << 22, pll_od_to_reg(od2) << 22);
+	else if (meson_vpu_is_compatible(priv, "amlogic,meson-gxm-vpu") ||
+		 meson_vpu_is_compatible(priv, "amlogic,meson-gxl-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL3,
+				   3 << 23, pll_od_to_reg(od2) << 23);
+
+	if (meson_vpu_is_compatible(priv, "amlogic,meson-gxbb-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL2,
+				   3 << 18, pll_od_to_reg(od3) << 18);
+	else if (meson_vpu_is_compatible(priv, "amlogic,meson-gxm-vpu") ||
+		 meson_vpu_is_compatible(priv, "amlogic,meson-gxl-vpu"))
+		regmap_update_bits(priv->hhi, HHI_HDMI_PLL_CNTL3,
+				   3 << 19, pll_od_to_reg(od3) << 19);
+}
+
 void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
-		      unsigned int freq)
+		      unsigned int vclk_freq, unsigned int venc_freq,
+		      unsigned int dac_freq, bool hdmi_use_enci)
 {
-	if (target == MESON_VCLK_TARGET_CVBS && freq == MESON_VCLK_CVBS)
+	unsigned int freq;
+	unsigned int hdmi_tx_div;
+	unsigned int venc_div;
+
+	if (target == MESON_VCLK_TARGET_CVBS) {
 		meson_venci_cvbs_clock_config(priv);
+		return;
+	}
+
+	hdmi_tx_div = vclk_freq / dac_freq;
+
+	if (hdmi_tx_div == 0) {
+		pr_err("Fatal Error, invalid HDMI-TX freq %d\n",
+				dac_freq);
+		return;
+	}
+
+	venc_div = vclk_freq / venc_freq;
+
+	if (venc_div == 0) {
+		pr_err("Fatal Error, invalid HDMI venc freq %d\n",
+				venc_freq);
+		return;
+	}
+
+	switch (vclk_freq) {
+	case 54000:
+		if (hdmi_use_enci)
+			freq = MESON_VCLK_HDMI_ENCI_54000;
+		else
+			freq = MESON_VCLK_HDMI_DDR_54000;
+		break;
+	case 74250:
+		freq = MESON_VCLK_HDMI_74250;
+		break;
+	case 148500:
+		if (dac_freq != 148500)
+			freq = MESON_VCLK_HDMI_DDR_148500;
+		else
+			freq = MESON_VCLK_HDMI_148500;
+		break;
+	case 297000:
+		freq = MESON_VCLK_HDMI_297000;
+		break;
+	case 594000:
+		freq = MESON_VCLK_HDMI_594000;
+		break;
+	default:
+		pr_err("Fatal Error, invalid HDMI vclk freq %d\n",
+			vclk_freq);
+		return;
+	}
+
+	/* Set HDMI-TX sys clock */
+	regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			   CTS_HDMI_SYS_SEL_MASK, 0);
+	regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			   CTS_HDMI_SYS_DIV_MASK, 0);
+	regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			   CTS_HDMI_SYS_EN, CTS_HDMI_SYS_EN);
+
+	/* Set HDMI PLL rate */
+	meson_hdmi_pll_set(priv, params[freq].pll_base_freq,
+			   params[freq].pll_od1,
+			   params[freq].pll_od2,
+			   params[freq].pll_od3);
+
+	/* Setup vid_pll divider */
+	meson_vid_pll_set(priv, params[freq].vid_pll_div);
+
+	/* Set VCLK div */
+	regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+			   VCLK_SEL_MASK, 0);
+	regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+			   VCLK_DIV_MASK, params[freq].vclk_div - 1);
+
+	/* Set HDMI-TX source */
+	switch (hdmi_tx_div) {
+	case 1:
+		/* enable vclk_div1 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV1_EN, VCLK_DIV1_EN);
+
+		/* select vclk_div1 for HDMI-TX */
+		regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+				   HDMI_TX_PIXEL_SEL_MASK, 0);
+		break;
+	case 2:
+		/* enable vclk_div2 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV2_EN, VCLK_DIV2_EN);
+
+		/* select vclk_div2 for HDMI-TX */
+		regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			HDMI_TX_PIXEL_SEL_MASK, 1 << HDMI_TX_PIXEL_SEL_SHIFT);
+		break;
+	case 4:
+		/* enable vclk_div4 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV4_EN, VCLK_DIV4_EN);
+
+		/* select vclk_div4 for HDMI-TX */
+		regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			HDMI_TX_PIXEL_SEL_MASK, 2 << HDMI_TX_PIXEL_SEL_SHIFT);
+		break;
+	case 6:
+		/* enable vclk_div6 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV6_EN, VCLK_DIV6_EN);
+
+		/* select vclk_div6 for HDMI-TX */
+		regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			HDMI_TX_PIXEL_SEL_MASK, 3 << HDMI_TX_PIXEL_SEL_SHIFT);
+		break;
+	case 12:
+		/* enable vclk_div12 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV12_EN, VCLK_DIV12_EN);
+
+		/* select vclk_div12 for HDMI-TX */
+		regmap_update_bits(priv->hhi, HHI_HDMI_CLK_CNTL,
+			HDMI_TX_PIXEL_SEL_MASK, 4 << HDMI_TX_PIXEL_SEL_SHIFT);
+		break;
+	}
+	regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL2,
+				   HDMI_TX_PIXEL_EN, HDMI_TX_PIXEL_EN);
+
+	/* Set ENCI/ENCP Source */
+	switch (venc_div) {
+	case 1:
+		/* enable vclk_div1 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV1_EN, VCLK_DIV1_EN);
+
+		if (hdmi_use_enci)
+			/* select vclk_div1 for enci */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+					   CTS_ENCI_SEL_MASK, 0);
+		else
+			/* select vclk_div1 for encp */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+					   CTS_ENCP_SEL_MASK, 0);
+		break;
+	case 2:
+		/* enable vclk_div2 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV2_EN, VCLK_DIV2_EN);
+
+		if (hdmi_use_enci)
+			/* select vclk_div2 for enci */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCI_SEL_MASK, 1 << CTS_ENCI_SEL_SHIFT);
+		else
+			/* select vclk_div2 for encp */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCP_SEL_MASK, 1 << CTS_ENCP_SEL_SHIFT);
+		break;
+	case 4:
+		/* enable vclk_div4 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV4_EN, VCLK_DIV4_EN);
+
+		if (hdmi_use_enci)
+			/* select vclk_div4 for enci */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCI_SEL_MASK, 2 << CTS_ENCI_SEL_SHIFT);
+		else
+			/* select vclk_div4 for encp */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCP_SEL_MASK, 2 << CTS_ENCP_SEL_SHIFT);
+		break;
+	case 6:
+		/* enable vclk_div6 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV6_EN, VCLK_DIV6_EN);
+
+		if (hdmi_use_enci)
+			/* select vclk_div6 for enci */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCI_SEL_MASK, 3 << CTS_ENCI_SEL_SHIFT);
+		else
+			/* select vclk_div6 for encp */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCP_SEL_MASK, 3 << CTS_ENCP_SEL_SHIFT);
+		break;
+	case 12:
+		/* enable vclk_div12 gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL,
+				   VCLK_DIV12_EN, VCLK_DIV12_EN);
+
+		if (hdmi_use_enci)
+			/* select vclk_div12 for enci */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCI_SEL_MASK, 4 << CTS_ENCI_SEL_SHIFT);
+		else
+			/* select vclk_div12 for encp */
+			regmap_update_bits(priv->hhi, HHI_VID_CLK_DIV,
+				CTS_ENCP_SEL_MASK, 4 << CTS_ENCP_SEL_SHIFT);
+		break;
+	}
+
+	if (hdmi_use_enci)
+		/* Enable ENCI clock gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL2,
+				   CTS_ENCI_EN, CTS_ENCI_EN);
+	else
+		/* Enable ENCP clock gate */
+		regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL2,
+				   CTS_ENCP_EN, CTS_ENCP_EN);
+
+	regmap_update_bits(priv->hhi, HHI_VID_CLK_CNTL, VCLK_EN, VCLK_EN);
 }
+EXPORT_SYMBOL_GPL(meson_vclk_setup);
diff --git a/drivers/gpu/drm/meson/meson_vclk.h b/drivers/gpu/drm/meson/meson_vclk.h
index ec62735996de..0401b5213471 100644
--- a/drivers/gpu/drm/meson/meson_vclk.h
+++ b/drivers/gpu/drm/meson/meson_vclk.h
@@ -23,12 +23,14 @@
 
 enum {
 	MESON_VCLK_TARGET_CVBS = 0,
+	MESON_VCLK_TARGET_HDMI = 1,
 };
 
 /* 27MHz is the CVBS Pixel Clock */
-#define MESON_VCLK_CVBS	27000
+#define MESON_VCLK_CVBS			27000
 
 void meson_vclk_setup(struct meson_drm *priv, unsigned int target,
-		      unsigned int freq);
+		      unsigned int vclk_freq, unsigned int venc_freq,
+		      unsigned int dac_freq, bool hdmi_use_enci);
 
 #endif /* __MESON_VCLK_H */
diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index f7c870172220..9509017dbded 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -26,16 +26,48 @@
 #include "meson_vclk.h"
 #include "meson_registers.h"
 
-/*
+/**
+ * DOC: Video Encoder
+ *
  * VENC Handle the pixels encoding to the output formats.
  * We handle the following encodings :
- * - CVBS Encoding via the ENCI encoder and VDAC digital to analog converter
  *
- * What is missing :
+ * - CVBS Encoding via the ENCI encoder and VDAC digital to analog converter
  * - TMDS/HDMI Encoding via ENCI_DIV and ENCP
  * - Setup of more clock rates for HDMI modes
+ *
+ * What is missing :
+ *
  * - LCD Panel encoding via ENCL
  * - TV Panel encoding via ENCT
+ *
+ * VENC paths :
+ *
+ * .. code::
+ *
+ *          _____   _____   ____________________
+ *   vd1---|     |-|     | | VENC     /---------|----VDAC
+ *   vd2---| VIU |-| VPP |-|-----ENCI/-ENCI_DVI-|-|
+ *   osd1--|     |-|     | | \                  | X--HDMI-TX
+ *   osd2--|_____|-|_____| |  |\-ENCP--ENCP_DVI-|-|
+ *                         |  |                 |
+ *                         |  \--ENCL-----------|----LVDS
+ *                         |____________________|
+ *
+ * The ENCI is designed for PAl or NTSC encoding and can go through the VDAC
+ * directly for CVBS encoding or through the ENCI_DVI encoder for HDMI.
+ * The ENCP is designed for Progressive encoding but can also generate
+ * 1080i interlaced pixels, and was initialy desined to encode pixels for
+ * VDAC to output RGB ou YUV analog outputs.
+ * It's output is only used through the ENCP_DVI encoder for HDMI.
+ * The ENCL LVDS encoder is not implemented.
+ *
+ * The ENCI and ENCP encoders needs specially defined parameters for each
+ * supported mode and thus cannot be determined from standard video timings.
+ *
+ * The ENCI end ENCP DVI encoders are more generic and can generate any timings
+ * from the pixel data generated by ENCI or ENCP, so can use the standard video
+ * timings are source for HW parameters.
  */
 
 /* HHI Registers */
@@ -91,6 +123,1219 @@ struct meson_cvbs_enci_mode meson_cvbs_enci_ntsc = {
 	.analog_sync_adj = 0x9c00,
 };
 
+union meson_hdmi_venc_mode {
+	struct {
+		unsigned int mode_tag;
+		unsigned int hso_begin;
+		unsigned int hso_end;
+		unsigned int vso_even;
+		unsigned int vso_odd;
+		unsigned int macv_max_amp;
+		unsigned int video_prog_mode;
+		unsigned int video_mode;
+		unsigned int sch_adjust;
+		unsigned int yc_delay;
+		unsigned int pixel_start;
+		unsigned int pixel_end;
+		unsigned int top_field_line_start;
+		unsigned int top_field_line_end;
+		unsigned int bottom_field_line_start;
+		unsigned int bottom_field_line_end;
+	} enci;
+	struct {
+		unsigned int dvi_settings;
+		unsigned int video_mode;
+		unsigned int video_mode_adv;
+		unsigned int video_prog_mode;
+		bool video_prog_mode_present;
+		unsigned int video_sync_mode;
+		bool video_sync_mode_present;
+		unsigned int video_yc_dly;
+		bool video_yc_dly_present;
+		unsigned int video_rgb_ctrl;
+		bool video_rgb_ctrl_present;
+		unsigned int video_filt_ctrl;
+		bool video_filt_ctrl_present;
+		unsigned int video_ofld_voav_ofst;
+		bool video_ofld_voav_ofst_present;
+		unsigned int yfp1_htime;
+		unsigned int yfp2_htime;
+		unsigned int max_pxcnt;
+		unsigned int hspuls_begin;
+		unsigned int hspuls_end;
+		unsigned int hspuls_switch;
+		unsigned int vspuls_begin;
+		unsigned int vspuls_end;
+		unsigned int vspuls_bline;
+		unsigned int vspuls_eline;
+		unsigned int eqpuls_begin;
+		bool eqpuls_begin_present;
+		unsigned int eqpuls_end;
+		bool eqpuls_end_present;
+		unsigned int eqpuls_bline;
+		bool eqpuls_bline_present;
+		unsigned int eqpuls_eline;
+		bool eqpuls_eline_present;
+		unsigned int havon_begin;
+		unsigned int havon_end;
+		unsigned int vavon_bline;
+		unsigned int vavon_eline;
+		unsigned int hso_begin;
+		unsigned int hso_end;
+		unsigned int vso_begin;
+		unsigned int vso_end;
+		unsigned int vso_bline;
+		unsigned int vso_eline;
+		bool vso_eline_present;
+		unsigned int sy_val;
+		bool sy_val_present;
+		unsigned int sy2_val;
+		bool sy2_val_present;
+		unsigned int max_lncnt;
+	} encp;
+};
+
+union meson_hdmi_venc_mode meson_hdmi_enci_mode_480i = {
+	.enci = {
+		.hso_begin = 5,
+		.hso_end = 129,
+		.vso_even = 3,
+		.vso_odd = 260,
+		.macv_max_amp = 0x810b,
+		.video_prog_mode = 0xf0,
+		.video_mode = 0x8,
+		.sch_adjust = 0x20,
+		.yc_delay = 0,
+		.pixel_start = 227,
+		.pixel_end = 1667,
+		.top_field_line_start = 18,
+		.top_field_line_end = 258,
+		.bottom_field_line_start = 19,
+		.bottom_field_line_end = 259,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_enci_mode_576i = {
+	.enci = {
+		.hso_begin = 3,
+		.hso_end = 129,
+		.vso_even = 3,
+		.vso_odd = 260,
+		.macv_max_amp = 8107,
+		.video_prog_mode = 0xff,
+		.video_mode = 0x13,
+		.sch_adjust = 0x28,
+		.yc_delay = 0x333,
+		.pixel_start = 251,
+		.pixel_end = 1691,
+		.top_field_line_start = 22,
+		.top_field_line_end = 310,
+		.bottom_field_line_start = 23,
+		.bottom_field_line_end = 311,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_480p = {
+	.encp = {
+		.dvi_settings = 0x21,
+		.video_mode = 0x4000,
+		.video_mode_adv = 0x9,
+		.video_prog_mode = 0,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 7,
+		.video_sync_mode_present = true,
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		.video_filt_ctrl = 0x2052,
+		.video_filt_ctrl_present = true,
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 244,
+		.yfp2_htime = 1630,
+		.max_pxcnt = 1715,
+		.hspuls_begin = 0x22,
+		.hspuls_end = 0xa0,
+		.hspuls_switch = 88,
+		.vspuls_begin = 0,
+		.vspuls_end = 1589,
+		.vspuls_bline = 0,
+		.vspuls_eline = 5,
+		.havon_begin = 249,
+		.havon_end = 1689,
+		.vavon_bline = 42,
+		.vavon_eline = 521,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 3,
+		.hso_end = 5,
+		.vso_begin = 3,
+		.vso_end = 5,
+		.vso_bline = 0,
+		/* vso_eline */
+		.sy_val	= 8,
+		.sy_val_present = true,
+		.sy2_val = 0x1d8,
+		.sy2_val_present = true,
+		.max_lncnt = 524,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_576p = {
+	.encp = {
+		.dvi_settings = 0x21,
+		.video_mode = 0x4000,
+		.video_mode_adv = 0x9,
+		.video_prog_mode = 0,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 7,
+		.video_sync_mode_present = true,
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		.video_filt_ctrl = 0x52,
+		.video_filt_ctrl_present = true,
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 235,
+		.yfp2_htime = 1674,
+		.max_pxcnt = 1727,
+		.hspuls_begin = 0,
+		.hspuls_end = 0x80,
+		.hspuls_switch = 88,
+		.vspuls_begin = 0,
+		.vspuls_end = 1599,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 235,
+		.havon_end = 1674,
+		.vavon_bline = 44,
+		.vavon_eline = 619,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 0x80,
+		.hso_end = 0,
+		.vso_begin = 0,
+		.vso_end = 5,
+		.vso_bline = 0,
+		/* vso_eline */
+		.sy_val	= 8,
+		.sy_val_present = true,
+		.sy2_val = 0x1d8,
+		.sy2_val_present = true,
+		.max_lncnt = 624,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_720p60 = {
+	.encp = {
+		.dvi_settings = 0x2029,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x19,
+		/* video_prog_mode */
+		/* video_sync_mode */
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		/* video_filt_ctrl */
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 648,
+		.yfp2_htime = 3207,
+		.max_pxcnt = 3299,
+		.hspuls_begin = 80,
+		.hspuls_end = 240,
+		.hspuls_switch = 80,
+		.vspuls_begin = 688,
+		.vspuls_end = 3248,
+		.vspuls_bline = 4,
+		.vspuls_eline = 8,
+		.havon_begin = 648,
+		.havon_end = 3207,
+		.vavon_bline = 29,
+		.vavon_eline = 748,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 256,
+		.hso_end = 168,
+		.vso_begin = 168,
+		.vso_end = 256,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 749,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_720p50 = {
+	.encp = {
+		.dvi_settings = 0x202d,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x19,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 0x407,
+		.video_sync_mode_present = true,
+		.video_yc_dly = 0,
+		.video_yc_dly_present = true,
+		/* video_rgb_ctrl */
+		/* video_filt_ctrl */
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 648,
+		.yfp2_htime = 3207,
+		.max_pxcnt = 3959,
+		.hspuls_begin = 80,
+		.hspuls_end = 240,
+		.hspuls_switch = 80,
+		.vspuls_begin = 688,
+		.vspuls_end = 3248,
+		.vspuls_bline = 4,
+		.vspuls_eline = 8,
+		.havon_begin = 648,
+		.havon_end = 3207,
+		.vavon_bline = 29,
+		.vavon_eline = 748,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 128,
+		.hso_end = 208,
+		.vso_begin = 128,
+		.vso_end = 128,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 749,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080i60 = {
+	.encp = {
+		.dvi_settings = 0x2029,
+		.video_mode = 0x5ffc,
+		.video_mode_adv = 0x19,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 0x207,
+		.video_sync_mode_present = true,
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		/* video_filt_ctrl */
+		.video_ofld_voav_ofst = 0x11,
+		.video_ofld_voav_ofst_present = true,
+		.yfp1_htime = 516,
+		.yfp2_htime = 4355,
+		.max_pxcnt = 4399,
+		.hspuls_begin = 88,
+		.hspuls_end = 264,
+		.hspuls_switch = 88,
+		.vspuls_begin = 440,
+		.vspuls_end = 2200,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 516,
+		.havon_end = 4355,
+		.vavon_bline = 20,
+		.vavon_eline = 559,
+		.eqpuls_begin = 2288,
+		.eqpuls_begin_present = true,
+		.eqpuls_end = 2464,
+		.eqpuls_end_present = true,
+		.eqpuls_bline = 0,
+		.eqpuls_bline_present = true,
+		.eqpuls_eline = 4,
+		.eqpuls_eline_present = true,
+		.hso_begin = 264,
+		.hso_end = 176,
+		.vso_begin = 88,
+		.vso_end = 88,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080i50 = {
+	.encp = {
+		.dvi_settings = 0x202d,
+		.video_mode = 0x5ffc,
+		.video_mode_adv = 0x19,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 0x7,
+		.video_sync_mode_present = true,
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		/* video_filt_ctrl */
+		.video_ofld_voav_ofst = 0x11,
+		.video_ofld_voav_ofst_present = true,
+		.yfp1_htime = 526,
+		.yfp2_htime = 4365,
+		.max_pxcnt = 5279,
+		.hspuls_begin = 88,
+		.hspuls_end = 264,
+		.hspuls_switch = 88,
+		.vspuls_begin = 440,
+		.vspuls_end = 2200,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 526,
+		.havon_end = 4365,
+		.vavon_bline = 20,
+		.vavon_eline = 559,
+		.eqpuls_begin = 2288,
+		.eqpuls_begin_present = true,
+		.eqpuls_end = 2464,
+		.eqpuls_end_present = true,
+		.eqpuls_bline = 0,
+		.eqpuls_bline_present = true,
+		.eqpuls_eline = 4,
+		.eqpuls_eline_present = true,
+		.hso_begin = 142,
+		.hso_end = 230,
+		.vso_begin = 142,
+		.vso_end = 142,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080p24 = {
+	.encp = {
+		.dvi_settings = 0xd,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x18,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 0x7,
+		.video_sync_mode_present = true,
+		.video_yc_dly = 0,
+		.video_yc_dly_present = true,
+		.video_rgb_ctrl = 2,
+		.video_rgb_ctrl_present = true,
+		.video_filt_ctrl = 0x1052,
+		.video_filt_ctrl_present = true,
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 271,
+		.yfp2_htime = 2190,
+		.max_pxcnt = 2749,
+		.hspuls_begin = 44,
+		.hspuls_end = 132,
+		.hspuls_switch = 44,
+		.vspuls_begin = 220,
+		.vspuls_end = 2140,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 271,
+		.havon_end = 2190,
+		.vavon_bline = 41,
+		.vavon_eline = 1120,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		.eqpuls_bline = 0,
+		.eqpuls_bline_present = true,
+		.eqpuls_eline = 4,
+		.eqpuls_eline_present = true,
+		.hso_begin = 79,
+		.hso_end = 123,
+		.vso_begin = 79,
+		.vso_end = 79,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080p30 = {
+	.encp = {
+		.dvi_settings = 0x1,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x18,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		/* video_sync_mode */
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		.video_filt_ctrl = 0x1052,
+		.video_filt_ctrl_present = true,
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 140,
+		.yfp2_htime = 2060,
+		.max_pxcnt = 2199,
+		.hspuls_begin = 2156,
+		.hspuls_end = 44,
+		.hspuls_switch = 44,
+		.vspuls_begin = 140,
+		.vspuls_end = 2059,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 148,
+		.havon_end = 2067,
+		.vavon_bline = 41,
+		.vavon_eline = 1120,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 44,
+		.hso_end = 2156,
+		.vso_begin = 2100,
+		.vso_end = 2164,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080p50 = {
+	.encp = {
+		.dvi_settings = 0xd,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x18,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		.video_sync_mode = 0x7,
+		.video_sync_mode_present = true,
+		.video_yc_dly = 0,
+		.video_yc_dly_present = true,
+		.video_rgb_ctrl = 2,
+		.video_rgb_ctrl_present = true,
+		/* video_filt_ctrl */
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 271,
+		.yfp2_htime = 2190,
+		.max_pxcnt = 2639,
+		.hspuls_begin = 44,
+		.hspuls_end = 132,
+		.hspuls_switch = 44,
+		.vspuls_begin = 220,
+		.vspuls_end = 2140,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 271,
+		.havon_end = 2190,
+		.vavon_bline = 41,
+		.vavon_eline = 1120,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		.eqpuls_bline = 0,
+		.eqpuls_bline_present = true,
+		.eqpuls_eline = 4,
+		.eqpuls_eline_present = true,
+		.hso_begin = 79,
+		.hso_end = 123,
+		.vso_begin = 79,
+		.vso_end = 79,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+union meson_hdmi_venc_mode meson_hdmi_encp_mode_1080p60 = {
+	.encp = {
+		.dvi_settings = 0x1,
+		.video_mode = 0x4040,
+		.video_mode_adv = 0x18,
+		.video_prog_mode = 0x100,
+		.video_prog_mode_present = true,
+		/* video_sync_mode */
+		/* video_yc_dly */
+		/* video_rgb_ctrl */
+		.video_filt_ctrl = 0x1052,
+		.video_filt_ctrl_present = true,
+		/* video_ofld_voav_ofst */
+		.yfp1_htime = 140,
+		.yfp2_htime = 2060,
+		.max_pxcnt = 2199,
+		.hspuls_begin = 2156,
+		.hspuls_end = 44,
+		.hspuls_switch = 44,
+		.vspuls_begin = 140,
+		.vspuls_end = 2059,
+		.vspuls_bline = 0,
+		.vspuls_eline = 4,
+		.havon_begin = 148,
+		.havon_end = 2067,
+		.vavon_bline = 41,
+		.vavon_eline = 1120,
+		/* eqpuls_begin */
+		/* eqpuls_end */
+		/* eqpuls_bline */
+		/* eqpuls_eline */
+		.hso_begin = 44,
+		.hso_end = 2156,
+		.vso_begin = 2100,
+		.vso_end = 2164,
+		.vso_bline = 0,
+		.vso_eline = 5,
+		.vso_eline_present = true,
+		/* sy_val */
+		/* sy2_val */
+		.max_lncnt = 1124,
+	},
+};
+
+struct meson_hdmi_venc_vic_mode {
+	unsigned int vic;
+	union meson_hdmi_venc_mode *mode;
+} meson_hdmi_venc_vic_modes[] = {
+	{ 6, &meson_hdmi_enci_mode_480i },
+	{ 7, &meson_hdmi_enci_mode_480i },
+	{ 21, &meson_hdmi_enci_mode_576i },
+	{ 22, &meson_hdmi_enci_mode_576i },
+	{ 2, &meson_hdmi_encp_mode_480p },
+	{ 3, &meson_hdmi_encp_mode_480p },
+	{ 17, &meson_hdmi_encp_mode_576p },
+	{ 18, &meson_hdmi_encp_mode_576p },
+	{ 4, &meson_hdmi_encp_mode_720p60 },
+	{ 19, &meson_hdmi_encp_mode_720p50 },
+	{ 5, &meson_hdmi_encp_mode_1080i60 },
+	{ 20, &meson_hdmi_encp_mode_1080i50 },
+	{ 32, &meson_hdmi_encp_mode_1080p24 },
+	{ 34, &meson_hdmi_encp_mode_1080p30 },
+	{ 31, &meson_hdmi_encp_mode_1080p50 },
+	{ 16, &meson_hdmi_encp_mode_1080p60 },
+	{ 0, NULL}, /* sentinel */
+};
+
+static signed int to_signed(unsigned int a)
+{
+	if (a <= 7)
+		return a;
+	else
+		return a - 16;
+}
+
+static unsigned long modulo(unsigned long a, unsigned long b)
+{
+	if (a >= b)
+		return a - b;
+	else
+		return a;
+}
+
+bool meson_venc_hdmi_supported_vic(int vic)
+{
+	struct meson_hdmi_venc_vic_mode *vmode = meson_hdmi_venc_vic_modes;
+
+	while (vmode->vic && vmode->mode) {
+		if (vmode->vic == vic)
+			return true;
+		vmode++;
+	}
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(meson_venc_hdmi_supported_vic);
+
+static union meson_hdmi_venc_mode *meson_venc_hdmi_get_vic_vmode(int vic)
+{
+	struct meson_hdmi_venc_vic_mode *vmode = meson_hdmi_venc_vic_modes;
+
+	while (vmode->vic && vmode->mode) {
+		if (vmode->vic == vic)
+			return vmode->mode;
+		vmode++;
+	}
+
+	return NULL;
+}
+
+bool meson_venc_hdmi_venc_repeat(int vic)
+{
+	/* Repeat VENC pixels for 480/576i/p, 720p50/60 and 1080p50/60 */
+	if (vic == 6 || vic == 7 || /* 480i */
+	    vic == 21 || vic == 22 || /* 576i */
+	    vic == 17 || vic == 18 || /* 576p */
+	    vic == 2 || vic == 3 || /* 480p */
+	    vic == 4 || /* 720p60 */
+	    vic == 19 || /* 720p50 */
+	    vic == 5 || /* 1080i60 */
+	    vic == 20)	/* 1080i50 */
+		return true;
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(meson_venc_hdmi_venc_repeat);
+
+void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic,
+			      struct drm_display_mode *mode)
+{
+	union meson_hdmi_venc_mode *vmode = NULL;
+	bool use_enci = false;
+	bool venc_repeat = false;
+	bool hdmi_repeat = false;
+	unsigned int venc_hdmi_latency = 2;
+	unsigned long total_pixels_venc = 0;
+	unsigned long active_pixels_venc = 0;
+	unsigned long front_porch_venc = 0;
+	unsigned long hsync_pixels_venc = 0;
+	unsigned long de_h_begin = 0;
+	unsigned long de_h_end = 0;
+	unsigned long de_v_begin_even = 0;
+	unsigned long de_v_end_even = 0;
+	unsigned long de_v_begin_odd = 0;
+	unsigned long de_v_end_odd = 0;
+	unsigned long hs_begin = 0;
+	unsigned long hs_end = 0;
+	unsigned long vs_adjust = 0;
+	unsigned long vs_bline_evn = 0;
+	unsigned long vs_eline_evn = 0;
+	unsigned long vs_bline_odd = 0;
+	unsigned long vs_eline_odd = 0;
+	unsigned long vso_begin_evn = 0;
+	unsigned long vso_begin_odd = 0;
+	unsigned int eof_lines;
+	unsigned int sof_lines;
+	unsigned int vsync_lines;
+
+	vmode = meson_venc_hdmi_get_vic_vmode(vic);
+	if (!vmode) {
+		dev_err(priv->dev, "%s: Fatal Error, unsupported vic %d\n",
+			__func__, vic);
+		return;
+	}
+
+	/* Use VENCI for 480i and 576i and double HDMI pixels */
+	if (mode->flags & DRM_MODE_FLAG_DBLCLK) {
+		hdmi_repeat = true;
+		use_enci = true;
+		venc_hdmi_latency = 1;
+	}
+
+	/* Repeat VENC pixels for 480/576i/p, 720p50/60 and 1080p50/60 */
+	if (meson_venc_hdmi_venc_repeat(vic))
+		venc_repeat = true;
+
+	eof_lines = mode->vsync_start - mode->vdisplay;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		eof_lines /= 2;
+	sof_lines = mode->vtotal - mode->vsync_end;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		sof_lines /= 2;
+	vsync_lines = mode->vsync_end - mode->vsync_start;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+		vsync_lines /= 2;
+
+	total_pixels_venc = mode->htotal;
+	if (hdmi_repeat)
+		total_pixels_venc /= 2;
+	if (venc_repeat)
+		total_pixels_venc *= 2;
+
+	active_pixels_venc = mode->hdisplay;
+	if (hdmi_repeat)
+		active_pixels_venc /= 2;
+	if (venc_repeat)
+		active_pixels_venc *= 2;
+
+	front_porch_venc = (mode->hsync_start - mode->hdisplay);
+	if (hdmi_repeat)
+		front_porch_venc /= 2;
+	if (venc_repeat)
+		front_porch_venc *= 2;
+
+	hsync_pixels_venc = (mode->hsync_end - mode->hsync_start);
+	if (hdmi_repeat)
+		hsync_pixels_venc /= 2;
+	if (venc_repeat)
+		hsync_pixels_venc *= 2;
+
+	/* Disable VDACs */
+	writel_bits_relaxed(0x1f, 0x1f,
+			priv->io_base + _REG(VENC_VDAC_SETTING));
+
+	writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_EN));
+	writel_relaxed(0, priv->io_base + _REG(ENCP_VIDEO_EN));
+
+	if (use_enci) {
+		unsigned int lines_f0;
+		unsigned int lines_f1;
+
+		/* CVBS Filter settings */
+		writel_relaxed(0x12, priv->io_base + _REG(ENCI_CFILT_CTRL));
+		writel_relaxed(0x12, priv->io_base + _REG(ENCI_CFILT_CTRL2));
+
+		/* Digital Video Select : Interlace, clk27 clk, external */
+		writel_relaxed(0, priv->io_base + _REG(VENC_DVI_SETTING));
+
+		/* Reset Video Mode */
+		writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_MODE));
+		writel_relaxed(0, priv->io_base + _REG(ENCI_VIDEO_MODE_ADV));
+
+		/* Horizontal sync signal output */
+		writel_relaxed(vmode->enci.hso_begin,
+				priv->io_base + _REG(ENCI_SYNC_HSO_BEGIN));
+		writel_relaxed(vmode->enci.hso_end,
+				priv->io_base + _REG(ENCI_SYNC_HSO_END));
+
+		/* Vertical Sync lines */
+		writel_relaxed(vmode->enci.vso_even,
+				priv->io_base + _REG(ENCI_SYNC_VSO_EVNLN));
+		writel_relaxed(vmode->enci.vso_odd,
+				priv->io_base + _REG(ENCI_SYNC_VSO_ODDLN));
+
+		/* Macrovision max amplitude change */
+		writel_relaxed(vmode->enci.macv_max_amp,
+				priv->io_base + _REG(ENCI_MACV_MAX_AMP));
+
+		/* Video mode */
+		writel_relaxed(vmode->enci.video_prog_mode,
+				priv->io_base + _REG(VENC_VIDEO_PROG_MODE));
+		writel_relaxed(vmode->enci.video_mode,
+				priv->io_base + _REG(ENCI_VIDEO_MODE));
+
+		/* Advanced Video Mode :
+		 * Demux shifting 0x2
+		 * Blank line end at line17/22
+		 * High bandwidth Luma Filter
+		 * Low bandwidth Chroma Filter
+		 * Bypass luma low pass filter
+		 * No macrovision on CSYNC
+		 */
+		writel_relaxed(0x26, priv->io_base + _REG(ENCI_VIDEO_MODE_ADV));
+
+		writel(vmode->enci.sch_adjust,
+				priv->io_base + _REG(ENCI_VIDEO_SCH));
+
+		/* Sync mode : MASTER Master mode, free run, send HSO/VSO out */
+		writel_relaxed(0x07, priv->io_base + _REG(ENCI_SYNC_MODE));
+
+		if (vmode->enci.yc_delay)
+			writel_relaxed(vmode->enci.yc_delay,
+					priv->io_base + _REG(ENCI_YC_DELAY));
+
+
+		/* UNreset Interlaced TV Encoder */
+		writel_relaxed(0, priv->io_base + _REG(ENCI_DBG_PX_RST));
+
+		/* Enable Vfifo2vd, Y_Cb_Y_Cr select */
+		writel_relaxed(0x4e01, priv->io_base + _REG(ENCI_VFIFO2VD_CTL));
+
+		/* Timings */
+		writel_relaxed(vmode->enci.pixel_start,
+			priv->io_base + _REG(ENCI_VFIFO2VD_PIXEL_START));
+		writel_relaxed(vmode->enci.pixel_end,
+			priv->io_base + _REG(ENCI_VFIFO2VD_PIXEL_END));
+
+		writel_relaxed(vmode->enci.top_field_line_start,
+			priv->io_base + _REG(ENCI_VFIFO2VD_LINE_TOP_START));
+		writel_relaxed(vmode->enci.top_field_line_end,
+			priv->io_base + _REG(ENCI_VFIFO2VD_LINE_TOP_END));
+
+		writel_relaxed(vmode->enci.bottom_field_line_start,
+			priv->io_base + _REG(ENCI_VFIFO2VD_LINE_BOT_START));
+		writel_relaxed(vmode->enci.bottom_field_line_end,
+			priv->io_base + _REG(ENCI_VFIFO2VD_LINE_BOT_END));
+
+		/* Select ENCI for VIU */
+		meson_vpp_setup_mux(priv, MESON_VIU_VPP_MUX_ENCI);
+
+		/* Interlace video enable */
+		writel_relaxed(1, priv->io_base + _REG(ENCI_VIDEO_EN));
+
+		lines_f0 = mode->vtotal >> 1;
+		lines_f1 = lines_f0 + 1;
+
+		de_h_begin = modulo(readl_relaxed(priv->io_base +
+					_REG(ENCI_VFIFO2VD_PIXEL_START))
+					+ venc_hdmi_latency,
+				    total_pixels_venc);
+		de_h_end  = modulo(de_h_begin + active_pixels_venc,
+				   total_pixels_venc);
+
+		writel_relaxed(de_h_begin,
+				priv->io_base + _REG(ENCI_DE_H_BEGIN));
+		writel_relaxed(de_h_end,
+				priv->io_base + _REG(ENCI_DE_H_END));
+
+		de_v_begin_even = readl_relaxed(priv->io_base +
+					_REG(ENCI_VFIFO2VD_LINE_TOP_START));
+		de_v_end_even  = de_v_begin_even + mode->vdisplay;
+		de_v_begin_odd = readl_relaxed(priv->io_base +
+					_REG(ENCI_VFIFO2VD_LINE_BOT_START));
+		de_v_end_odd = de_v_begin_odd + mode->vdisplay;
+
+		writel_relaxed(de_v_begin_even,
+				priv->io_base + _REG(ENCI_DE_V_BEGIN_EVEN));
+		writel_relaxed(de_v_end_even,
+				priv->io_base + _REG(ENCI_DE_V_END_EVEN));
+		writel_relaxed(de_v_begin_odd,
+				priv->io_base + _REG(ENCI_DE_V_BEGIN_ODD));
+		writel_relaxed(de_v_end_odd,
+				priv->io_base + _REG(ENCI_DE_V_END_ODD));
+
+		/* Program Hsync timing */
+		hs_begin = de_h_end + front_porch_venc;
+		if (de_h_end + front_porch_venc >= total_pixels_venc) {
+			hs_begin -= total_pixels_venc;
+			vs_adjust  = 1;
+		} else {
+			hs_begin = de_h_end + front_porch_venc;
+			vs_adjust  = 0;
+		}
+
+		hs_end = modulo(hs_begin + hsync_pixels_venc,
+				total_pixels_venc);
+		writel_relaxed(hs_begin,
+				priv->io_base + _REG(ENCI_DVI_HSO_BEGIN));
+		writel_relaxed(hs_end,
+				priv->io_base + _REG(ENCI_DVI_HSO_END));
+
+		/* Program Vsync timing for even field */
+		if (((de_v_end_odd - 1) + eof_lines + vs_adjust) >= lines_f1) {
+			vs_bline_evn = (de_v_end_odd - 1)
+					+ eof_lines
+					+ vs_adjust
+					- lines_f1;
+			vs_eline_evn = vs_bline_evn + vsync_lines;
+
+			writel_relaxed(vs_bline_evn,
+				priv->io_base + _REG(ENCI_DVI_VSO_BLINE_EVN));
+
+			writel_relaxed(vs_eline_evn,
+				priv->io_base + _REG(ENCI_DVI_VSO_ELINE_EVN));
+
+			writel_relaxed(hs_begin,
+				priv->io_base + _REG(ENCI_DVI_VSO_BEGIN_EVN));
+			writel_relaxed(hs_begin,
+				priv->io_base + _REG(ENCI_DVI_VSO_END_EVN));
+		} else {
+			vs_bline_odd = (de_v_end_odd - 1)
+					+ eof_lines
+					+ vs_adjust;
+
+			writel_relaxed(vs_bline_odd,
+				priv->io_base + _REG(ENCI_DVI_VSO_BLINE_ODD));
+
+			writel_relaxed(hs_begin,
+				priv->io_base + _REG(ENCI_DVI_VSO_BEGIN_ODD));
+
+			if ((vs_bline_odd + vsync_lines) >= lines_f1) {
+				vs_eline_evn = vs_bline_odd
+						+ vsync_lines
+						- lines_f1;
+
+				writel_relaxed(vs_eline_evn, priv->io_base
+						+ _REG(ENCI_DVI_VSO_ELINE_EVN));
+
+				writel_relaxed(hs_begin, priv->io_base
+						+ _REG(ENCI_DVI_VSO_END_EVN));
+			} else {
+				vs_eline_odd = vs_bline_odd
+						+ vsync_lines;
+
+				writel_relaxed(vs_eline_odd, priv->io_base
+						+ _REG(ENCI_DVI_VSO_ELINE_ODD));
+
+				writel_relaxed(hs_begin, priv->io_base
+						+ _REG(ENCI_DVI_VSO_END_ODD));
+			}
+		}
+
+		/* Program Vsync timing for odd field */
+		if (((de_v_end_even - 1) + (eof_lines + 1)) >= lines_f0) {
+			vs_bline_odd = (de_v_end_even - 1)
+					+ (eof_lines + 1)
+					- lines_f0;
+			vs_eline_odd = vs_bline_odd + vsync_lines;
+
+			writel_relaxed(vs_bline_odd,
+				priv->io_base + _REG(ENCI_DVI_VSO_BLINE_ODD));
+
+			writel_relaxed(vs_eline_odd,
+				priv->io_base + _REG(ENCI_DVI_VSO_ELINE_ODD));
+
+			vso_begin_odd  = modulo(hs_begin
+						+ (total_pixels_venc >> 1),
+						total_pixels_venc);
+
+			writel_relaxed(vso_begin_odd,
+				priv->io_base + _REG(ENCI_DVI_VSO_BEGIN_ODD));
+			writel_relaxed(vso_begin_odd,
+				priv->io_base + _REG(ENCI_DVI_VSO_END_ODD));
+		} else {
+			vs_bline_evn = (de_v_end_even - 1)
+					+ (eof_lines + 1);
+
+			writel_relaxed(vs_bline_evn,
+				priv->io_base + _REG(ENCI_DVI_VSO_BLINE_EVN));
+
+			vso_begin_evn  = modulo(hs_begin
+						+ (total_pixels_venc >> 1),
+						total_pixels_venc);
+
+			writel_relaxed(vso_begin_evn, priv->io_base
+					+ _REG(ENCI_DVI_VSO_BEGIN_EVN));
+
+			if (vs_bline_evn + vsync_lines >= lines_f0) {
+				vs_eline_odd = vs_bline_evn
+						+ vsync_lines
+						- lines_f0;
+
+				writel_relaxed(vs_eline_odd, priv->io_base
+						+ _REG(ENCI_DVI_VSO_ELINE_ODD));
+
+				writel_relaxed(vso_begin_evn, priv->io_base
+						+ _REG(ENCI_DVI_VSO_END_ODD));
+			} else {
+				vs_eline_evn = vs_bline_evn + vsync_lines;
+
+				writel_relaxed(vs_eline_evn, priv->io_base
+						+ _REG(ENCI_DVI_VSO_ELINE_EVN));
+
+				writel_relaxed(vso_begin_evn, priv->io_base
+						+ _REG(ENCI_DVI_VSO_END_EVN));
+			}
+		}
+	} else {
+		writel_relaxed(vmode->encp.dvi_settings,
+				priv->io_base + _REG(VENC_DVI_SETTING));
+		writel_relaxed(vmode->encp.video_mode,
+				priv->io_base + _REG(ENCP_VIDEO_MODE));
+		writel_relaxed(vmode->encp.video_mode_adv,
+				priv->io_base + _REG(ENCP_VIDEO_MODE_ADV));
+		if (vmode->encp.video_prog_mode_present)
+			writel_relaxed(vmode->encp.video_prog_mode,
+				priv->io_base + _REG(VENC_VIDEO_PROG_MODE));
+		if (vmode->encp.video_sync_mode_present)
+			writel_relaxed(vmode->encp.video_sync_mode,
+				priv->io_base + _REG(ENCP_VIDEO_SYNC_MODE));
+		if (vmode->encp.video_yc_dly_present)
+			writel_relaxed(vmode->encp.video_yc_dly,
+				priv->io_base + _REG(ENCP_VIDEO_YC_DLY));
+		if (vmode->encp.video_rgb_ctrl_present)
+			writel_relaxed(vmode->encp.video_rgb_ctrl,
+				priv->io_base + _REG(ENCP_VIDEO_RGB_CTRL));
+		if (vmode->encp.video_filt_ctrl_present)
+			writel_relaxed(vmode->encp.video_filt_ctrl,
+				priv->io_base + _REG(ENCP_VIDEO_FILT_CTRL));
+		if (vmode->encp.video_ofld_voav_ofst_present)
+			writel_relaxed(vmode->encp.video_ofld_voav_ofst,
+				priv->io_base
+				+ _REG(ENCP_VIDEO_OFLD_VOAV_OFST));
+		writel_relaxed(vmode->encp.yfp1_htime,
+				priv->io_base + _REG(ENCP_VIDEO_YFP1_HTIME));
+		writel_relaxed(vmode->encp.yfp2_htime,
+				priv->io_base + _REG(ENCP_VIDEO_YFP2_HTIME));
+		writel_relaxed(vmode->encp.max_pxcnt,
+				priv->io_base + _REG(ENCP_VIDEO_MAX_PXCNT));
+		writel_relaxed(vmode->encp.hspuls_begin,
+				priv->io_base + _REG(ENCP_VIDEO_HSPULS_BEGIN));
+		writel_relaxed(vmode->encp.hspuls_end,
+				priv->io_base + _REG(ENCP_VIDEO_HSPULS_END));
+		writel_relaxed(vmode->encp.hspuls_switch,
+				priv->io_base + _REG(ENCP_VIDEO_HSPULS_SWITCH));
+		writel_relaxed(vmode->encp.vspuls_begin,
+				priv->io_base + _REG(ENCP_VIDEO_VSPULS_BEGIN));
+		writel_relaxed(vmode->encp.vspuls_end,
+				priv->io_base + _REG(ENCP_VIDEO_VSPULS_END));
+		writel_relaxed(vmode->encp.vspuls_bline,
+				priv->io_base + _REG(ENCP_VIDEO_VSPULS_BLINE));
+		writel_relaxed(vmode->encp.vspuls_eline,
+				priv->io_base + _REG(ENCP_VIDEO_VSPULS_ELINE));
+		if (vmode->encp.eqpuls_begin_present)
+			writel_relaxed(vmode->encp.eqpuls_begin,
+				priv->io_base + _REG(ENCP_VIDEO_EQPULS_BEGIN));
+		if (vmode->encp.eqpuls_end_present)
+			writel_relaxed(vmode->encp.eqpuls_end,
+				priv->io_base + _REG(ENCP_VIDEO_EQPULS_END));
+		if (vmode->encp.eqpuls_bline_present)
+			writel_relaxed(vmode->encp.eqpuls_bline,
+				priv->io_base + _REG(ENCP_VIDEO_EQPULS_BLINE));
+		if (vmode->encp.eqpuls_eline_present)
+			writel_relaxed(vmode->encp.eqpuls_eline,
+				priv->io_base + _REG(ENCP_VIDEO_EQPULS_ELINE));
+		writel_relaxed(vmode->encp.havon_begin,
+				priv->io_base + _REG(ENCP_VIDEO_HAVON_BEGIN));
+		writel_relaxed(vmode->encp.havon_end,
+				priv->io_base + _REG(ENCP_VIDEO_HAVON_END));
+		writel_relaxed(vmode->encp.vavon_bline,
+				priv->io_base + _REG(ENCP_VIDEO_VAVON_BLINE));
+		writel_relaxed(vmode->encp.vavon_eline,
+				priv->io_base + _REG(ENCP_VIDEO_VAVON_ELINE));
+		writel_relaxed(vmode->encp.hso_begin,
+				priv->io_base + _REG(ENCP_VIDEO_HSO_BEGIN));
+		writel_relaxed(vmode->encp.hso_end,
+				priv->io_base + _REG(ENCP_VIDEO_HSO_END));
+		writel_relaxed(vmode->encp.vso_begin,
+				priv->io_base + _REG(ENCP_VIDEO_VSO_BEGIN));
+		writel_relaxed(vmode->encp.vso_end,
+				priv->io_base + _REG(ENCP_VIDEO_VSO_END));
+		writel_relaxed(vmode->encp.vso_bline,
+				priv->io_base + _REG(ENCP_VIDEO_VSO_BLINE));
+		if (vmode->encp.vso_eline_present)
+			writel_relaxed(vmode->encp.vso_eline,
+				priv->io_base + _REG(ENCP_VIDEO_VSO_ELINE));
+		if (vmode->encp.sy_val_present)
+			writel_relaxed(vmode->encp.sy_val,
+				priv->io_base + _REG(ENCP_VIDEO_SY_VAL));
+		if (vmode->encp.sy2_val_present)
+			writel_relaxed(vmode->encp.sy2_val,
+				priv->io_base + _REG(ENCP_VIDEO_SY2_VAL));
+		writel_relaxed(vmode->encp.max_lncnt,
+				priv->io_base + _REG(ENCP_VIDEO_MAX_LNCNT));
+
+		writel_relaxed(1, priv->io_base + _REG(ENCP_VIDEO_EN));
+
+		/* Set DE signal’s polarity is active high */
+		writel_bits_relaxed(BIT(14), BIT(14),
+				    priv->io_base + _REG(ENCP_VIDEO_MODE));
+
+		/* Program DE timing */
+		de_h_begin = modulo(readl_relaxed(priv->io_base +
+					_REG(ENCP_VIDEO_HAVON_BEGIN))
+					+ venc_hdmi_latency,
+				    total_pixels_venc);
+		de_h_end = modulo(de_h_begin + active_pixels_venc,
+				  total_pixels_venc);
+
+		writel_relaxed(de_h_begin,
+				priv->io_base + _REG(ENCP_DE_H_BEGIN));
+		writel_relaxed(de_h_end,
+				priv->io_base + _REG(ENCP_DE_H_END));
+
+		/* Program DE timing for even field */
+		de_v_begin_even = readl_relaxed(priv->io_base
+						+ _REG(ENCP_VIDEO_VAVON_BLINE));
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+			de_v_end_even = de_v_begin_even +
+					(mode->vdisplay / 2);
+		else
+			de_v_end_even = de_v_begin_even + mode->vdisplay;
+
+		writel_relaxed(de_v_begin_even,
+				priv->io_base + _REG(ENCP_DE_V_BEGIN_EVEN));
+		writel_relaxed(de_v_end_even,
+				priv->io_base + _REG(ENCP_DE_V_END_EVEN));
+
+		/* Program DE timing for odd field if needed */
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+			unsigned int ofld_voav_ofst =
+				readl_relaxed(priv->io_base +
+					_REG(ENCP_VIDEO_OFLD_VOAV_OFST));
+			de_v_begin_odd = to_signed((ofld_voav_ofst & 0xf0) >> 4)
+						+ de_v_begin_even
+						+ ((mode->vtotal - 1) / 2);
+			de_v_end_odd = de_v_begin_odd + (mode->vdisplay / 2);
+
+			writel_relaxed(de_v_begin_odd,
+				priv->io_base + _REG(ENCP_DE_V_BEGIN_ODD));
+			writel_relaxed(de_v_end_odd,
+				priv->io_base + _REG(ENCP_DE_V_END_ODD));
+		}
+
+		/* Program Hsync timing */
+		if ((de_h_end + front_porch_venc) >= total_pixels_venc) {
+			hs_begin = de_h_end
+				   + front_porch_venc
+				   - total_pixels_venc;
+			vs_adjust  = 1;
+		} else {
+			hs_begin = de_h_end
+				   + front_porch_venc;
+			vs_adjust  = 0;
+		}
+
+		hs_end = modulo(hs_begin + hsync_pixels_venc,
+				total_pixels_venc);
+
+		writel_relaxed(hs_begin,
+				priv->io_base + _REG(ENCP_DVI_HSO_BEGIN));
+		writel_relaxed(hs_end,
+				priv->io_base + _REG(ENCP_DVI_HSO_END));
+
+		/* Program Vsync timing for even field */
+		if (de_v_begin_even >=
+				(sof_lines + vsync_lines + (1 - vs_adjust)))
+			vs_bline_evn = de_v_begin_even
+					- sof_lines
+					- vsync_lines
+					- (1 - vs_adjust);
+		else
+			vs_bline_evn = mode->vtotal
+					+ de_v_begin_even
+					- sof_lines
+					- vsync_lines
+					- (1 - vs_adjust);
+
+		vs_eline_evn = modulo(vs_bline_evn + vsync_lines,
+					mode->vtotal);
+
+		writel_relaxed(vs_bline_evn,
+				priv->io_base + _REG(ENCP_DVI_VSO_BLINE_EVN));
+		writel_relaxed(vs_eline_evn,
+				priv->io_base + _REG(ENCP_DVI_VSO_ELINE_EVN));
+
+		vso_begin_evn = hs_begin;
+		writel_relaxed(vso_begin_evn,
+				priv->io_base + _REG(ENCP_DVI_VSO_BEGIN_EVN));
+		writel_relaxed(vso_begin_evn,
+				priv->io_base + _REG(ENCP_DVI_VSO_END_EVN));
+
+		/* Program Vsync timing for odd field if needed */
+		if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+			vs_bline_odd = (de_v_begin_odd - 1)
+					- sof_lines
+					- vsync_lines;
+			vs_eline_odd = (de_v_begin_odd - 1)
+					- vsync_lines;
+			vso_begin_odd  = modulo(hs_begin
+						+ (total_pixels_venc >> 1),
+						total_pixels_venc);
+
+			writel_relaxed(vs_bline_odd,
+				priv->io_base + _REG(ENCP_DVI_VSO_BLINE_ODD));
+			writel_relaxed(vs_eline_odd,
+				priv->io_base + _REG(ENCP_DVI_VSO_ELINE_ODD));
+			writel_relaxed(vso_begin_odd,
+				priv->io_base + _REG(ENCP_DVI_VSO_BEGIN_ODD));
+			writel_relaxed(vso_begin_odd,
+				priv->io_base + _REG(ENCP_DVI_VSO_END_ODD));
+		}
+
+		/* Select ENCP for VIU */
+		meson_vpp_setup_mux(priv, MESON_VIU_VPP_MUX_ENCP);
+	}
+
+	writel_relaxed((use_enci ? 1 : 2) |
+		       (mode->flags & DRM_MODE_FLAG_PHSYNC ? 1 << 2 : 0) |
+		       (mode->flags & DRM_MODE_FLAG_PVSYNC ? 1 << 3 : 0) |
+		       4 << 5 |
+		       (venc_repeat ? 1 << 8 : 0) |
+		       (hdmi_repeat ? 1 << 12 : 0),
+		       priv->io_base + _REG(VPU_HDMI_SETTING));
+
+	priv->venc.hdmi_repeat = hdmi_repeat;
+	priv->venc.venc_repeat = venc_repeat;
+	priv->venc.hdmi_use_enci = use_enci;
+
+	priv->venc.current_mode = MESON_VENC_MODE_HDMI;
+}
+EXPORT_SYMBOL_GPL(meson_venc_hdmi_mode_set);
+
 void meson_venci_cvbs_mode_set(struct meson_drm *priv,
 			       struct meson_cvbs_enci_mode *mode)
 {
@@ -223,9 +1468,6 @@ void meson_venci_cvbs_mode_set(struct meson_drm *priv,
 	writel_relaxed(mode->analog_sync_adj,
 			priv->io_base + _REG(ENCI_SYNC_ADJ));
 
-	/* Setup 27MHz vclk2 for ENCI and VDAC */
-	meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS, MESON_VCLK_CVBS);
-
 	priv->venc.current_mode = mode->mode_tag;
 }
 
diff --git a/drivers/gpu/drm/meson/meson_venc.h b/drivers/gpu/drm/meson/meson_venc.h
index 77d4a7d82c44..a1b96e898c14 100644
--- a/drivers/gpu/drm/meson/meson_venc.h
+++ b/drivers/gpu/drm/meson/meson_venc.h
@@ -30,6 +30,7 @@ enum {
 	MESON_VENC_MODE_NONE = 0,
 	MESON_VENC_MODE_CVBS_PAL,
 	MESON_VENC_MODE_CVBS_NTSC,
+	MESON_VENC_MODE_HDMI,
 };
 
 struct meson_cvbs_enci_mode {
@@ -56,12 +57,18 @@ struct meson_cvbs_enci_mode {
 	unsigned int analog_sync_adj;
 };
 
+/* HDMI Clock parameters */
+bool meson_venc_hdmi_supported_vic(int vic);
+bool meson_venc_hdmi_venc_repeat(int vic);
+
 /* CVBS Timings and Parameters */
 extern struct meson_cvbs_enci_mode meson_cvbs_enci_pal;
 extern struct meson_cvbs_enci_mode meson_cvbs_enci_ntsc;
 
 void meson_venci_cvbs_mode_set(struct meson_drm *priv,
 			       struct meson_cvbs_enci_mode *mode);
+void meson_venc_hdmi_mode_set(struct meson_drm *priv, int vic,
+			      struct drm_display_mode *mode);
 unsigned int meson_venci_get_field(struct meson_drm *priv);
 
 void meson_venc_enable_vsync(struct meson_drm *priv);
diff --git a/drivers/gpu/drm/meson/meson_venc_cvbs.c b/drivers/gpu/drm/meson/meson_venc_cvbs.c
index a2bcc70a03ef..00775b397dba 100644
--- a/drivers/gpu/drm/meson/meson_venc_cvbs.c
+++ b/drivers/gpu/drm/meson/meson_venc_cvbs.c
@@ -32,6 +32,7 @@
 
 #include "meson_venc_cvbs.h"
 #include "meson_venc.h"
+#include "meson_vclk.h"
 #include "meson_registers.h"
 
 /* HHI VDAC Registers */
@@ -194,14 +195,20 @@ static void meson_venc_cvbs_encoder_mode_set(struct drm_encoder *encoder,
 {
 	struct meson_venc_cvbs *meson_venc_cvbs =
 					encoder_to_meson_venc_cvbs(encoder);
+	struct meson_drm *priv = meson_venc_cvbs->priv;
 	int i;
 
 	for (i = 0; i < MESON_CVBS_MODES_COUNT; ++i) {
 		struct meson_cvbs_mode *meson_mode = &meson_cvbs_modes[i];
 
 		if (drm_mode_equal(mode, &meson_mode->mode)) {
-			meson_venci_cvbs_mode_set(meson_venc_cvbs->priv,
+			meson_venci_cvbs_mode_set(priv,
 						  meson_mode->enci);
+
+			/* Setup 27MHz vclk2 for ENCI and VDAC */
+			meson_vclk_setup(priv, MESON_VCLK_TARGET_CVBS,
+					 MESON_VCLK_CVBS, MESON_VCLK_CVBS,
+					 MESON_VCLK_CVBS, true);
 			break;
 		}
 	}
@@ -217,25 +224,14 @@ static const struct drm_encoder_helper_funcs
 
 static bool meson_venc_cvbs_connector_is_available(struct meson_drm *priv)
 {
-	struct device_node *ep, *remote;
+	struct device_node *remote;
 
-	/* CVBS VDAC output is on the first port, first endpoint */
-	ep = of_graph_get_endpoint_by_regs(priv->dev->of_node, 0, 0);
-	if (!ep)
+	remote = of_graph_get_remote_node(priv->dev->of_node, 0, 0);
+	if (!remote)
 		return false;
 
-
-	/* If the endpoint node exists, consider it enabled */
-	remote = of_graph_get_remote_port(ep);
-	if (remote) {
-		of_node_put(ep);
-		return true;
-	}
-
-	of_node_put(ep);
 	of_node_put(remote);
-
-	return false;
+	return true;
 }
 
 int meson_venc_cvbs_create(struct meson_drm *priv)
@@ -248,7 +244,7 @@ int meson_venc_cvbs_create(struct meson_drm *priv)
 
 	if (!meson_venc_cvbs_connector_is_available(priv)) {
 		dev_info(drm->dev, "CVBS Output connector not available\n");
-		return -ENODEV;
+		return 0;
 	}
 
 	meson_venc_cvbs = devm_kzalloc(priv->dev, sizeof(*meson_venc_cvbs),
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index a6de8ba7af19..6bcfa527c180 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -28,9 +28,12 @@
 #include "meson_canvas.h"
 #include "meson_registers.h"
 
-/*
+/**
+ * DOC: Video Input Unit
+ *
  * VIU Handles the Pixel scanout and the basic Colorspace conversions
  * We handle the following features :
+ *
  * - OSD1 RGB565/RGB888/xRGB8888 scanout
  * - RGB conversion to x/cb/cr
  * - Progressive or Interlace buffer scanout
@@ -38,6 +41,7 @@
  * - HDR OSD matrix for GXL/GXM
  *
  * What is missing :
+ *
  * - BGR888/xBGR8888/BGRx8888/BGRx8888 modes
  * - YUV4:2:2 Y0CbY1Cr scanout
  * - Conversion to YUV 4:4:4 from 4:2:2 input
diff --git a/drivers/gpu/drm/meson/meson_vpp.c b/drivers/gpu/drm/meson/meson_vpp.c
index 671909d8672e..27356f81a0ab 100644
--- a/drivers/gpu/drm/meson/meson_vpp.c
+++ b/drivers/gpu/drm/meson/meson_vpp.c
@@ -25,16 +25,20 @@
 #include "meson_vpp.h"
 #include "meson_registers.h"
 
-/*
+/**
+ * DOC: Video Post Processing
+ *
  * VPP Handles all the Post Processing after the Scanout from the VIU
  * We handle the following post processings :
- * - Postblend : Blends the OSD1 only
+ *
+ * - Postblend, Blends the OSD1 only
  *	We exclude OSD2, VS1, VS1 and Preblend output
  * - Vertical OSD Scaler for OSD1 only, we disable vertical scaler and
  *	use it only for interlace scanout
  * - Intermediate FIFO with default Amlogic values
  *
  * What is missing :
+ *
  * - Preblend for video overlay pre-scaling
  * - OSD2 support for cursor framebuffer
  * - Video pre-scaling before postblend
diff --git a/drivers/gpu/drm/meson/meson_vpp.h b/drivers/gpu/drm/meson/meson_vpp.h
index ede3b26e0f22..815177cc7dfd 100644
--- a/drivers/gpu/drm/meson/meson_vpp.h
+++ b/drivers/gpu/drm/meson/meson_vpp.h
@@ -23,6 +23,8 @@
 
 /* Mux VIU/VPP to ENCI */
 #define MESON_VIU_VPP_MUX_ENCI	0x5
+/* Mux VIU/VPP to ENCP */
+#define MESON_VIU_VPP_MUX_ENCP	0xA
 
 void meson_vpp_setup_mux(struct meson_drm *priv, unsigned int mux);
 
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index f2e9b2bc18a5..adb411a078e8 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1393,7 +1393,8 @@ static void mga_crtc_commit(struct drm_crtc *crtc)
  * but it's a requirement that we provide the function
  */
 static int mga_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-			      u16 *blue, uint32_t size)
+			      u16 *blue, uint32_t size,
+			      struct drm_modeset_acquire_ctx *ctx)
 {
 	struct mga_crtc *mga_crtc = to_mga_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/mgag200/mgag200_ttm.c b/drivers/gpu/drm/mgag200/mgag200_ttm.c
index 657598bb1e6b..565a217b46f2 100644
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@@ -236,6 +236,7 @@ struct ttm_bo_driver mgag200_bo_driver = {
 	.verify_access = mgag200_bo_verify_access,
 	.io_mem_reserve = &mgag200_ttm_io_mem_reserve,
 	.io_mem_free = &mgag200_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int mgag200_mm_init(struct mga_device *mdev)
diff --git a/drivers/gpu/drm/msm/Makefile b/drivers/gpu/drm/msm/Makefile
index 39055362da95..5241ac8803ba 100644
--- a/drivers/gpu/drm/msm/Makefile
+++ b/drivers/gpu/drm/msm/Makefile
@@ -40,6 +40,7 @@ msm-y := \
 	mdp/mdp5/mdp5_mdss.o \
 	mdp/mdp5/mdp5_kms.o \
 	mdp/mdp5/mdp5_pipe.o \
+	mdp/mdp5/mdp5_mixer.o \
 	mdp/mdp5/mdp5_plane.o \
 	mdp/mdp5/mdp5_smp.o \
 	msm_atomic.o \
diff --git a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
index b999349b7d2d..7fd77958a436 100644
--- a/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a3xx_gpu.c
@@ -412,10 +412,8 @@ static const unsigned int a3xx_registers[] = {
 #ifdef CONFIG_DEBUG_FS
 static void a3xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
-	gpu->funcs->pm_resume(gpu);
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A3XX_RBBM_STATUS));
-	gpu->funcs->pm_suspend(gpu);
 	adreno_show(gpu, m);
 }
 #endif
diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
index 511bc855cc7f..dfe0eceaae3b 100644
--- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
@@ -456,12 +456,8 @@ static const unsigned int a4xx_registers[] = {
 #ifdef CONFIG_DEBUG_FS
 static void a4xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
-	gpu->funcs->pm_resume(gpu);
-
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A4XX_RBBM_STATUS));
-	gpu->funcs->pm_suspend(gpu);
-
 	adreno_show(gpu, m);
 
 }
diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
index 4414cf73735d..31a9bceed32c 100644
--- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 The Linux Foundation. All rights reserved.
+/* Copyright (c) 2016-2017 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -534,7 +534,7 @@ static void a5xx_destroy(struct msm_gpu *gpu)
 	}
 
 	if (a5xx_gpu->gpmu_bo) {
-		if (a5xx_gpu->gpmu_bo)
+		if (a5xx_gpu->gpmu_iova)
 			msm_gem_put_iova(a5xx_gpu->gpmu_bo, gpu->id);
 		drm_gem_object_unreference_unlocked(a5xx_gpu->gpmu_bo);
 	}
@@ -638,10 +638,8 @@ static void a5xx_cp_err_irq(struct msm_gpu *gpu)
 	}
 }
 
-static void a5xx_rbbm_err_irq(struct msm_gpu *gpu)
+static void a5xx_rbbm_err_irq(struct msm_gpu *gpu, u32 status)
 {
-	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
-
 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR) {
 		u32 val = gpu_read(gpu, REG_A5XX_RBBM_AHB_ERROR_STATUS);
 
@@ -653,6 +651,10 @@ static void a5xx_rbbm_err_irq(struct msm_gpu *gpu)
 
 		/* Clear the error */
 		gpu_write(gpu, REG_A5XX_RBBM_AHB_CMD, (1 << 4));
+
+		/* Clear the interrupt */
+		gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
+			A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 	}
 
 	if (status & A5XX_RBBM_INT_0_MASK_RBBM_TRANSFER_TIMEOUT)
@@ -704,10 +706,16 @@ static irqreturn_t a5xx_irq(struct msm_gpu *gpu)
 {
 	u32 status = gpu_read(gpu, REG_A5XX_RBBM_INT_0_STATUS);
 
-	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD, status);
+	/*
+	 * Clear all the interrupts except RBBM_AHB_ERROR - if we clear it
+	 * before the source is cleared the interrupt will storm.
+	 */
+	gpu_write(gpu, REG_A5XX_RBBM_INT_CLEAR_CMD,
+		status & ~A5XX_RBBM_INT_0_MASK_RBBM_AHB_ERROR);
 
+	/* Pass status to a5xx_rbbm_err_irq because we've already cleared it */
 	if (status & RBBM_ERROR_MASK)
-		a5xx_rbbm_err_irq(gpu);
+		a5xx_rbbm_err_irq(gpu, status);
 
 	if (status & A5XX_RBBM_INT_0_MASK_CP_HW_ERROR)
 		a5xx_cp_err_irq(gpu);
@@ -837,12 +845,8 @@ static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 #ifdef CONFIG_DEBUG_FS
 static void a5xx_show(struct msm_gpu *gpu, struct seq_file *m)
 {
-	gpu->funcs->pm_resume(gpu);
-
 	seq_printf(m, "status:   %08x\n",
 			gpu_read(gpu, REG_A5XX_RBBM_STATUS));
-	gpu->funcs->pm_suspend(gpu);
-
 	adreno_show(gpu, m);
 }
 #endif
@@ -860,7 +864,9 @@ static const struct adreno_gpu_funcs funcs = {
 		.idle = a5xx_idle,
 		.irq = a5xx_irq,
 		.destroy = a5xx_destroy,
+#ifdef CONFIG_DEBUG_FS
 		.show = a5xx_show,
+#endif
 	},
 	.get_timestamp = a5xx_get_timestamp,
 };
diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
index ece39b16a864..c0fa5d1c75ff 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_device.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
@@ -2,7 +2,7 @@
  * Copyright (C) 2013-2014 Red Hat
  * Author: Rob Clark <robdclark@gmail.com>
  *
- * Copyright (c) 2014 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2014,2017 The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License version 2 as published by
@@ -17,6 +17,7 @@
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
+#include <linux/pm_opp.h>
 #include "adreno_gpu.h"
 
 #define ANY_ID 0xff
@@ -155,21 +156,14 @@ struct msm_gpu *adreno_load_gpu(struct drm_device *dev)
 
 	if (gpu) {
 		int ret;
-		mutex_lock(&dev->struct_mutex);
-		gpu->funcs->pm_resume(gpu);
-		mutex_unlock(&dev->struct_mutex);
 
-		disable_irq(gpu->irq);
-
-		ret = gpu->funcs->hw_init(gpu);
+		pm_runtime_get_sync(&pdev->dev);
+		ret = msm_gpu_hw_init(gpu);
+		pm_runtime_put_sync(&pdev->dev);
 		if (ret) {
 			dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
 			gpu->funcs->destroy(gpu);
 			gpu = NULL;
-		} else {
-			enable_irq(gpu->irq);
-			/* give inactive pm a chance to kick in: */
-			msm_gpu_retire(gpu);
 		}
 	}
 
@@ -220,10 +214,71 @@ static int find_chipid(struct device *dev, u32 *chipid)
 	return 0;
 }
 
+/* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
+static int adreno_get_legacy_pwrlevels(struct device *dev)
+{
+	struct device_node *child, *node;
+	int ret;
+
+	node = of_find_compatible_node(dev->of_node, NULL,
+		"qcom,gpu-pwrlevels");
+	if (!node) {
+		dev_err(dev, "Could not find the GPU powerlevels\n");
+		return -ENXIO;
+	}
+
+	for_each_child_of_node(node, child) {
+		unsigned int val;
+
+		ret = of_property_read_u32(child, "qcom,gpu-freq", &val);
+		if (ret)
+			continue;
+
+		/*
+		 * Skip the intentionally bogus clock value found at the bottom
+		 * of most legacy frequency tables
+		 */
+		if (val != 27000000)
+			dev_pm_opp_add(dev, val, 0);
+	}
+
+	return 0;
+}
+
+static int adreno_get_pwrlevels(struct device *dev,
+		struct adreno_platform_config *config)
+{
+	unsigned long freq = ULONG_MAX;
+	struct dev_pm_opp *opp;
+	int ret;
+
+	/* You down with OPP? */
+	if (!of_find_property(dev->of_node, "operating-points-v2", NULL))
+		ret = adreno_get_legacy_pwrlevels(dev);
+	else
+		ret = dev_pm_opp_of_add_table(dev);
+
+	if (ret)
+		return ret;
+
+	/* Find the fastest defined rate */
+	opp = dev_pm_opp_find_freq_floor(dev, &freq);
+	if (!IS_ERR(opp))
+		config->fast_rate = dev_pm_opp_get_freq(opp);
+
+	if (!config->fast_rate) {
+		DRM_DEV_INFO(dev,
+			"Could not find clock rate. Using default\n");
+		/* Pick a suitably safe clock speed for any target */
+		config->fast_rate = 200000000;
+	}
+
+	return 0;
+}
+
 static int adreno_bind(struct device *dev, struct device *master, void *data)
 {
 	static struct adreno_platform_config config = {};
-	struct device_node *child, *node = dev->of_node;
 	u32 val;
 	int ret;
 
@@ -238,28 +293,10 @@ static int adreno_bind(struct device *dev, struct device *master, void *data)
 
 	/* find clock rates: */
 	config.fast_rate = 0;
-	config.slow_rate = ~0;
-	for_each_child_of_node(node, child) {
-		if (of_device_is_compatible(child, "qcom,gpu-pwrlevels")) {
-			struct device_node *pwrlvl;
-			for_each_child_of_node(child, pwrlvl) {
-				ret = of_property_read_u32(pwrlvl, "qcom,gpu-freq", &val);
-				if (ret) {
-					dev_err(dev, "could not find gpu-freq: %d\n", ret);
-					return ret;
-				}
-				config.fast_rate = max(config.fast_rate, val);
-				config.slow_rate = min(config.slow_rate, val);
-			}
-		}
-	}
 
-	if (!config.fast_rate) {
-		dev_warn(dev, "could not find clk rates\n");
-		/* This is a safe low speed for all devices: */
-		config.fast_rate = 200000000;
-		config.slow_rate = 27000000;
-	}
+	ret = adreno_get_pwrlevels(dev, &config);
+	if (ret)
+		return ret;
 
 	dev->platform_data = &config;
 	set_gpu_pdev(dev_get_drvdata(master), to_platform_device(dev));
@@ -296,12 +333,35 @@ static const struct of_device_id dt_match[] = {
 	{}
 };
 
+#ifdef CONFIG_PM
+static int adreno_resume(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct msm_gpu *gpu = platform_get_drvdata(pdev);
+
+	return gpu->funcs->pm_resume(gpu);
+}
+
+static int adreno_suspend(struct device *dev)
+{
+	struct platform_device *pdev = to_platform_device(dev);
+	struct msm_gpu *gpu = platform_get_drvdata(pdev);
+
+	return gpu->funcs->pm_suspend(gpu);
+}
+#endif
+
+static const struct dev_pm_ops adreno_pm_ops = {
+	SET_RUNTIME_PM_OPS(adreno_suspend, adreno_resume, NULL)
+};
+
 static struct platform_driver adreno_driver = {
 	.probe = adreno_probe,
 	.remove = adreno_remove,
 	.driver = {
 		.name = "adreno",
 		.of_match_table = dt_match,
+		.pm = &adreno_pm_ops,
 	},
 };
 
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
index c9bd1e6225f4..5b63fc649dcc 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
@@ -35,6 +35,9 @@ int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value)
 	case MSM_PARAM_GMEM_SIZE:
 		*value = adreno_gpu->gmem;
 		return 0;
+	case MSM_PARAM_GMEM_BASE:
+		*value = 0x100000;
+		return 0;
 	case MSM_PARAM_CHIP_ID:
 		*value = adreno_gpu->rev.patchid |
 				(adreno_gpu->rev.minor << 8) |
@@ -68,6 +71,14 @@ int adreno_hw_init(struct msm_gpu *gpu)
 		return ret;
 	}
 
+	/* reset ringbuffer: */
+	gpu->rb->cur = gpu->rb->start;
+
+	/* reset completed fence seqno: */
+	adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
+	adreno_gpu->memptrs->rptr  = 0;
+	adreno_gpu->memptrs->wptr  = 0;
+
 	/* Setup REG_CP_RB_CNTL: */
 	adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
 			/* size is log2(quad-words): */
@@ -111,29 +122,20 @@ uint32_t adreno_last_fence(struct msm_gpu *gpu)
 
 void adreno_recover(struct msm_gpu *gpu)
 {
-	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
 	struct drm_device *dev = gpu->dev;
 	int ret;
 
-	gpu->funcs->pm_suspend(gpu);
-
-	/* reset ringbuffer: */
-	gpu->rb->cur = gpu->rb->start;
-
-	/* reset completed fence seqno: */
-	adreno_gpu->memptrs->fence = gpu->fctx->completed_fence;
-	adreno_gpu->memptrs->rptr  = 0;
-	adreno_gpu->memptrs->wptr  = 0;
+	// XXX pm-runtime??  we *need* the device to be off after this
+	// so maybe continuing to call ->pm_suspend/resume() is better?
 
+	gpu->funcs->pm_suspend(gpu);
 	gpu->funcs->pm_resume(gpu);
 
-	disable_irq(gpu->irq);
-	ret = gpu->funcs->hw_init(gpu);
+	ret = msm_gpu_hw_init(gpu);
 	if (ret) {
 		dev_err(dev->dev, "gpu hw init failed: %d\n", ret);
 		/* hmm, oh well? */
 	}
-	enable_irq(gpu->irq);
 }
 
 void adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
@@ -259,8 +261,6 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 	seq_printf(m, "wptr:     %d\n", adreno_gpu->memptrs->wptr);
 	seq_printf(m, "rb wptr:  %d\n", get_wptr(gpu->rb));
 
-	gpu->funcs->pm_resume(gpu);
-
 	/* dump these out in a form that can be parsed by demsm: */
 	seq_printf(m, "IO:region %s 00000000 00020000\n", gpu->name);
 	for (i = 0; adreno_gpu->registers[i] != ~0; i += 2) {
@@ -273,8 +273,6 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
 			seq_printf(m, "IO:R %08x %08x\n", addr<<2, val);
 		}
 	}
-
-	gpu->funcs->pm_suspend(gpu);
 }
 #endif
 
@@ -354,14 +352,13 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	adreno_gpu->rev = config->rev;
 
 	gpu->fast_rate = config->fast_rate;
-	gpu->slow_rate = config->slow_rate;
 	gpu->bus_freq  = config->bus_freq;
 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
 	gpu->bus_scale_table = config->bus_scale_table;
 #endif
 
-	DBG("fast_rate=%u, slow_rate=%u, bus_freq=%u",
-			gpu->fast_rate, gpu->slow_rate, gpu->bus_freq);
+	DBG("fast_rate=%u, slow_rate=27000000, bus_freq=%u",
+			gpu->fast_rate, gpu->bus_freq);
 
 	ret = msm_gpu_init(drm, pdev, &adreno_gpu->base, &funcs->base,
 			adreno_gpu->info->name, "kgsl_3d0_reg_memory", "kgsl_3d0_irq",
@@ -369,6 +366,10 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	if (ret)
 		return ret;
 
+	pm_runtime_set_autosuspend_delay(&pdev->dev, DRM_MSM_INACTIVE_PERIOD);
+	pm_runtime_use_autosuspend(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+
 	ret = request_firmware(&adreno_gpu->pm4, adreno_gpu->info->pm4fw, drm->dev);
 	if (ret) {
 		dev_err(drm->dev, "failed to load %s PM4 firmware: %d\n",
@@ -418,18 +419,27 @@ int adreno_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	return 0;
 }
 
-void adreno_gpu_cleanup(struct adreno_gpu *gpu)
+void adreno_gpu_cleanup(struct adreno_gpu *adreno_gpu)
 {
-	if (gpu->memptrs_bo) {
-		if (gpu->memptrs)
-			msm_gem_put_vaddr(gpu->memptrs_bo);
+	struct msm_gpu *gpu = &adreno_gpu->base;
+
+	if (adreno_gpu->memptrs_bo) {
+		if (adreno_gpu->memptrs)
+			msm_gem_put_vaddr(adreno_gpu->memptrs_bo);
+
+		if (adreno_gpu->memptrs_iova)
+			msm_gem_put_iova(adreno_gpu->memptrs_bo, gpu->id);
+
+		drm_gem_object_unreference_unlocked(adreno_gpu->memptrs_bo);
+	}
+	release_firmware(adreno_gpu->pm4);
+	release_firmware(adreno_gpu->pfp);
 
-		if (gpu->memptrs_iova)
-			msm_gem_put_iova(gpu->memptrs_bo, gpu->base.id);
+	msm_gpu_cleanup(gpu);
 
-		drm_gem_object_unreference_unlocked(gpu->memptrs_bo);
+	if (gpu->aspace) {
+		gpu->aspace->mmu->funcs->detach(gpu->aspace->mmu,
+			iommu_ports, ARRAY_SIZE(iommu_ports));
+		msm_gem_address_space_put(gpu->aspace);
 	}
-	release_firmware(gpu->pm4);
-	release_firmware(gpu->pfp);
-	msm_gpu_cleanup(&gpu->base);
 }
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
index 42e444a67630..fb4831f9f80b 100644
--- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
+++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
@@ -123,7 +123,7 @@ struct adreno_gpu {
 /* platform config data (ie. from DT, or pdata) */
 struct adreno_platform_config {
 	struct adreno_rev rev;
-	uint32_t fast_rate, slow_rate, bus_freq;
+	uint32_t fast_rate, bus_freq;
 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
 	struct msm_bus_scale_pdata *bus_scale_table;
 #endif
diff --git a/drivers/gpu/drm/msm/dsi/dsi_host.c b/drivers/gpu/drm/msm/dsi/dsi_host.c
index 4f79b109173d..f97a7803a02d 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_host.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_host.c
@@ -1635,7 +1635,7 @@ static int dsi_host_parse_dt(struct msm_dsi_host *msm_host)
 	}
 
 	/* Get panel node from the output port's endpoint data */
-	device_node = of_graph_get_remote_port_parent(endpoint);
+	device_node = of_graph_get_remote_node(np, 1, 0);
 	if (!device_node) {
 		dev_dbg(dev, "%s: no valid device\n", __func__);
 		goto err;
diff --git a/drivers/gpu/drm/msm/dsi/dsi_manager.c b/drivers/gpu/drm/msm/dsi/dsi_manager.c
index 921270ea6059..a879ffa534b4 100644
--- a/drivers/gpu/drm/msm/dsi/dsi_manager.c
+++ b/drivers/gpu/drm/msm/dsi/dsi_manager.c
@@ -171,7 +171,7 @@ dsi_mgr_phy_enable(int id,
 			}
 		}
 	} else {
-		msm_dsi_host_reset_phy(mdsi->host);
+		msm_dsi_host_reset_phy(msm_dsi->host);
 		ret = enable_phy(msm_dsi, src_pll_id, &shared_timings[id]);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
index a54d3bb5baad..8177e8511afd 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_audio.c
@@ -18,13 +18,6 @@
 #include <linux/hdmi.h>
 #include "hdmi.h"
 
-
-/* Supported HDMI Audio channels */
-#define MSM_HDMI_AUDIO_CHANNEL_2		0
-#define MSM_HDMI_AUDIO_CHANNEL_4		1
-#define MSM_HDMI_AUDIO_CHANNEL_6		2
-#define MSM_HDMI_AUDIO_CHANNEL_8		3
-
 /* maps MSM_HDMI_AUDIO_CHANNEL_n consts used by audio driver to # of channels: */
 static int nchannels[] = { 2, 4, 6, 8 };
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 1c29618f4ddb..f29194a74a19 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -114,15 +114,9 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	event = mdp4_crtc->event;
 	if (event) {
-		/* if regular vblank case (!file) or if cancel-flip from
-		 * preclose on file that requested flip, then send the
-		 * event:
-		 */
-		if (!file || (event->base.file_priv == file)) {
-			mdp4_crtc->event = NULL;
-			DBG("%s: send event: %p", mdp4_crtc->name, event);
-			drm_crtc_send_vblank_event(crtc, event);
-		}
+		mdp4_crtc->event = NULL;
+		DBG("%s: send event: %p", mdp4_crtc->name, event);
+		drm_crtc_send_vblank_event(crtc, event);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
index a4e1206a66a8..3d26d7774c08 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_kms.c
@@ -169,7 +169,7 @@ static void mdp4_destroy(struct msm_kms *kms)
 	if (aspace) {
 		aspace->mmu->funcs->detach(aspace->mmu,
 				iommu_ports, ARRAY_SIZE(iommu_ports));
-		msm_gem_address_space_destroy(aspace);
+		msm_gem_address_space_put(aspace);
 	}
 
 	if (mdp4_kms->rpm_enabled)
@@ -225,32 +225,6 @@ int mdp4_enable(struct mdp4_kms *mdp4_kms)
 	return 0;
 }
 
-static struct device_node *mdp4_detect_lcdc_panel(struct drm_device *dev)
-{
-	struct device_node *endpoint, *panel_node;
-	struct device_node *np = dev->dev->of_node;
-
-	/*
-	 * LVDS/LCDC is the first port described in the list of ports in the
-	 * MDP4 DT node.
-	 */
-	endpoint = of_graph_get_endpoint_by_regs(np, 0, -1);
-	if (!endpoint) {
-		DBG("no LVDS remote endpoint\n");
-		return NULL;
-	}
-
-	panel_node = of_graph_get_remote_port_parent(endpoint);
-	if (!panel_node) {
-		DBG("no valid panel node in LVDS endpoint\n");
-		of_node_put(endpoint);
-		return NULL;
-	}
-
-	of_node_put(endpoint);
-
-	return panel_node;
-}
 
 static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
 				  int intf_type)
@@ -269,7 +243,7 @@ static int mdp4_modeset_init_intf(struct mdp4_kms *mdp4_kms,
 		 * bail out early if there is no panel node (no need to
 		 * initialize LCDC encoder and LVDS connector)
 		 */
-		panel_node = mdp4_detect_lcdc_panel(dev);
+		panel_node = of_graph_get_remote_node(dev->dev->of_node, 0, 0);
 		if (!panel_node)
 			return 0;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
index ba2d017f6591..c2bdad88447e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.c
@@ -70,6 +70,18 @@ const struct mdp5_cfg_hw msm8x74v1_config = {
 	.lm = {
 		.count = 5,
 		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = 2,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB },
+			     },
 		.nb_stages = 5,
 	},
 	.dspp = {
@@ -134,6 +146,18 @@ const struct mdp5_cfg_hw msm8x74v2_config = {
 	.lm = {
 		.count = 5,
 		.base = { 0x03100, 0x03500, 0x03900, 0x03d00, 0x04100 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = 2,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+			     },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
@@ -167,6 +191,7 @@ const struct mdp5_cfg_hw apq8084_config = {
 	.mdp = {
 		.count = 1,
 		.caps = MDP_CAP_SMP |
+			MDP_CAP_SRC_SPLIT |
 			0,
 	},
 	.smp = {
@@ -211,6 +236,22 @@ const struct mdp5_cfg_hw apq8084_config = {
 	.lm = {
 		.count = 6,
 		.base = { 0x03900, 0x03d00, 0x04100, 0x04500, 0x04900, 0x04d00 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = 2,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 5, .pp = 3, .dspp = 3,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+			     },
 		.nb_stages = 5,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
@@ -282,6 +323,12 @@ const struct mdp5_cfg_hw msm8x16_config = {
 	.lm = {
 		.count = 2, /* LM0 and LM3 */
 		.base = { 0x44000, 0x47000 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB },
+			     },
 		.nb_stages = 8,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
@@ -306,6 +353,7 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	.mdp = {
 		.count = 1,
 		.caps = MDP_CAP_SMP |
+			MDP_CAP_SRC_SPLIT |
 			0,
 	},
 	.smp = {
@@ -350,6 +398,22 @@ const struct mdp5_cfg_hw msm8x94_config = {
 	.lm = {
 		.count = 6,
 		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = 2,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 5, .pp = 3, .dspp = 3,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+			     },
 		.nb_stages = 8,
 		.max_width = 2048,
 		.max_height = 0xFFFF,
@@ -385,6 +449,7 @@ const struct mdp5_cfg_hw msm8x96_config = {
 		.count = 1,
 		.caps = MDP_CAP_DSC |
 			MDP_CAP_CDM |
+			MDP_CAP_SRC_SPLIT |
 			0,
 	},
 	.ctl = {
@@ -434,6 +499,22 @@ const struct mdp5_cfg_hw msm8x96_config = {
 	.lm = {
 		.count = 6,
 		.base = { 0x44000, 0x45000, 0x46000, 0x47000, 0x48000, 0x49000 },
+		.instances = {
+				{ .id = 0, .pp = 0, .dspp = 0,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 1, .pp = 1, .dspp = 1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+				{ .id = 2, .pp = 2, .dspp = -1,
+				  .caps = MDP_LM_CAP_DISPLAY |
+					  MDP_LM_CAP_PAIR, },
+				{ .id = 3, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 4, .pp = -1, .dspp = -1,
+				  .caps = MDP_LM_CAP_WB, },
+				{ .id = 5, .pp = 3, .dspp = -1,
+				  .caps = MDP_LM_CAP_DISPLAY, },
+			     },
 		.nb_stages = 8,
 		.max_width = 2560,
 		.max_height = 0xFFFF,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
index b1c7daaede86..75910d0f2f4c 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cfg.h
@@ -39,8 +39,16 @@ struct mdp5_sub_block {
 	MDP5_SUB_BLOCK_DEFINITION;
 };
 
+struct mdp5_lm_instance {
+	int id;
+	int pp;
+	int dspp;
+	uint32_t caps;
+};
+
 struct mdp5_lm_block {
 	MDP5_SUB_BLOCK_DEFINITION;
+	struct mdp5_lm_instance instances[MAX_BASES];
 	uint32_t nb_stages;		/* number of stages per blender */
 	uint32_t max_width;		/* Maximum output resolution */
 	uint32_t max_height;
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
index df1c8adec3f3..8dafc7bdba48 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_cmd_encoder.c
@@ -51,7 +51,8 @@ static int pingpong_tearcheck_setup(struct drm_encoder *encoder,
 	struct device *dev = encoder->dev->dev;
 	u32 total_lines_x100, vclks_line, cfg;
 	long vsync_clk_speed;
-	int pp_id = GET_PING_PONG_ID(mdp5_crtc_get_lm(encoder->crtc));
+	struct mdp5_hw_mixer *mixer = mdp5_crtc_get_mixer(encoder->crtc);
+	int pp_id = mixer->pp;
 
 	if (IS_ERR_OR_NULL(mdp5_kms->vsync_clk)) {
 		dev_err(dev, "vsync_clk is not initialized\n");
@@ -94,7 +95,8 @@ static int pingpong_tearcheck_setup(struct drm_encoder *encoder,
 static int pingpong_tearcheck_enable(struct drm_encoder *encoder)
 {
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
-	int pp_id = GET_PING_PONG_ID(mdp5_crtc_get_lm(encoder->crtc));
+	struct mdp5_hw_mixer *mixer = mdp5_crtc_get_mixer(encoder->crtc);
+	int pp_id = mixer->pp;
 	int ret;
 
 	ret = clk_set_rate(mdp5_kms->vsync_clk,
@@ -119,7 +121,8 @@ static int pingpong_tearcheck_enable(struct drm_encoder *encoder)
 static void pingpong_tearcheck_disable(struct drm_encoder *encoder)
 {
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
-	int pp_id = GET_PING_PONG_ID(mdp5_crtc_get_lm(encoder->crtc));
+	struct mdp5_hw_mixer *mixer = mdp5_crtc_get_mixer(encoder->crtc);
+	int pp_id = mixer->pp;
 
 	mdp5_write(mdp5_kms, REG_MDP5_PP_TEAR_CHECK_EN(pp_id), 0);
 	clk_disable_unprepare(mdp5_kms->vsync_clk);
@@ -129,8 +132,6 @@ void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
 			       struct drm_display_mode *mode,
 			       struct drm_display_mode *adjusted_mode)
 {
-	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
-
 	mode = adjusted_mode;
 
 	DBG("set mode: %d:\"%s\" %d %d %d %d %d %d %d %d %d %d 0x%x 0x%x",
@@ -142,23 +143,23 @@ void mdp5_cmd_encoder_mode_set(struct drm_encoder *encoder,
 			mode->vsync_end, mode->vtotal,
 			mode->type, mode->flags);
 	pingpong_tearcheck_setup(encoder, mode);
-	mdp5_crtc_set_pipeline(encoder->crtc, &mdp5_cmd_enc->intf,
-				mdp5_cmd_enc->ctl);
+	mdp5_crtc_set_pipeline(encoder->crtc);
 }
 
 void mdp5_cmd_encoder_disable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 	struct mdp5_ctl *ctl = mdp5_cmd_enc->ctl;
-	struct mdp5_interface *intf = &mdp5_cmd_enc->intf;
+	struct mdp5_interface *intf = mdp5_cmd_enc->intf;
+	struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(encoder->crtc);
 
 	if (WARN_ON(!mdp5_cmd_enc->enabled))
 		return;
 
 	pingpong_tearcheck_disable(encoder);
 
-	mdp5_ctl_set_encoder_state(ctl, false);
-	mdp5_ctl_commit(ctl, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_set_encoder_state(ctl, pipeline, false);
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
 
 	bs_set(mdp5_cmd_enc, 0);
 
@@ -169,7 +170,8 @@ void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_cmd_enc = to_mdp5_encoder(encoder);
 	struct mdp5_ctl *ctl = mdp5_cmd_enc->ctl;
-	struct mdp5_interface *intf = &mdp5_cmd_enc->intf;
+	struct mdp5_interface *intf = mdp5_cmd_enc->intf;
+	struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(encoder->crtc);
 
 	if (WARN_ON(mdp5_cmd_enc->enabled))
 		return;
@@ -178,9 +180,9 @@ void mdp5_cmd_encoder_enable(struct drm_encoder *encoder)
 	if (pingpong_tearcheck_enable(encoder))
 		return;
 
-	mdp5_ctl_commit(ctl, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
 
-	mdp5_ctl_set_encoder_state(ctl, true);
+	mdp5_ctl_set_encoder_state(ctl, pipeline, true);
 
 	mdp5_cmd_enc->enabled = true;
 }
@@ -197,7 +199,7 @@ int mdp5_cmd_encoder_set_split_display(struct drm_encoder *encoder,
 		return -EINVAL;
 
 	mdp5_kms = get_kms(encoder);
-	intf_num = mdp5_cmd_enc->intf.num;
+	intf_num = mdp5_cmd_enc->intf->num;
 
 	/* Switch slave encoder's trigger MUX, to use the master's
 	 * start signal for the slave encoder
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index d0c8b38b96ce..9217e0d6e93e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -32,13 +32,7 @@ struct mdp5_crtc {
 	int id;
 	bool enabled;
 
-	/* layer mixer used for this CRTC (+ its lock): */
-#define GET_LM_ID(crtc_id)	((crtc_id == 3) ? 5 : crtc_id)
-	int lm;
-	spinlock_t lm_lock;	/* protect REG_MDP5_LM_* registers */
-
-	/* CTL used for this CRTC: */
-	struct mdp5_ctl *ctl;
+	spinlock_t lm_lock;     /* protect REG_MDP5_LM_* registers */
 
 	/* if there is a pending flip, these will be non-null: */
 	struct drm_pending_vblank_event *event;
@@ -61,8 +55,6 @@ struct mdp5_crtc {
 
 	struct completion pp_completion;
 
-	bool cmd_mode;
-
 	struct {
 		/* protect REG_MDP5_LM_CURSOR* registers and cursor scanout_bo*/
 		spinlock_t lock;
@@ -97,10 +89,12 @@ static void request_pp_done_pending(struct drm_crtc *crtc)
 
 static u32 crtc_flush(struct drm_crtc *crtc, u32 flush_mask)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
 
 	DBG("%s: flush=%08x", crtc->name, flush_mask);
-	return mdp5_ctl_commit(mdp5_crtc->ctl, flush_mask);
+	return mdp5_ctl_commit(ctl, pipeline, flush_mask);
 }
 
 /*
@@ -110,19 +104,25 @@ static u32 crtc_flush(struct drm_crtc *crtc, u32 flush_mask)
  */
 static u32 crtc_flush_all(struct drm_crtc *crtc)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_hw_mixer *mixer, *r_mixer;
 	struct drm_plane *plane;
 	uint32_t flush_mask = 0;
 
 	/* this should not happen: */
-	if (WARN_ON(!mdp5_crtc->ctl))
+	if (WARN_ON(!mdp5_cstate->ctl))
 		return 0;
 
 	drm_atomic_crtc_for_each_plane(plane, crtc) {
 		flush_mask |= mdp5_plane_get_flush(plane);
 	}
 
-	flush_mask |= mdp_ctl_flush_mask_lm(mdp5_crtc->lm);
+	mixer = mdp5_cstate->pipeline.mixer;
+	flush_mask |= mdp_ctl_flush_mask_lm(mixer->lm);
+
+	r_mixer = mdp5_cstate->pipeline.r_mixer;
+	if (r_mixer)
+		flush_mask |= mdp_ctl_flush_mask_lm(r_mixer->lm);
 
 	return crtc_flush(crtc, flush_mask);
 }
@@ -130,7 +130,10 @@ static u32 crtc_flush_all(struct drm_crtc *crtc)
 /* if file!=NULL, this is preclose potential cancel-flip path */
 static void complete_flip(struct drm_crtc *crtc, struct drm_file *file)
 {
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	struct drm_device *dev = crtc->dev;
 	struct drm_pending_vblank_event *event;
 	unsigned long flags;
@@ -138,22 +141,17 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file)
 	spin_lock_irqsave(&dev->event_lock, flags);
 	event = mdp5_crtc->event;
 	if (event) {
-		/* if regular vblank case (!file) or if cancel-flip from
-		 * preclose on file that requested flip, then send the
-		 * event:
-		 */
-		if (!file || (event->base.file_priv == file)) {
-			mdp5_crtc->event = NULL;
-			DBG("%s: send event: %p", crtc->name, event);
-			drm_crtc_send_vblank_event(crtc, event);
-		}
+		mdp5_crtc->event = NULL;
+		DBG("%s: send event: %p", crtc->name, event);
+		drm_crtc_send_vblank_event(crtc, event);
 	}
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
-	if (mdp5_crtc->ctl && !crtc->state->enable) {
+	if (ctl && !crtc->state->enable) {
 		/* set STAGE_UNUSED for all layers */
-		mdp5_ctl_blend(mdp5_crtc->ctl, NULL, 0, 0);
-		mdp5_crtc->ctl = NULL;
+		mdp5_ctl_blend(ctl, pipeline, NULL, NULL, 0, 0);
+		/* XXX: What to do here? */
+		/* mdp5_crtc->ctl = NULL; */
 	}
 }
 
@@ -193,6 +191,12 @@ static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage)
 }
 
 /*
+ * left/right pipe offsets for the stage array used in blend_setup()
+ */
+#define PIPE_LEFT	0
+#define PIPE_RIGHT	1
+
+/*
  * blend_setup() - blend all the planes of a CRTC
  *
  * If no base layer is available, border will be enabled as the base layer.
@@ -202,18 +206,26 @@ static inline u32 mdp5_lm_use_fg_alpha_mask(enum mdp_mixer_stage_id stage)
 static void blend_setup(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 	struct drm_plane *plane;
 	const struct mdp5_cfg_hw *hw_cfg;
 	struct mdp5_plane_state *pstate, *pstates[STAGE_MAX + 1] = {NULL};
 	const struct mdp_format *format;
-	uint32_t lm = mdp5_crtc->lm;
+	struct mdp5_hw_mixer *mixer = pipeline->mixer;
+	uint32_t lm = mixer->lm;
+	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
+	uint32_t r_lm = r_mixer ? r_mixer->lm : 0;
+	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	uint32_t blend_op, fg_alpha, bg_alpha, ctl_blend_flags = 0;
 	unsigned long flags;
-	enum mdp5_pipe stage[STAGE_MAX + 1] = { SSPP_NONE };
+	enum mdp5_pipe stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
+	enum mdp5_pipe r_stage[STAGE_MAX + 1][MAX_PIPE_STAGE] = { SSPP_NONE };
 	int i, plane_cnt = 0;
 	bool bg_alpha_enabled = false;
 	u32 mixer_op_mode = 0;
+	u32 val;
 #define blender(stage)	((stage) - STAGE0)
 
 	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
@@ -221,14 +233,35 @@ static void blend_setup(struct drm_crtc *crtc)
 	spin_lock_irqsave(&mdp5_crtc->lm_lock, flags);
 
 	/* ctl could be released already when we are shutting down: */
-	if (!mdp5_crtc->ctl)
+	/* XXX: Can this happen now? */
+	if (!ctl)
 		goto out;
 
 	/* Collect all plane information */
 	drm_atomic_crtc_for_each_plane(plane, crtc) {
+		enum mdp5_pipe right_pipe;
+
 		pstate = to_mdp5_plane_state(plane->state);
 		pstates[pstate->stage] = pstate;
-		stage[pstate->stage] = mdp5_plane_pipe(plane);
+		stage[pstate->stage][PIPE_LEFT] = mdp5_plane_pipe(plane);
+		/*
+		 * if we have a right mixer, stage the same pipe as we
+		 * have on the left mixer
+		 */
+		if (r_mixer)
+			r_stage[pstate->stage][PIPE_LEFT] =
+						mdp5_plane_pipe(plane);
+		/*
+		 * if we have a right pipe (i.e, the plane comprises of 2
+		 * hwpipes, then stage the right pipe on the right side of both
+		 * the layer mixers
+		 */
+		right_pipe = mdp5_plane_right_pipe(plane);
+		if (right_pipe) {
+			stage[pstate->stage][PIPE_RIGHT] = right_pipe;
+			r_stage[pstate->stage][PIPE_RIGHT] = right_pipe;
+		}
+
 		plane_cnt++;
 	}
 
@@ -294,12 +327,27 @@ static void blend_setup(struct drm_crtc *crtc)
 				blender(i)), fg_alpha);
 		mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_BG_ALPHA(lm,
 				blender(i)), bg_alpha);
+		if (r_mixer) {
+			mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_OP_MODE(r_lm,
+					blender(i)), blend_op);
+			mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_FG_ALPHA(r_lm,
+					blender(i)), fg_alpha);
+			mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_BG_ALPHA(r_lm,
+					blender(i)), bg_alpha);
+		}
 	}
 
-	mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm), mixer_op_mode);
-
-	mdp5_ctl_blend(mdp5_crtc->ctl, stage, plane_cnt, ctl_blend_flags);
+	val = mdp5_read(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm));
+	mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm),
+		   val | mixer_op_mode);
+	if (r_mixer) {
+		val = mdp5_read(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(r_lm));
+		mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(r_lm),
+			   val | mixer_op_mode);
+	}
 
+	mdp5_ctl_blend(ctl, pipeline, stage, r_stage, plane_cnt,
+		       ctl_blend_flags);
 out:
 	spin_unlock_irqrestore(&mdp5_crtc->lm_lock, flags);
 }
@@ -307,7 +355,12 @@ out:
 static void mdp5_crtc_mode_set_nofb(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
+	struct mdp5_hw_mixer *mixer = mdp5_cstate->pipeline.mixer;
+	struct mdp5_hw_mixer *r_mixer = mdp5_cstate->pipeline.r_mixer;
+	uint32_t lm = mixer->lm;
+	u32 mixer_width, val;
 	unsigned long flags;
 	struct drm_display_mode *mode;
 
@@ -325,16 +378,40 @@ static void mdp5_crtc_mode_set_nofb(struct drm_crtc *crtc)
 			mode->vsync_end, mode->vtotal,
 			mode->type, mode->flags);
 
+	mixer_width = mode->hdisplay;
+	if (r_mixer)
+		mixer_width /= 2;
+
 	spin_lock_irqsave(&mdp5_crtc->lm_lock, flags);
-	mdp5_write(mdp5_kms, REG_MDP5_LM_OUT_SIZE(mdp5_crtc->lm),
-			MDP5_LM_OUT_SIZE_WIDTH(mode->hdisplay) |
+	mdp5_write(mdp5_kms, REG_MDP5_LM_OUT_SIZE(lm),
+			MDP5_LM_OUT_SIZE_WIDTH(mixer_width) |
 			MDP5_LM_OUT_SIZE_HEIGHT(mode->vdisplay));
+
+	/* Assign mixer to LEFT side in source split mode */
+	val = mdp5_read(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm));
+	val &= ~MDP5_LM_BLEND_COLOR_OUT_SPLIT_LEFT_RIGHT;
+	mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(lm), val);
+
+	if (r_mixer) {
+		u32 r_lm = r_mixer->lm;
+
+		mdp5_write(mdp5_kms, REG_MDP5_LM_OUT_SIZE(r_lm),
+			   MDP5_LM_OUT_SIZE_WIDTH(mixer_width) |
+			   MDP5_LM_OUT_SIZE_HEIGHT(mode->vdisplay));
+
+		/* Assign mixer to RIGHT side in source split mode */
+		val = mdp5_read(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(r_lm));
+		val |= MDP5_LM_BLEND_COLOR_OUT_SPLIT_LEFT_RIGHT;
+		mdp5_write(mdp5_kms, REG_MDP5_LM_BLEND_COLOR_OUT(r_lm), val);
+	}
+
 	spin_unlock_irqrestore(&mdp5_crtc->lm_lock, flags);
 }
 
 static void mdp5_crtc_disable(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 
 	DBG("%s", crtc->name);
@@ -342,7 +419,7 @@ static void mdp5_crtc_disable(struct drm_crtc *crtc)
 	if (WARN_ON(!mdp5_crtc->enabled))
 		return;
 
-	if (mdp5_crtc->cmd_mode)
+	if (mdp5_cstate->cmd_mode)
 		mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->pp_done);
 
 	mdp_irq_unregister(&mdp5_kms->base, &mdp5_crtc->err);
@@ -354,6 +431,7 @@ static void mdp5_crtc_disable(struct drm_crtc *crtc)
 static void mdp5_crtc_enable(struct drm_crtc *crtc)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 
 	DBG("%s", crtc->name);
@@ -364,12 +442,73 @@ static void mdp5_crtc_enable(struct drm_crtc *crtc)
 	mdp5_enable(mdp5_kms);
 	mdp_irq_register(&mdp5_kms->base, &mdp5_crtc->err);
 
-	if (mdp5_crtc->cmd_mode)
+	if (mdp5_cstate->cmd_mode)
 		mdp_irq_register(&mdp5_kms->base, &mdp5_crtc->pp_done);
 
 	mdp5_crtc->enabled = true;
 }
 
+int mdp5_crtc_setup_pipeline(struct drm_crtc *crtc,
+			     struct drm_crtc_state *new_crtc_state,
+			     bool need_right_mixer)
+{
+	struct mdp5_crtc_state *mdp5_cstate =
+			to_mdp5_crtc_state(new_crtc_state);
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
+	struct mdp5_interface *intf;
+	bool new_mixer = false;
+
+	new_mixer = !pipeline->mixer;
+
+	if ((need_right_mixer && !pipeline->r_mixer) ||
+	    (!need_right_mixer && pipeline->r_mixer))
+		new_mixer = true;
+
+	if (new_mixer) {
+		struct mdp5_hw_mixer *old_mixer = pipeline->mixer;
+		struct mdp5_hw_mixer *old_r_mixer = pipeline->r_mixer;
+		u32 caps;
+		int ret;
+
+		caps = MDP_LM_CAP_DISPLAY;
+		if (need_right_mixer)
+			caps |= MDP_LM_CAP_PAIR;
+
+		ret = mdp5_mixer_assign(new_crtc_state->state, crtc, caps,
+					&pipeline->mixer, need_right_mixer ?
+					&pipeline->r_mixer : NULL);
+		if (ret)
+			return ret;
+
+		mdp5_mixer_release(new_crtc_state->state, old_mixer);
+		if (old_r_mixer) {
+			mdp5_mixer_release(new_crtc_state->state, old_r_mixer);
+			if (!need_right_mixer)
+				pipeline->r_mixer = NULL;
+		}
+	}
+
+	/*
+	 * these should have been already set up in the encoder's atomic
+	 * check (called by drm_atomic_helper_check_modeset)
+	 */
+	intf = pipeline->intf;
+
+	mdp5_cstate->err_irqmask = intf2err(intf->num);
+	mdp5_cstate->vblank_irqmask = intf2vblank(pipeline->mixer, intf);
+
+	if ((intf->type == INTF_DSI) &&
+	    (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)) {
+		mdp5_cstate->pp_done_irqmask = lm2ppdone(pipeline->mixer);
+		mdp5_cstate->cmd_mode = true;
+	} else {
+		mdp5_cstate->pp_done_irqmask = 0;
+		mdp5_cstate->cmd_mode = false;
+	}
+
+	return 0;
+}
+
 struct plane_state {
 	struct drm_plane *plane;
 	struct mdp5_plane_state *state;
@@ -391,6 +530,29 @@ static bool is_fullscreen(struct drm_crtc_state *cstate,
 		((pstate->crtc_y + pstate->crtc_h) >= cstate->mode.vdisplay);
 }
 
+enum mdp_mixer_stage_id get_start_stage(struct drm_crtc *crtc,
+					struct drm_crtc_state *new_crtc_state,
+					struct drm_plane_state *bpstate)
+{
+	struct mdp5_crtc_state *mdp5_cstate =
+			to_mdp5_crtc_state(new_crtc_state);
+
+	/*
+	 * if we're in source split mode, it's mandatory to have
+	 * border out on the base stage
+	 */
+	if (mdp5_cstate->pipeline.r_mixer)
+		return STAGE0;
+
+	/* if the bottom-most layer is not fullscreen, we need to use
+	 * it for solid-color:
+	 */
+	if (!is_fullscreen(new_crtc_state, bpstate))
+		return STAGE0;
+
+	return STAGE_BASE;
+}
+
 static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 		struct drm_crtc_state *state)
 {
@@ -400,8 +562,12 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 	struct plane_state pstates[STAGE_MAX + 1];
 	const struct mdp5_cfg_hw *hw_cfg;
 	const struct drm_plane_state *pstate;
+	const struct drm_display_mode *mode = &state->adjusted_mode;
 	bool cursor_plane = false;
-	int cnt = 0, base = 0, i;
+	bool need_right_mixer = false;
+	int cnt = 0, i;
+	int ret;
+	enum mdp_mixer_stage_id start;
 
 	DBG("%s: check", crtc->name);
 
@@ -409,32 +575,52 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 		pstates[cnt].plane = plane;
 		pstates[cnt].state = to_mdp5_plane_state(pstate);
 
+		/*
+		 * if any plane on this crtc uses 2 hwpipes, then we need
+		 * the crtc to have a right hwmixer.
+		 */
+		if (pstates[cnt].state->r_hwpipe)
+			need_right_mixer = true;
 		cnt++;
 
 		if (plane->type == DRM_PLANE_TYPE_CURSOR)
 			cursor_plane = true;
 	}
 
-	/* assign a stage based on sorted zpos property */
-	sort(pstates, cnt, sizeof(pstates[0]), pstate_cmp, NULL);
+	/* bail out early if there aren't any planes */
+	if (!cnt)
+		return 0;
 
-	/* if the bottom-most layer is not fullscreen, we need to use
-	 * it for solid-color:
+	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
+
+	/*
+	 * we need a right hwmixer if the mode's width is greater than a single
+	 * LM's max width
 	 */
-	if ((cnt > 0) && !is_fullscreen(state, &pstates[0].state->base))
-		base++;
+	if (mode->hdisplay > hw_cfg->lm.max_width)
+		need_right_mixer = true;
+
+	ret = mdp5_crtc_setup_pipeline(crtc, state, need_right_mixer);
+	if (ret) {
+		dev_err(dev->dev, "couldn't assign mixers %d\n", ret);
+		return ret;
+	}
+
+	/* assign a stage based on sorted zpos property */
+	sort(pstates, cnt, sizeof(pstates[0]), pstate_cmp, NULL);
 
 	/* trigger a warning if cursor isn't the highest zorder */
 	WARN_ON(cursor_plane &&
 		(pstates[cnt - 1].plane->type != DRM_PLANE_TYPE_CURSOR));
 
+	start = get_start_stage(crtc, state, &pstates[0].state->base);
+
 	/* verify that there are not too many planes attached to crtc
 	 * and that we don't have conflicting mixer stages:
 	 */
-	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
-
-	if ((cnt + base) >= hw_cfg->lm.nb_stages) {
-		dev_err(dev->dev, "too many planes! cnt=%d, base=%d\n", cnt, base);
+	if ((cnt + start - 1) >= hw_cfg->lm.nb_stages) {
+		dev_err(dev->dev, "too many planes! cnt=%d, start stage=%d\n",
+			cnt, start);
 		return -EINVAL;
 	}
 
@@ -442,7 +628,7 @@ static int mdp5_crtc_atomic_check(struct drm_crtc *crtc,
 		if (cursor_plane && (i == (cnt - 1)))
 			pstates[i].state->stage = hw_cfg->lm.nb_stages;
 		else
-			pstates[i].state->stage = STAGE_BASE + i + base;
+			pstates[i].state->stage = start + i;
 		DBG("%s: assign pipe %s on stage=%d", crtc->name,
 				pstates[i].plane->name,
 				pstates[i].state->stage);
@@ -461,6 +647,7 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc,
 				   struct drm_crtc_state *old_crtc_state)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct drm_device *dev = crtc->dev;
 	unsigned long flags;
 
@@ -477,7 +664,8 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc,
 	 * it means we are trying to flush a CRTC whose state is disabled:
 	 * nothing else needs to be done.
 	 */
-	if (unlikely(!mdp5_crtc->ctl))
+	/* XXX: Can this happen now ? */
+	if (unlikely(!mdp5_cstate->ctl))
 		return;
 
 	blend_setup(crtc);
@@ -488,11 +676,16 @@ static void mdp5_crtc_atomic_flush(struct drm_crtc *crtc,
 	 * This is safe because no pp_done will happen before SW trigger
 	 * in command mode.
 	 */
-	if (mdp5_crtc->cmd_mode)
+	if (mdp5_cstate->cmd_mode)
 		request_pp_done_pending(crtc);
 
 	mdp5_crtc->flushed_mask = crtc_flush_all(crtc);
 
+	/* XXX are we leaking out state here? */
+	mdp5_crtc->vblank.irqmask = mdp5_cstate->vblank_irqmask;
+	mdp5_crtc->err.irqmask = mdp5_cstate->err_irqmask;
+	mdp5_crtc->pp_done.irqmask = mdp5_cstate->pp_done_irqmask;
+
 	request_pending(crtc, PENDING_FLIP);
 }
 
@@ -527,11 +720,14 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 		uint32_t width, uint32_t height)
 {
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
 	struct drm_device *dev = crtc->dev;
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 	struct drm_gem_object *cursor_bo, *old_bo = NULL;
 	uint32_t blendcfg, stride;
 	uint64_t cursor_addr;
+	struct mdp5_ctl *ctl;
 	int ret, lm;
 	enum mdp5_cursor_alpha cur_alpha = CURSOR_ALPHA_PER_PIXEL;
 	uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0);
@@ -544,7 +740,12 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 		return -EINVAL;
 	}
 
-	if (NULL == mdp5_crtc->ctl)
+	ctl = mdp5_cstate->ctl;
+	if (!ctl)
+		return -EINVAL;
+
+	/* don't support LM cursors when we we have source split enabled */
+	if (mdp5_cstate->pipeline.r_mixer)
 		return -EINVAL;
 
 	if (!handle) {
@@ -561,7 +762,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	if (ret)
 		return -EINVAL;
 
-	lm = mdp5_crtc->lm;
+	lm = mdp5_cstate->pipeline.mixer->lm;
 	stride = width * drm_format_plane_cpp(DRM_FORMAT_ARGB8888, 0);
 
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
@@ -591,7 +792,7 @@ static int mdp5_crtc_cursor_set(struct drm_crtc *crtc,
 	spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags);
 
 set_cursor:
-	ret = mdp5_ctl_set_cursor(mdp5_crtc->ctl, 0, cursor_enable);
+	ret = mdp5_ctl_set_cursor(ctl, pipeline, 0, cursor_enable);
 	if (ret) {
 		dev_err(dev->dev, "failed to %sable cursor: %d\n",
 				cursor_enable ? "en" : "dis", ret);
@@ -613,11 +814,17 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 {
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	uint32_t lm = mdp5_cstate->pipeline.mixer->lm;
 	uint32_t flush_mask = mdp_ctl_flush_mask_cursor(0);
 	uint32_t roi_w;
 	uint32_t roi_h;
 	unsigned long flags;
 
+	/* don't support LM cursors when we we have source split enabled */
+	if (mdp5_cstate->pipeline.r_mixer)
+		return -EINVAL;
+
 	/* In case the CRTC is disabled, just drop the cursor update */
 	if (unlikely(!crtc->state->enable))
 		return 0;
@@ -628,10 +835,10 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	get_roi(crtc, &roi_w, &roi_h);
 
 	spin_lock_irqsave(&mdp5_crtc->cursor.lock, flags);
-	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(mdp5_crtc->lm),
+	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_SIZE(lm),
 			MDP5_LM_CURSOR_SIZE_ROI_H(roi_h) |
 			MDP5_LM_CURSOR_SIZE_ROI_W(roi_w));
-	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(mdp5_crtc->lm),
+	mdp5_write(mdp5_kms, REG_MDP5_LM_CURSOR_START_XY(lm),
 			MDP5_LM_CURSOR_START_XY_Y_START(y) |
 			MDP5_LM_CURSOR_START_XY_X_START(x));
 	spin_unlock_irqrestore(&mdp5_crtc->cursor.lock, flags);
@@ -641,16 +848,80 @@ static int mdp5_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 	return 0;
 }
 
+static void
+mdp5_crtc_atomic_print_state(struct drm_printer *p,
+			     const struct drm_crtc_state *state)
+{
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(state);
+	struct mdp5_pipeline *pipeline = &mdp5_cstate->pipeline;
+	struct mdp5_kms *mdp5_kms = get_kms(state->crtc);
+
+	if (WARN_ON(!pipeline))
+		return;
+
+	drm_printf(p, "\thwmixer=%s\n", pipeline->mixer ?
+			pipeline->mixer->name : "(null)");
+
+	if (mdp5_kms->caps & MDP_CAP_SRC_SPLIT)
+		drm_printf(p, "\tright hwmixer=%s\n", pipeline->r_mixer ?
+			   pipeline->r_mixer->name : "(null)");
+}
+
+static void mdp5_crtc_reset(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc_state *mdp5_cstate;
+
+	if (crtc->state) {
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+		kfree(to_mdp5_crtc_state(crtc->state));
+	}
+
+	mdp5_cstate = kzalloc(sizeof(*mdp5_cstate), GFP_KERNEL);
+
+	if (mdp5_cstate) {
+		mdp5_cstate->base.crtc = crtc;
+		crtc->state = &mdp5_cstate->base;
+	}
+}
+
+static struct drm_crtc_state *
+mdp5_crtc_duplicate_state(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc_state *mdp5_cstate;
+
+	if (WARN_ON(!crtc->state))
+		return NULL;
+
+	mdp5_cstate = kmemdup(to_mdp5_crtc_state(crtc->state),
+			      sizeof(*mdp5_cstate), GFP_KERNEL);
+	if (!mdp5_cstate)
+		return NULL;
+
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &mdp5_cstate->base);
+
+	return &mdp5_cstate->base;
+}
+
+static void mdp5_crtc_destroy_state(struct drm_crtc *crtc, struct drm_crtc_state *state)
+{
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(state);
+
+	__drm_atomic_helper_crtc_destroy_state(state);
+
+	kfree(mdp5_cstate);
+}
+
 static const struct drm_crtc_funcs mdp5_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.set_property = drm_atomic_helper_crtc_set_property,
-	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.reset = mdp5_crtc_reset,
+	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
+	.atomic_destroy_state = mdp5_crtc_destroy_state,
 	.cursor_set = mdp5_crtc_cursor_set,
 	.cursor_move = mdp5_crtc_cursor_move,
+	.atomic_print_state = mdp5_crtc_atomic_print_state,
 };
 
 static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
@@ -658,9 +929,10 @@ static const struct drm_crtc_funcs mdp5_crtc_no_lm_cursor_funcs = {
 	.destroy = mdp5_crtc_destroy,
 	.page_flip = drm_atomic_helper_page_flip,
 	.set_property = drm_atomic_helper_crtc_set_property,
-	.reset = drm_atomic_helper_crtc_reset,
-	.atomic_duplicate_state = drm_atomic_helper_crtc_duplicate_state,
-	.atomic_destroy_state = drm_atomic_helper_crtc_destroy_state,
+	.reset = mdp5_crtc_reset,
+	.atomic_duplicate_state = mdp5_crtc_duplicate_state,
+	.atomic_destroy_state = mdp5_crtc_destroy_state,
+	.atomic_print_state = mdp5_crtc_atomic_print_state,
 };
 
 static const struct drm_crtc_helper_funcs mdp5_crtc_helper_funcs = {
@@ -710,22 +982,26 @@ static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	int ret;
 
 	ret = wait_for_completion_timeout(&mdp5_crtc->pp_completion,
 						msecs_to_jiffies(50));
 	if (ret == 0)
-		dev_warn(dev->dev, "pp done time out, lm=%d\n", mdp5_crtc->lm);
+		dev_warn(dev->dev, "pp done time out, lm=%d\n",
+			 mdp5_cstate->pipeline.mixer->lm);
 }
 
 static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+	struct mdp5_ctl *ctl = mdp5_cstate->ctl;
 	int ret;
 
 	/* Should not call this function if crtc is disabled. */
-	if (!mdp5_crtc->ctl)
+	if (!ctl)
 		return;
 
 	ret = drm_crtc_vblank_get(crtc);
@@ -733,7 +1009,7 @@ static void mdp5_crtc_wait_for_flush_done(struct drm_crtc *crtc)
 		return;
 
 	ret = wait_event_timeout(dev->vblank[drm_crtc_index(crtc)].queue,
-		((mdp5_ctl_get_commit_status(mdp5_crtc->ctl) &
+		((mdp5_ctl_get_commit_status(ctl) &
 		mdp5_crtc->flushed_mask) == 0),
 		msecs_to_jiffies(50));
 	if (ret <= 0)
@@ -750,52 +1026,54 @@ uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc)
 	return mdp5_crtc->vblank.irqmask;
 }
 
-void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
-		struct mdp5_interface *intf, struct mdp5_ctl *ctl)
+void mdp5_crtc_set_pipeline(struct drm_crtc *crtc)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 	struct mdp5_kms *mdp5_kms = get_kms(crtc);
-	int lm = mdp5_crtc_get_lm(crtc);
-
-	/* now that we know what irq's we want: */
-	mdp5_crtc->err.irqmask = intf2err(intf->num);
-	mdp5_crtc->vblank.irqmask = intf2vblank(lm, intf);
-
-	if ((intf->type == INTF_DSI) &&
-		(intf->mode == MDP5_INTF_DSI_MODE_COMMAND)) {
-		mdp5_crtc->pp_done.irqmask = lm2ppdone(lm);
-		mdp5_crtc->pp_done.irq = mdp5_crtc_pp_done_irq;
-		mdp5_crtc->cmd_mode = true;
-	} else {
-		mdp5_crtc->pp_done.irqmask = 0;
-		mdp5_crtc->pp_done.irq = NULL;
-		mdp5_crtc->cmd_mode = false;
-	}
 
+	/* should this be done elsewhere ? */
 	mdp_irq_update(&mdp5_kms->base);
 
-	mdp5_crtc->ctl = ctl;
-	mdp5_ctl_set_pipeline(ctl, intf, lm);
+	mdp5_ctl_set_pipeline(mdp5_cstate->ctl, &mdp5_cstate->pipeline);
 }
 
 struct mdp5_ctl *mdp5_crtc_get_ctl(struct drm_crtc *crtc)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 
-	return mdp5_crtc->ctl;
+	return mdp5_cstate->ctl;
 }
 
-int mdp5_crtc_get_lm(struct drm_crtc *crtc)
+struct mdp5_hw_mixer *mdp5_crtc_get_mixer(struct drm_crtc *crtc)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
-	return WARN_ON(!crtc) ? -EINVAL : mdp5_crtc->lm;
+	struct mdp5_crtc_state *mdp5_cstate;
+
+	if (WARN_ON(!crtc))
+		return ERR_PTR(-EINVAL);
+
+	mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+
+	return WARN_ON(!mdp5_cstate->pipeline.mixer) ?
+		ERR_PTR(-EINVAL) : mdp5_cstate->pipeline.mixer;
+}
+
+struct mdp5_pipeline *mdp5_crtc_get_pipeline(struct drm_crtc *crtc)
+{
+	struct mdp5_crtc_state *mdp5_cstate;
+
+	if (WARN_ON(!crtc))
+		return ERR_PTR(-EINVAL);
+
+	mdp5_cstate = to_mdp5_crtc_state(crtc->state);
+
+	return &mdp5_cstate->pipeline;
 }
 
 void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc)
 {
-	struct mdp5_crtc *mdp5_crtc = to_mdp5_crtc(crtc);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc->state);
 
-	if (mdp5_crtc->cmd_mode)
+	if (mdp5_cstate->cmd_mode)
 		mdp5_crtc_wait_for_pp_done(crtc);
 	else
 		mdp5_crtc_wait_for_flush_done(crtc);
@@ -816,7 +1094,6 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 	crtc = &mdp5_crtc->base;
 
 	mdp5_crtc->id = id;
-	mdp5_crtc->lm = GET_LM_ID(id);
 
 	spin_lock_init(&mdp5_crtc->lm_lock);
 	spin_lock_init(&mdp5_crtc->cursor.lock);
@@ -824,6 +1101,7 @@ struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 
 	mdp5_crtc->vblank.irq = mdp5_crtc_vblank_irq;
 	mdp5_crtc->err.irq = mdp5_crtc_err_irq;
+	mdp5_crtc->pp_done.irq = mdp5_crtc_pp_done_irq;
 
 	if (cursor_plane)
 		drm_crtc_init_with_planes(dev, crtc, plane, cursor_plane,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
index 8b93f7e13200..439e0a300e25 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.c
@@ -32,24 +32,16 @@
 #define CTL_STAT_BUSY		0x1
 #define CTL_STAT_BOOKED	0x2
 
-struct op_mode {
-	struct mdp5_interface intf;
-
-	bool encoder_enabled;
-	uint32_t start_mask;
-};
-
 struct mdp5_ctl {
 	struct mdp5_ctl_manager *ctlm;
 
 	u32 id;
-	int lm;
 
 	/* CTL status bitmask */
 	u32 status;
 
-	/* Operation Mode Configuration for the Pipeline */
-	struct op_mode pipeline;
+	bool encoder_enabled;
+	uint32_t start_mask;
 
 	/* REG_MDP5_CTL_*(<id>) registers access info + lock: */
 	spinlock_t hw_lock;
@@ -146,9 +138,10 @@ static void set_display_intf(struct mdp5_kms *mdp5_kms,
 	spin_unlock_irqrestore(&mdp5_kms->resource_lock, flags);
 }
 
-static void set_ctl_op(struct mdp5_ctl *ctl, struct mdp5_interface *intf)
+static void set_ctl_op(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline)
 {
 	unsigned long flags;
+	struct mdp5_interface *intf = pipeline->intf;
 	u32 ctl_op = 0;
 
 	if (!mdp5_cfg_intf_is_virtual(intf->type))
@@ -169,52 +162,50 @@ static void set_ctl_op(struct mdp5_ctl *ctl, struct mdp5_interface *intf)
 		break;
 	}
 
+	if (pipeline->r_mixer)
+		ctl_op |= MDP5_CTL_OP_PACK_3D_ENABLE |
+			  MDP5_CTL_OP_PACK_3D(1);
+
 	spin_lock_irqsave(&ctl->hw_lock, flags);
 	ctl_write(ctl, REG_MDP5_CTL_OP(ctl->id), ctl_op);
 	spin_unlock_irqrestore(&ctl->hw_lock, flags);
 }
 
-int mdp5_ctl_set_pipeline(struct mdp5_ctl *ctl,
-		struct mdp5_interface *intf, int lm)
+int mdp5_ctl_set_pipeline(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
 	struct mdp5_kms *mdp5_kms = get_kms(ctl_mgr);
+	struct mdp5_interface *intf = pipeline->intf;
+	struct mdp5_hw_mixer *mixer = pipeline->mixer;
+	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
 
-	if (unlikely(WARN_ON(intf->num != ctl->pipeline.intf.num))) {
-		dev_err(mdp5_kms->dev->dev,
-			"CTL %d is allocated by INTF %d, but used by INTF %d\n",
-			ctl->id, ctl->pipeline.intf.num, intf->num);
-		return -EINVAL;
-	}
-
-	ctl->lm = lm;
-
-	memcpy(&ctl->pipeline.intf, intf, sizeof(*intf));
-
-	ctl->pipeline.start_mask = mdp_ctl_flush_mask_lm(ctl->lm) |
-				   mdp_ctl_flush_mask_encoder(intf);
+	ctl->start_mask = mdp_ctl_flush_mask_lm(mixer->lm) |
+			  mdp_ctl_flush_mask_encoder(intf);
+	if (r_mixer)
+		ctl->start_mask |= mdp_ctl_flush_mask_lm(r_mixer->lm);
 
 	/* Virtual interfaces need not set a display intf (e.g.: Writeback) */
 	if (!mdp5_cfg_intf_is_virtual(intf->type))
 		set_display_intf(mdp5_kms, intf);
 
-	set_ctl_op(ctl, intf);
+	set_ctl_op(ctl, pipeline);
 
 	return 0;
 }
 
-static bool start_signal_needed(struct mdp5_ctl *ctl)
+static bool start_signal_needed(struct mdp5_ctl *ctl,
+				struct mdp5_pipeline *pipeline)
 {
-	struct op_mode *pipeline = &ctl->pipeline;
+	struct mdp5_interface *intf = pipeline->intf;
 
-	if (!pipeline->encoder_enabled || pipeline->start_mask != 0)
+	if (!ctl->encoder_enabled || ctl->start_mask != 0)
 		return false;
 
-	switch (pipeline->intf.type) {
+	switch (intf->type) {
 	case INTF_WB:
 		return true;
 	case INTF_DSI:
-		return pipeline->intf.mode == MDP5_INTF_DSI_MODE_COMMAND;
+		return intf->mode == MDP5_INTF_DSI_MODE_COMMAND;
 	default:
 		return false;
 	}
@@ -236,19 +227,23 @@ static void send_start_signal(struct mdp5_ctl *ctl)
 	spin_unlock_irqrestore(&ctl->hw_lock, flags);
 }
 
-static void refill_start_mask(struct mdp5_ctl *ctl)
+static void refill_start_mask(struct mdp5_ctl *ctl,
+			      struct mdp5_pipeline *pipeline)
 {
-	struct op_mode *pipeline = &ctl->pipeline;
-	struct mdp5_interface *intf = &ctl->pipeline.intf;
+	struct mdp5_interface *intf = pipeline->intf;
+	struct mdp5_hw_mixer *mixer = pipeline->mixer;
+	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
 
-	pipeline->start_mask = mdp_ctl_flush_mask_lm(ctl->lm);
+	ctl->start_mask = mdp_ctl_flush_mask_lm(mixer->lm);
+	if (r_mixer)
+		ctl->start_mask |= mdp_ctl_flush_mask_lm(r_mixer->lm);
 
 	/*
 	 * Writeback encoder needs to program & flush
 	 * address registers for each page flip..
 	 */
 	if (intf->type == INTF_WB)
-		pipeline->start_mask |= mdp_ctl_flush_mask_encoder(intf);
+		ctl->start_mask |= mdp_ctl_flush_mask_encoder(intf);
 }
 
 /**
@@ -259,17 +254,21 @@ static void refill_start_mask(struct mdp5_ctl *ctl)
  * Note:
  * This encoder state is needed to trigger START signal (data path kickoff).
  */
-int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl, bool enabled)
+int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl,
+			       struct mdp5_pipeline *pipeline,
+			       bool enabled)
 {
+	struct mdp5_interface *intf = pipeline->intf;
+
 	if (WARN_ON(!ctl))
 		return -EINVAL;
 
-	ctl->pipeline.encoder_enabled = enabled;
-	DBG("intf_%d: %s", ctl->pipeline.intf.num, enabled ? "on" : "off");
+	ctl->encoder_enabled = enabled;
+	DBG("intf_%d: %s", intf->num, enabled ? "on" : "off");
 
-	if (start_signal_needed(ctl)) {
+	if (start_signal_needed(ctl, pipeline)) {
 		send_start_signal(ctl);
-		refill_start_mask(ctl);
+		refill_start_mask(ctl, pipeline);
 	}
 
 	return 0;
@@ -280,29 +279,35 @@ int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl, bool enabled)
  * CTL registers need to be flushed after calling this function
  * (call mdp5_ctl_commit() with mdp_ctl_flush_mask_ctl() mask)
  */
-int mdp5_ctl_set_cursor(struct mdp5_ctl *ctl, int cursor_id, bool enable)
+int mdp5_ctl_set_cursor(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+			int cursor_id, bool enable)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
 	unsigned long flags;
 	u32 blend_cfg;
-	int lm = ctl->lm;
+	struct mdp5_hw_mixer *mixer = pipeline->mixer;
+
+	if (unlikely(WARN_ON(!mixer))) {
+		dev_err(ctl_mgr->dev->dev, "CTL %d cannot find LM",
+			ctl->id);
+		return -EINVAL;
+	}
 
-	if (unlikely(WARN_ON(lm < 0))) {
-		dev_err(ctl_mgr->dev->dev, "CTL %d cannot find LM: %d",
-				ctl->id, lm);
+	if (pipeline->r_mixer) {
+		dev_err(ctl_mgr->dev->dev, "unsupported configuration");
 		return -EINVAL;
 	}
 
 	spin_lock_irqsave(&ctl->hw_lock, flags);
 
-	blend_cfg = ctl_read(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, lm));
+	blend_cfg = ctl_read(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, mixer->lm));
 
 	if (enable)
 		blend_cfg |=  MDP5_CTL_LAYER_REG_CURSOR_OUT;
 	else
 		blend_cfg &= ~MDP5_CTL_LAYER_REG_CURSOR_OUT;
 
-	ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, lm), blend_cfg);
+	ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, mixer->lm), blend_cfg);
 	ctl->cursor_on = enable;
 
 	spin_unlock_irqrestore(&ctl->hw_lock, flags);
@@ -355,37 +360,88 @@ static u32 mdp_ctl_blend_ext_mask(enum mdp5_pipe pipe,
 	}
 }
 
-int mdp5_ctl_blend(struct mdp5_ctl *ctl, enum mdp5_pipe *stage, u32 stage_cnt,
-		   u32 ctl_blend_op_flags)
+static void mdp5_ctl_reset_blend_regs(struct mdp5_ctl *ctl)
+{
+	unsigned long flags;
+	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
+	int i;
+
+	spin_lock_irqsave(&ctl->hw_lock, flags);
+
+	for (i = 0; i < ctl_mgr->nlm; i++) {
+		ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, i), 0x0);
+		ctl_write(ctl, REG_MDP5_CTL_LAYER_EXT_REG(ctl->id, i), 0x0);
+	}
+
+	spin_unlock_irqrestore(&ctl->hw_lock, flags);
+}
+
+#define PIPE_LEFT	0
+#define PIPE_RIGHT	1
+int mdp5_ctl_blend(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+		   enum mdp5_pipe stage[][MAX_PIPE_STAGE],
+		   enum mdp5_pipe r_stage[][MAX_PIPE_STAGE],
+		   u32 stage_cnt, u32 ctl_blend_op_flags)
 {
+	struct mdp5_hw_mixer *mixer = pipeline->mixer;
+	struct mdp5_hw_mixer *r_mixer = pipeline->r_mixer;
 	unsigned long flags;
 	u32 blend_cfg = 0, blend_ext_cfg = 0;
+	u32 r_blend_cfg = 0, r_blend_ext_cfg = 0;
 	int i, start_stage;
 
+	mdp5_ctl_reset_blend_regs(ctl);
+
 	if (ctl_blend_op_flags & MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT) {
 		start_stage = STAGE0;
 		blend_cfg |= MDP5_CTL_LAYER_REG_BORDER_COLOR;
+		if (r_mixer)
+			r_blend_cfg |= MDP5_CTL_LAYER_REG_BORDER_COLOR;
 	} else {
 		start_stage = STAGE_BASE;
 	}
 
 	for (i = start_stage; stage_cnt && i <= STAGE_MAX; i++) {
-		blend_cfg |= mdp_ctl_blend_mask(stage[i], i);
-		blend_ext_cfg |= mdp_ctl_blend_ext_mask(stage[i], i);
+		blend_cfg |=
+			mdp_ctl_blend_mask(stage[i][PIPE_LEFT], i) |
+			mdp_ctl_blend_mask(stage[i][PIPE_RIGHT], i);
+		blend_ext_cfg |=
+			mdp_ctl_blend_ext_mask(stage[i][PIPE_LEFT], i) |
+			mdp_ctl_blend_ext_mask(stage[i][PIPE_RIGHT], i);
+		if (r_mixer) {
+			r_blend_cfg |=
+				mdp_ctl_blend_mask(r_stage[i][PIPE_LEFT], i) |
+				mdp_ctl_blend_mask(r_stage[i][PIPE_RIGHT], i);
+			r_blend_ext_cfg |=
+			     mdp_ctl_blend_ext_mask(r_stage[i][PIPE_LEFT], i) |
+			     mdp_ctl_blend_ext_mask(r_stage[i][PIPE_RIGHT], i);
+		}
 	}
 
 	spin_lock_irqsave(&ctl->hw_lock, flags);
 	if (ctl->cursor_on)
 		blend_cfg |=  MDP5_CTL_LAYER_REG_CURSOR_OUT;
 
-	ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, ctl->lm), blend_cfg);
-	ctl_write(ctl, REG_MDP5_CTL_LAYER_EXT_REG(ctl->id, ctl->lm), blend_ext_cfg);
+	ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, mixer->lm), blend_cfg);
+	ctl_write(ctl, REG_MDP5_CTL_LAYER_EXT_REG(ctl->id, mixer->lm),
+		  blend_ext_cfg);
+	if (r_mixer) {
+		ctl_write(ctl, REG_MDP5_CTL_LAYER_REG(ctl->id, r_mixer->lm),
+			  r_blend_cfg);
+		ctl_write(ctl, REG_MDP5_CTL_LAYER_EXT_REG(ctl->id, r_mixer->lm),
+			  r_blend_ext_cfg);
+	}
 	spin_unlock_irqrestore(&ctl->hw_lock, flags);
 
-	ctl->pending_ctl_trigger = mdp_ctl_flush_mask_lm(ctl->lm);
+	ctl->pending_ctl_trigger = mdp_ctl_flush_mask_lm(mixer->lm);
+	if (r_mixer)
+		ctl->pending_ctl_trigger |= mdp_ctl_flush_mask_lm(r_mixer->lm);
 
-	DBG("lm%d: blend config = 0x%08x. ext_cfg = 0x%08x", ctl->lm,
+	DBG("lm%d: blend config = 0x%08x. ext_cfg = 0x%08x", mixer->lm,
 		blend_cfg, blend_ext_cfg);
+	if (r_mixer)
+		DBG("lm%d: blend config = 0x%08x. ext_cfg = 0x%08x",
+		    r_mixer->lm, r_blend_cfg, r_blend_ext_cfg);
 
 	return 0;
 }
@@ -443,7 +499,8 @@ u32 mdp_ctl_flush_mask_lm(int lm)
 	}
 }
 
-static u32 fix_sw_flush(struct mdp5_ctl *ctl, u32 flush_mask)
+static u32 fix_sw_flush(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+			u32 flush_mask)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
 	u32 sw_mask = 0;
@@ -452,7 +509,7 @@ static u32 fix_sw_flush(struct mdp5_ctl *ctl, u32 flush_mask)
 
 	/* for some targets, cursor bit is the same as LM bit */
 	if (BIT_NEEDS_SW_FIX(MDP5_CTL_FLUSH_CURSOR_0))
-		sw_mask |= mdp_ctl_flush_mask_lm(ctl->lm);
+		sw_mask |= mdp_ctl_flush_mask_lm(pipeline->mixer->lm);
 
 	return sw_mask;
 }
@@ -498,25 +555,26 @@ static void fix_for_single_flush(struct mdp5_ctl *ctl, u32 *flush_mask,
  *
  * Return H/W flushed bit mask.
  */
-u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask)
+u32 mdp5_ctl_commit(struct mdp5_ctl *ctl,
+		    struct mdp5_pipeline *pipeline,
+		    u32 flush_mask)
 {
 	struct mdp5_ctl_manager *ctl_mgr = ctl->ctlm;
-	struct op_mode *pipeline = &ctl->pipeline;
 	unsigned long flags;
 	u32 flush_id = ctl->id;
 	u32 curr_ctl_flush_mask;
 
-	pipeline->start_mask &= ~flush_mask;
+	ctl->start_mask &= ~flush_mask;
 
 	VERB("flush_mask=%x, start_mask=%x, trigger=%x", flush_mask,
-			pipeline->start_mask, ctl->pending_ctl_trigger);
+			ctl->start_mask, ctl->pending_ctl_trigger);
 
 	if (ctl->pending_ctl_trigger & flush_mask) {
 		flush_mask |= MDP5_CTL_FLUSH_CTL;
 		ctl->pending_ctl_trigger = 0;
 	}
 
-	flush_mask |= fix_sw_flush(ctl, flush_mask);
+	flush_mask |= fix_sw_flush(ctl, pipeline, flush_mask);
 
 	flush_mask &= ctl_mgr->flush_hw_mask;
 
@@ -530,9 +588,9 @@ u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask)
 		spin_unlock_irqrestore(&ctl->hw_lock, flags);
 	}
 
-	if (start_signal_needed(ctl)) {
+	if (start_signal_needed(ctl, pipeline)) {
 		send_start_signal(ctl);
-		refill_start_mask(ctl);
+		refill_start_mask(ctl, pipeline);
 	}
 
 	return curr_ctl_flush_mask;
@@ -619,8 +677,6 @@ struct mdp5_ctl *mdp5_ctlm_request(struct mdp5_ctl_manager *ctl_mgr,
 
 found:
 	ctl = &ctl_mgr->ctls[c];
-	ctl->pipeline.intf.num = intf_num;
-	ctl->lm = -1;
 	ctl->status |= CTL_STAT_BUSY;
 	ctl->pending_ctl_trigger = 0;
 	DBG("CTL %d allocated", ctl->id);
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
index fda00d33e4db..b63120388dc6 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_ctl.h
@@ -37,13 +37,17 @@ struct mdp5_ctl *mdp5_ctlm_request(struct mdp5_ctl_manager *ctlm, int intf_num);
 int mdp5_ctl_get_ctl_id(struct mdp5_ctl *ctl);
 
 struct mdp5_interface;
-int mdp5_ctl_set_pipeline(struct mdp5_ctl *ctl, struct mdp5_interface *intf,
-				int lm);
-int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl, bool enabled);
+struct mdp5_pipeline;
+int mdp5_ctl_set_pipeline(struct mdp5_ctl *ctl, struct mdp5_pipeline *p);
+int mdp5_ctl_set_encoder_state(struct mdp5_ctl *ctl, struct mdp5_pipeline *p,
+			       bool enabled);
 
-int mdp5_ctl_set_cursor(struct mdp5_ctl *ctl, int cursor_id, bool enable);
+int mdp5_ctl_set_cursor(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+			int cursor_id, bool enable);
 int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable);
 
+#define MAX_PIPE_STAGE		2
+
 /*
  * mdp5_ctl_blend() - Blend multiple layers on a Layer Mixer (LM)
  *
@@ -56,8 +60,10 @@ int mdp5_ctl_pair(struct mdp5_ctl *ctlx, struct mdp5_ctl *ctly, bool enable);
  * (call mdp5_ctl_commit() with mdp_ctl_flush_mask_ctl() mask)
  */
 #define MDP5_CTL_BLEND_OP_FLAG_BORDER_OUT	BIT(0)
-int mdp5_ctl_blend(struct mdp5_ctl *ctl, enum mdp5_pipe *stage, u32 stage_cnt,
-		   u32 ctl_blend_op_flags);
+int mdp5_ctl_blend(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+		   enum mdp5_pipe stage[][MAX_PIPE_STAGE],
+		   enum mdp5_pipe r_stage[][MAX_PIPE_STAGE],
+		   u32 stage_cnt, u32 ctl_blend_op_flags);
 
 /**
  * mdp_ctl_flush_mask...() - Register FLUSH masks
@@ -71,7 +77,8 @@ u32 mdp_ctl_flush_mask_cursor(int cursor_id);
 u32 mdp_ctl_flush_mask_encoder(struct mdp5_interface *intf);
 
 /* @flush_mask: see CTL flush masks definitions below */
-u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, u32 flush_mask);
+u32 mdp5_ctl_commit(struct mdp5_ctl *ctl, struct mdp5_pipeline *pipeline,
+		    u32 flush_mask);
 u32 mdp5_ctl_get_commit_status(struct mdp5_ctl *ctl);
 
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
index 80fa482ae8ed..c2ab0f033031 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_encoder.c
@@ -109,7 +109,7 @@ static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
 	struct drm_device *dev = encoder->dev;
 	struct drm_connector *connector;
-	int intf = mdp5_encoder->intf.num;
+	int intf = mdp5_encoder->intf->num;
 	uint32_t dtv_hsync_skew, vsync_period, vsync_len, ctrl_pol;
 	uint32_t display_v_start, display_v_end;
 	uint32_t hsync_start_x, hsync_end_x;
@@ -130,7 +130,7 @@ static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 	ctrl_pol = 0;
 
 	/* DSI controller cannot handle active-low sync signals. */
-	if (mdp5_encoder->intf.type != INTF_DSI) {
+	if (mdp5_encoder->intf->type != INTF_DSI) {
 		if (mode->flags & DRM_MODE_FLAG_NHSYNC)
 			ctrl_pol |= MDP5_INTF_POLARITY_CTL_HSYNC_LOW;
 		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
@@ -175,7 +175,7 @@ static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 	 * DISPLAY_V_START = (VBP * HCYCLE) + HBP
 	 * DISPLAY_V_END = (VBP + VACTIVE) * HCYCLE - 1 - HFP
 	 */
-	if (mdp5_encoder->intf.type == INTF_eDP) {
+	if (mdp5_encoder->intf->type == INTF_eDP) {
 		display_v_start += mode->htotal - mode->hsync_start;
 		display_v_end -= mode->hsync_start - mode->hdisplay;
 	}
@@ -206,8 +206,7 @@ static void mdp5_vid_encoder_mode_set(struct drm_encoder *encoder,
 
 	spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags);
 
-	mdp5_crtc_set_pipeline(encoder->crtc, &mdp5_encoder->intf,
-				mdp5_encoder->ctl);
+	mdp5_crtc_set_pipeline(encoder->crtc);
 }
 
 static void mdp5_vid_encoder_disable(struct drm_encoder *encoder)
@@ -215,20 +214,21 @@ static void mdp5_vid_encoder_disable(struct drm_encoder *encoder)
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
 	struct mdp5_ctl *ctl = mdp5_encoder->ctl;
-	int lm = mdp5_crtc_get_lm(encoder->crtc);
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
-	int intfn = mdp5_encoder->intf.num;
+	struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(encoder->crtc);
+	struct mdp5_hw_mixer *mixer = mdp5_crtc_get_mixer(encoder->crtc);
+	struct mdp5_interface *intf = mdp5_encoder->intf;
+	int intfn = mdp5_encoder->intf->num;
 	unsigned long flags;
 
 	if (WARN_ON(!mdp5_encoder->enabled))
 		return;
 
-	mdp5_ctl_set_encoder_state(ctl, false);
+	mdp5_ctl_set_encoder_state(ctl, pipeline, false);
 
 	spin_lock_irqsave(&mdp5_encoder->intf_lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(intfn), 0);
 	spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags);
-	mdp5_ctl_commit(ctl, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
 
 	/*
 	 * Wait for a vsync so we know the ENABLE=0 latched before
@@ -238,7 +238,7 @@ static void mdp5_vid_encoder_disable(struct drm_encoder *encoder)
 	 * the settings changes for the new modeset (like new
 	 * scanout buffer) don't latch properly..
 	 */
-	mdp_irq_wait(&mdp5_kms->base, intf2vblank(lm, intf));
+	mdp_irq_wait(&mdp5_kms->base, intf2vblank(mixer, intf));
 
 	bs_set(mdp5_encoder, 0);
 
@@ -250,8 +250,9 @@ static void mdp5_vid_encoder_enable(struct drm_encoder *encoder)
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
 	struct mdp5_ctl *ctl = mdp5_encoder->ctl;
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
-	int intfn = mdp5_encoder->intf.num;
+	struct mdp5_interface *intf = mdp5_encoder->intf;
+	struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(encoder->crtc);
+	int intfn = intf->num;
 	unsigned long flags;
 
 	if (WARN_ON(mdp5_encoder->enabled))
@@ -261,9 +262,9 @@ static void mdp5_vid_encoder_enable(struct drm_encoder *encoder)
 	spin_lock_irqsave(&mdp5_encoder->intf_lock, flags);
 	mdp5_write(mdp5_kms, REG_MDP5_INTF_TIMING_ENGINE_EN(intfn), 1);
 	spin_unlock_irqrestore(&mdp5_encoder->intf_lock, flags);
-	mdp5_ctl_commit(ctl, mdp_ctl_flush_mask_encoder(intf));
+	mdp5_ctl_commit(ctl, pipeline, mdp_ctl_flush_mask_encoder(intf));
 
-	mdp5_ctl_set_encoder_state(ctl, true);
+	mdp5_ctl_set_encoder_state(ctl, pipeline, true);
 
 	mdp5_encoder->enabled = true;
 }
@@ -273,7 +274,7 @@ static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
 				  struct drm_display_mode *adjusted_mode)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
+	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
 		mdp5_cmd_encoder_mode_set(encoder, mode, adjusted_mode);
@@ -284,7 +285,7 @@ static void mdp5_encoder_mode_set(struct drm_encoder *encoder,
 static void mdp5_encoder_disable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
+	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
 		mdp5_cmd_encoder_disable(encoder);
@@ -295,7 +296,7 @@ static void mdp5_encoder_disable(struct drm_encoder *encoder)
 static void mdp5_encoder_enable(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
+	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	if (intf->mode == MDP5_INTF_DSI_MODE_COMMAND)
 		mdp5_cmd_encoder_disable(encoder);
@@ -303,17 +304,33 @@ static void mdp5_encoder_enable(struct drm_encoder *encoder)
 		mdp5_vid_encoder_enable(encoder);
 }
 
+static int mdp5_encoder_atomic_check(struct drm_encoder *encoder,
+				     struct drm_crtc_state *crtc_state,
+				     struct drm_connector_state *conn_state)
+{
+	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
+	struct mdp5_crtc_state *mdp5_cstate = to_mdp5_crtc_state(crtc_state);
+	struct mdp5_interface *intf = mdp5_encoder->intf;
+	struct mdp5_ctl *ctl = mdp5_encoder->ctl;
+
+	mdp5_cstate->ctl = ctl;
+	mdp5_cstate->pipeline.intf = intf;
+
+	return 0;
+}
+
 static const struct drm_encoder_helper_funcs mdp5_encoder_helper_funcs = {
 	.mode_set = mdp5_encoder_mode_set,
 	.disable = mdp5_encoder_disable,
 	.enable = mdp5_encoder_enable,
+	.atomic_check = mdp5_encoder_atomic_check,
 };
 
 int mdp5_encoder_get_linecount(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
-	int intf = mdp5_encoder->intf.num;
+	int intf = mdp5_encoder->intf->num;
 
 	return mdp5_read(mdp5_kms, REG_MDP5_INTF_LINE_COUNT(intf));
 }
@@ -322,7 +339,7 @@ u32 mdp5_encoder_get_framecount(struct drm_encoder *encoder)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
 	struct mdp5_kms *mdp5_kms = get_kms(encoder);
-	int intf = mdp5_encoder->intf.num;
+	int intf = mdp5_encoder->intf->num;
 
 	return mdp5_read(mdp5_kms, REG_MDP5_INTF_FRAME_COUNT(intf));
 }
@@ -340,7 +357,7 @@ int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder,
 		return -EINVAL;
 
 	mdp5_kms = get_kms(encoder);
-	intf_num = mdp5_encoder->intf.num;
+	intf_num = mdp5_encoder->intf->num;
 
 	/* Switch slave encoder's TimingGen Sync mode,
 	 * to use the master's enable signal for the slave encoder.
@@ -369,7 +386,7 @@ int mdp5_vid_encoder_set_split_display(struct drm_encoder *encoder,
 void mdp5_encoder_set_intf_mode(struct drm_encoder *encoder, bool cmd_mode)
 {
 	struct mdp5_encoder *mdp5_encoder = to_mdp5_encoder(encoder);
-	struct mdp5_interface *intf = &mdp5_encoder->intf;
+	struct mdp5_interface *intf = mdp5_encoder->intf;
 
 	/* TODO: Expand this to set writeback modes too */
 	if (cmd_mode) {
@@ -385,7 +402,8 @@ void mdp5_encoder_set_intf_mode(struct drm_encoder *encoder, bool cmd_mode)
 
 /* initialize encoder */
 struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
-			struct mdp5_interface *intf, struct mdp5_ctl *ctl)
+				      struct mdp5_interface *intf,
+				      struct mdp5_ctl *ctl)
 {
 	struct drm_encoder *encoder = NULL;
 	struct mdp5_encoder *mdp5_encoder;
@@ -399,9 +417,9 @@ struct drm_encoder *mdp5_encoder_init(struct drm_device *dev,
 		goto fail;
 	}
 
-	memcpy(&mdp5_encoder->intf, intf, sizeof(mdp5_encoder->intf));
 	encoder = &mdp5_encoder->base;
 	mdp5_encoder->ctl = ctl;
+	mdp5_encoder->intf = intf;
 
 	spin_lock_init(&mdp5_encoder->intf_lock);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
index 41ccd2a15d3c..d3d6b4cae1e6 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.c
@@ -93,6 +93,7 @@ struct mdp5_state *mdp5_get_state(struct drm_atomic_state *s)
 
 	/* Copy state: */
 	new_state->hwpipe = mdp5_kms->state->hwpipe;
+	new_state->hwmixer = mdp5_kms->state->hwmixer;
 	if (mdp5_kms->smp)
 		new_state->smp = mdp5_kms->state->smp;
 
@@ -165,13 +166,16 @@ static void mdp5_kms_destroy(struct msm_kms *kms)
 	struct msm_gem_address_space *aspace = mdp5_kms->aspace;
 	int i;
 
+	for (i = 0; i < mdp5_kms->num_hwmixers; i++)
+		mdp5_mixer_destroy(mdp5_kms->hwmixers[i]);
+
 	for (i = 0; i < mdp5_kms->num_hwpipes; i++)
 		mdp5_pipe_destroy(mdp5_kms->hwpipes[i]);
 
 	if (aspace) {
 		aspace->mmu->funcs->detach(aspace->mmu,
 				iommu_ports, ARRAY_SIZE(iommu_ports));
-		msm_gem_address_space_destroy(aspace);
+		msm_gem_address_space_put(aspace);
 	}
 }
 
@@ -268,19 +272,14 @@ int mdp5_enable(struct mdp5_kms *mdp5_kms)
 }
 
 static struct drm_encoder *construct_encoder(struct mdp5_kms *mdp5_kms,
-		enum mdp5_intf_type intf_type, int intf_num,
-		struct mdp5_ctl *ctl)
+					     struct mdp5_interface *intf,
+					     struct mdp5_ctl *ctl)
 {
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
 	struct drm_encoder *encoder;
-	struct mdp5_interface intf = {
-			.num	= intf_num,
-			.type	= intf_type,
-			.mode	= MDP5_INTF_MODE_NONE,
-	};
 
-	encoder = mdp5_encoder_init(dev, &intf, ctl);
+	encoder = mdp5_encoder_init(dev, intf, ctl);
 	if (IS_ERR(encoder)) {
 		dev_err(dev->dev, "failed to construct encoder\n");
 		return encoder;
@@ -309,32 +308,28 @@ static int get_dsi_id_from_intf(const struct mdp5_cfg_hw *hw_cfg, int intf_num)
 	return -EINVAL;
 }
 
-static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
+static int modeset_init_intf(struct mdp5_kms *mdp5_kms,
+			     struct mdp5_interface *intf)
 {
 	struct drm_device *dev = mdp5_kms->dev;
 	struct msm_drm_private *priv = dev->dev_private;
-	const struct mdp5_cfg_hw *hw_cfg =
-					mdp5_cfg_get_hw_config(mdp5_kms->cfg);
-	enum mdp5_intf_type intf_type = hw_cfg->intf.connect[intf_num];
 	struct mdp5_ctl_manager *ctlm = mdp5_kms->ctlm;
 	struct mdp5_ctl *ctl;
 	struct drm_encoder *encoder;
 	int ret = 0;
 
-	switch (intf_type) {
-	case INTF_DISABLED:
-		break;
+	switch (intf->type) {
 	case INTF_eDP:
 		if (!priv->edp)
 			break;
 
-		ctl = mdp5_ctlm_request(ctlm, intf_num);
+		ctl = mdp5_ctlm_request(ctlm, intf->num);
 		if (!ctl) {
 			ret = -EINVAL;
 			break;
 		}
 
-		encoder = construct_encoder(mdp5_kms, INTF_eDP, intf_num, ctl);
+		encoder = construct_encoder(mdp5_kms, intf, ctl);
 		if (IS_ERR(encoder)) {
 			ret = PTR_ERR(encoder);
 			break;
@@ -346,13 +341,13 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 		if (!priv->hdmi)
 			break;
 
-		ctl = mdp5_ctlm_request(ctlm, intf_num);
+		ctl = mdp5_ctlm_request(ctlm, intf->num);
 		if (!ctl) {
 			ret = -EINVAL;
 			break;
 		}
 
-		encoder = construct_encoder(mdp5_kms, INTF_HDMI, intf_num, ctl);
+		encoder = construct_encoder(mdp5_kms, intf, ctl);
 		if (IS_ERR(encoder)) {
 			ret = PTR_ERR(encoder);
 			break;
@@ -362,11 +357,13 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 		break;
 	case INTF_DSI:
 	{
-		int dsi_id = get_dsi_id_from_intf(hw_cfg, intf_num);
+		const struct mdp5_cfg_hw *hw_cfg =
+					mdp5_cfg_get_hw_config(mdp5_kms->cfg);
+		int dsi_id = get_dsi_id_from_intf(hw_cfg, intf->num);
 
 		if ((dsi_id >= ARRAY_SIZE(priv->dsi)) || (dsi_id < 0)) {
 			dev_err(dev->dev, "failed to find dsi from intf %d\n",
-				intf_num);
+				intf->num);
 			ret = -EINVAL;
 			break;
 		}
@@ -374,13 +371,13 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 		if (!priv->dsi[dsi_id])
 			break;
 
-		ctl = mdp5_ctlm_request(ctlm, intf_num);
+		ctl = mdp5_ctlm_request(ctlm, intf->num);
 		if (!ctl) {
 			ret = -EINVAL;
 			break;
 		}
 
-		encoder = construct_encoder(mdp5_kms, INTF_DSI, intf_num, ctl);
+		encoder = construct_encoder(mdp5_kms, intf, ctl);
 		if (IS_ERR(encoder)) {
 			ret = PTR_ERR(encoder);
 			break;
@@ -390,7 +387,7 @@ static int modeset_init_intf(struct mdp5_kms *mdp5_kms, int intf_num)
 		break;
 	}
 	default:
-		dev_err(dev->dev, "unknown intf: %d\n", intf_type);
+		dev_err(dev->dev, "unknown intf: %d\n", intf->type);
 		ret = -EINVAL;
 		break;
 	}
@@ -414,8 +411,8 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 	 * Construct encoders and modeset initialize connector devices
 	 * for each external display interface.
 	 */
-	for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) {
-		ret = modeset_init_intf(mdp5_kms, i);
+	for (i = 0; i < mdp5_kms->num_intfs; i++) {
+		ret = modeset_init_intf(mdp5_kms, mdp5_kms->intfs[i]);
 		if (ret)
 			goto fail;
 	}
@@ -425,7 +422,7 @@ static int modeset_init(struct mdp5_kms *mdp5_kms)
 	 * the MDP5 interfaces) than the number of layer mixers present in HW,
 	 * but let's be safe here anyway
 	 */
-	num_crtcs = min(priv->num_encoders, mdp5_cfg->lm.count);
+	num_crtcs = min(priv->num_encoders, mdp5_kms->num_hwmixers);
 
 	/*
 	 * Construct planes equaling the number of hw pipes, and CRTCs for the
@@ -744,6 +741,7 @@ fail:
 static void mdp5_destroy(struct platform_device *pdev)
 {
 	struct mdp5_kms *mdp5_kms = platform_get_drvdata(pdev);
+	int i;
 
 	if (mdp5_kms->ctlm)
 		mdp5_ctlm_destroy(mdp5_kms->ctlm);
@@ -752,6 +750,9 @@ static void mdp5_destroy(struct platform_device *pdev)
 	if (mdp5_kms->cfg)
 		mdp5_cfg_destroy(mdp5_kms->cfg);
 
+	for (i = 0; i < mdp5_kms->num_intfs; i++)
+		kfree(mdp5_kms->intfs[i]);
+
 	if (mdp5_kms->rpm_enabled)
 		pm_runtime_disable(&pdev->dev);
 
@@ -829,6 +830,64 @@ static int hwpipe_init(struct mdp5_kms *mdp5_kms)
 	return 0;
 }
 
+static int hwmixer_init(struct mdp5_kms *mdp5_kms)
+{
+	struct drm_device *dev = mdp5_kms->dev;
+	const struct mdp5_cfg_hw *hw_cfg;
+	int i, ret;
+
+	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
+
+	for (i = 0; i < hw_cfg->lm.count; i++) {
+		struct mdp5_hw_mixer *mixer;
+
+		mixer = mdp5_mixer_init(&hw_cfg->lm.instances[i]);
+		if (IS_ERR(mixer)) {
+			ret = PTR_ERR(mixer);
+			dev_err(dev->dev, "failed to construct LM%d (%d)\n",
+				i, ret);
+			return ret;
+		}
+
+		mixer->idx = mdp5_kms->num_hwmixers;
+		mdp5_kms->hwmixers[mdp5_kms->num_hwmixers++] = mixer;
+	}
+
+	return 0;
+}
+
+static int interface_init(struct mdp5_kms *mdp5_kms)
+{
+	struct drm_device *dev = mdp5_kms->dev;
+	const struct mdp5_cfg_hw *hw_cfg;
+	const enum mdp5_intf_type *intf_types;
+	int i;
+
+	hw_cfg = mdp5_cfg_get_hw_config(mdp5_kms->cfg);
+	intf_types = hw_cfg->intf.connect;
+
+	for (i = 0; i < ARRAY_SIZE(hw_cfg->intf.connect); i++) {
+		struct mdp5_interface *intf;
+
+		if (intf_types[i] == INTF_DISABLED)
+			continue;
+
+		intf = kzalloc(sizeof(*intf), GFP_KERNEL);
+		if (!intf) {
+			dev_err(dev->dev, "failed to construct INTF%d\n", i);
+			return -ENOMEM;
+		}
+
+		intf->num = i;
+		intf->type = intf_types[i];
+		intf->mode = MDP5_INTF_MODE_NONE;
+		intf->idx = mdp5_kms->num_intfs;
+		mdp5_kms->intfs[mdp5_kms->num_intfs++] = intf;
+	}
+
+	return 0;
+}
+
 static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 {
 	struct msm_drm_private *priv = dev->dev_private;
@@ -929,6 +988,14 @@ static int mdp5_init(struct platform_device *pdev, struct drm_device *dev)
 	if (ret)
 		goto fail;
 
+	ret = hwmixer_init(mdp5_kms);
+	if (ret)
+		goto fail;
+
+	ret = interface_init(mdp5_kms);
+	if (ret)
+		goto fail;
+
 	/* set uninit-ed kms */
 	priv->kms = &mdp5_kms->base.base;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
index 9de471191eba..8bdb7ee4983b 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_kms.h
@@ -23,8 +23,9 @@
 #include "mdp/mdp_kms.h"
 #include "mdp5_cfg.h"	/* must be included before mdp5.xml.h */
 #include "mdp5.xml.h"
-#include "mdp5_ctl.h"
 #include "mdp5_pipe.h"
+#include "mdp5_mixer.h"
+#include "mdp5_ctl.h"
 #include "mdp5_smp.h"
 
 struct mdp5_state;
@@ -39,6 +40,12 @@ struct mdp5_kms {
 	unsigned num_hwpipes;
 	struct mdp5_hw_pipe *hwpipes[SSPP_MAX];
 
+	unsigned num_hwmixers;
+	struct mdp5_hw_mixer *hwmixers[8];
+
+	unsigned num_intfs;
+	struct mdp5_interface *intfs[5];
+
 	struct mdp5_cfg_handler *cfg;
 	uint32_t caps;	/* MDP capabilities (MDP_CAP_XXX bits) */
 
@@ -83,6 +90,7 @@ struct mdp5_kms {
  */
 struct mdp5_state {
 	struct mdp5_hw_pipe_state hwpipe;
+	struct mdp5_hw_mixer_state hwmixer;
 	struct mdp5_smp_state smp;
 };
 
@@ -96,6 +104,7 @@ struct mdp5_plane_state {
 	struct drm_plane_state base;
 
 	struct mdp5_hw_pipe *hwpipe;
+	struct mdp5_hw_pipe *r_hwpipe;	/* right hwpipe */
 
 	/* aligned with property */
 	uint8_t premultiplied;
@@ -108,6 +117,28 @@ struct mdp5_plane_state {
 #define to_mdp5_plane_state(x) \
 		container_of(x, struct mdp5_plane_state, base)
 
+struct mdp5_pipeline {
+	struct mdp5_interface *intf;
+	struct mdp5_hw_mixer *mixer;
+	struct mdp5_hw_mixer *r_mixer;	/* right mixer */
+};
+
+struct mdp5_crtc_state {
+	struct drm_crtc_state base;
+
+	struct mdp5_ctl *ctl;
+	struct mdp5_pipeline pipeline;
+
+	/* these are derivatives of intf/mixer state in mdp5_pipeline */
+	u32 vblank_irqmask;
+	u32 err_irqmask;
+	u32 pp_done_irqmask;
+
+	bool cmd_mode;
+};
+#define to_mdp5_crtc_state(x) \
+		container_of(x, struct mdp5_crtc_state, base)
+
 enum mdp5_intf_mode {
 	MDP5_INTF_MODE_NONE = 0,
 
@@ -121,6 +152,7 @@ enum mdp5_intf_mode {
 };
 
 struct mdp5_interface {
+	int idx;
 	int num; /* display interface number */
 	enum mdp5_intf_type type;
 	enum mdp5_intf_mode mode;
@@ -128,11 +160,11 @@ struct mdp5_interface {
 
 struct mdp5_encoder {
 	struct drm_encoder base;
-	struct mdp5_interface intf;
 	spinlock_t intf_lock;	/* protect REG_MDP5_INTF_* registers */
 	bool enabled;
 	uint32_t bsc;
 
+	struct mdp5_interface *intf;
 	struct mdp5_ctl *ctl;
 };
 #define to_mdp5_encoder(x) container_of(x, struct mdp5_encoder, base)
@@ -197,8 +229,8 @@ static inline uint32_t intf2err(int intf_num)
 	}
 }
 
-#define GET_PING_PONG_ID(layer_mixer)	((layer_mixer == 5) ? 3 : layer_mixer)
-static inline uint32_t intf2vblank(int lm, struct mdp5_interface *intf)
+static inline uint32_t intf2vblank(struct mdp5_hw_mixer *mixer,
+				   struct mdp5_interface *intf)
 {
 	/*
 	 * In case of DSI Command Mode, the Ping Pong's read pointer IRQ
@@ -208,7 +240,7 @@ static inline uint32_t intf2vblank(int lm, struct mdp5_interface *intf)
 
 	if ((intf->type == INTF_DSI) &&
 			(intf->mode == MDP5_INTF_DSI_MODE_COMMAND))
-		return MDP5_IRQ_PING_PONG_0_RD_PTR << GET_PING_PONG_ID(lm);
+		return MDP5_IRQ_PING_PONG_0_RD_PTR << mixer->pp;
 
 	if (intf->type == INTF_WB)
 		return MDP5_IRQ_WB_2_DONE;
@@ -222,9 +254,9 @@ static inline uint32_t intf2vblank(int lm, struct mdp5_interface *intf)
 	}
 }
 
-static inline uint32_t lm2ppdone(int lm)
+static inline uint32_t lm2ppdone(struct mdp5_hw_mixer *mixer)
 {
-	return MDP5_IRQ_PING_PONG_0_DONE << GET_PING_PONG_ID(lm);
+	return MDP5_IRQ_PING_PONG_0_DONE << mixer->pp;
 }
 
 int mdp5_disable(struct mdp5_kms *mdp5_kms);
@@ -243,15 +275,16 @@ void mdp5_irq_domain_fini(struct mdp5_kms *mdp5_kms);
 
 uint32_t mdp5_plane_get_flush(struct drm_plane *plane);
 enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane);
+enum mdp5_pipe mdp5_plane_right_pipe(struct drm_plane *plane);
 struct drm_plane *mdp5_plane_init(struct drm_device *dev,
 				  enum drm_plane_type type);
 
 struct mdp5_ctl *mdp5_crtc_get_ctl(struct drm_crtc *crtc);
 uint32_t mdp5_crtc_vblank(struct drm_crtc *crtc);
 
-int mdp5_crtc_get_lm(struct drm_crtc *crtc);
-void mdp5_crtc_set_pipeline(struct drm_crtc *crtc,
-		struct mdp5_interface *intf, struct mdp5_ctl *ctl);
+struct mdp5_hw_mixer *mdp5_crtc_get_mixer(struct drm_crtc *crtc);
+struct mdp5_pipeline *mdp5_crtc_get_pipeline(struct drm_crtc *crtc);
+void mdp5_crtc_set_pipeline(struct drm_crtc *crtc);
 void mdp5_crtc_wait_for_commit_done(struct drm_crtc *crtc);
 struct drm_crtc *mdp5_crtc_init(struct drm_device *dev,
 				struct drm_plane *plane,
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.c
new file mode 100644
index 000000000000..8a00991f03c7
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.c
@@ -0,0 +1,172 @@
+/*
+ * Copyright (C) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mdp5_kms.h"
+
+/*
+ * As of now, there are only 2 combinations possible for source split:
+ *
+ * Left | Right
+ * -----|------
+ *  LM0 | LM1
+ *  LM2 | LM5
+ *
+ */
+static int lm_right_pair[] = { 1, -1, 5, -1, -1, -1 };
+
+static int get_right_pair_idx(struct mdp5_kms *mdp5_kms, int lm)
+{
+	int i;
+	int pair_lm;
+
+	pair_lm = lm_right_pair[lm];
+	if (pair_lm < 0)
+		return -EINVAL;
+
+	for (i = 0; i < mdp5_kms->num_hwmixers; i++) {
+		struct mdp5_hw_mixer *mixer = mdp5_kms->hwmixers[i];
+
+		if (mixer->lm == pair_lm)
+			return mixer->idx;
+	}
+
+	return -1;
+}
+
+int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
+		      uint32_t caps, struct mdp5_hw_mixer **mixer,
+		      struct mdp5_hw_mixer **r_mixer)
+{
+	struct msm_drm_private *priv = s->dev->dev_private;
+	struct mdp5_kms *mdp5_kms = to_mdp5_kms(to_mdp_kms(priv->kms));
+	struct mdp5_state *state = mdp5_get_state(s);
+	struct mdp5_hw_mixer_state *new_state;
+	int i;
+
+	if (IS_ERR(state))
+		return PTR_ERR(state);
+
+	new_state = &state->hwmixer;
+
+	for (i = 0; i < mdp5_kms->num_hwmixers; i++) {
+		struct mdp5_hw_mixer *cur = mdp5_kms->hwmixers[i];
+
+		/*
+		 * skip if already in-use by a different CRTC. If there is a
+		 * mixer already assigned to this CRTC, it means this call is
+		 * a request to get an additional right mixer. Assume that the
+		 * existing mixer is the 'left' one, and try to see if we can
+		 * get its corresponding 'right' pair.
+		 */
+		if (new_state->hwmixer_to_crtc[cur->idx] &&
+		    new_state->hwmixer_to_crtc[cur->idx] != crtc)
+			continue;
+
+		/* skip if doesn't support some required caps: */
+		if (caps & ~cur->caps)
+			continue;
+
+		if (r_mixer) {
+			int pair_idx;
+
+			pair_idx = get_right_pair_idx(mdp5_kms, cur->lm);
+			if (pair_idx < 0)
+				return -EINVAL;
+
+			if (new_state->hwmixer_to_crtc[pair_idx])
+				continue;
+
+			*r_mixer = mdp5_kms->hwmixers[pair_idx];
+		}
+
+		/*
+		 * prefer a pair-able LM over an unpairable one. We can
+		 * switch the CRTC from Normal mode to Source Split mode
+		 * without requiring a full modeset if we had already
+		 * assigned this CRTC a pair-able LM.
+		 *
+		 * TODO: There will be assignment sequences which would
+		 * result in the CRTC requiring a full modeset, even
+		 * if we have the LM resources to prevent it. For a platform
+		 * with a few displays, we don't run out of pair-able LMs
+		 * so easily. For now, ignore the possibility of requiring
+		 * a full modeset.
+		 */
+		if (!(*mixer) || cur->caps & MDP_LM_CAP_PAIR)
+			*mixer = cur;
+	}
+
+	if (!(*mixer))
+		return -ENOMEM;
+
+	if (r_mixer && !(*r_mixer))
+		return -ENOMEM;
+
+	DBG("assigning Layer Mixer %d to crtc %s", (*mixer)->lm, crtc->name);
+
+	new_state->hwmixer_to_crtc[(*mixer)->idx] = crtc;
+	if (r_mixer) {
+		DBG("assigning Right Layer Mixer %d to crtc %s", (*r_mixer)->lm,
+		    crtc->name);
+		new_state->hwmixer_to_crtc[(*r_mixer)->idx] = crtc;
+	}
+
+	return 0;
+}
+
+void mdp5_mixer_release(struct drm_atomic_state *s, struct mdp5_hw_mixer *mixer)
+{
+	struct mdp5_state *state = mdp5_get_state(s);
+	struct mdp5_hw_mixer_state *new_state = &state->hwmixer;
+
+	if (!mixer)
+		return;
+
+	if (WARN_ON(!new_state->hwmixer_to_crtc[mixer->idx]))
+		return;
+
+	DBG("%s: release from crtc %s", mixer->name,
+	    new_state->hwmixer_to_crtc[mixer->idx]->name);
+
+	new_state->hwmixer_to_crtc[mixer->idx] = NULL;
+}
+
+void mdp5_mixer_destroy(struct mdp5_hw_mixer *mixer)
+{
+	kfree(mixer);
+}
+
+static const char * const mixer_names[] = {
+	"LM0", "LM1", "LM2", "LM3", "LM4", "LM5",
+};
+
+struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm)
+{
+	struct mdp5_hw_mixer *mixer;
+
+	mixer = kzalloc(sizeof(*mixer), GFP_KERNEL);
+	if (!mixer)
+		return ERR_PTR(-ENOMEM);
+
+	mixer->name = mixer_names[lm->id];
+	mixer->lm = lm->id;
+	mixer->caps = lm->caps;
+	mixer->pp = lm->pp;
+	mixer->dspp = lm->dspp;
+	mixer->flush_mask = mdp_ctl_flush_mask_lm(lm->id);
+
+	return mixer;
+}
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.h
new file mode 100644
index 000000000000..9be94f567fbd
--- /dev/null
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_mixer.h
@@ -0,0 +1,47 @@
+/*
+ * Copyright (C) 2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef __MDP5_LM_H__
+#define __MDP5_LM_H__
+
+/* represents a hw Layer Mixer, one (or more) is dynamically assigned to a crtc */
+struct mdp5_hw_mixer {
+	int idx;
+
+	const char *name;
+
+	int lm;			/* the LM instance # */
+	uint32_t caps;
+	int pp;
+	int dspp;
+
+	uint32_t flush_mask;      /* used to commit LM registers */
+};
+
+/* global atomic state of assignment between CRTCs and Layer Mixers: */
+struct mdp5_hw_mixer_state {
+	struct drm_crtc *hwmixer_to_crtc[8];
+};
+
+struct mdp5_hw_mixer *mdp5_mixer_init(const struct mdp5_lm_instance *lm);
+void mdp5_mixer_destroy(struct mdp5_hw_mixer *lm);
+int mdp5_mixer_assign(struct drm_atomic_state *s, struct drm_crtc *crtc,
+		      uint32_t caps, struct mdp5_hw_mixer **mixer,
+		      struct mdp5_hw_mixer **r_mixer);
+void mdp5_mixer_release(struct drm_atomic_state *s,
+			struct mdp5_hw_mixer *mixer);
+
+#endif /* __MDP5_LM_H__ */
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
index 35c4dabb0c0c..2bfac3712685 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.c
@@ -135,7 +135,5 @@ struct mdp5_hw_pipe *mdp5_pipe_init(enum mdp5_pipe pipe,
 	hwpipe->caps = caps;
 	hwpipe->flush_mask = mdp_ctl_flush_mask_pipe(pipe);
 
-	spin_lock_init(&hwpipe->pipe_lock);
-
 	return hwpipe;
 }
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
index 611da7a660c9..924c3e6f9517 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_pipe.h
@@ -18,7 +18,8 @@
 #ifndef __MDP5_PIPE_H__
 #define __MDP5_PIPE_H__
 
-#define SSPP_MAX	(SSPP_RGB3 + 1) /* TODO: Add SSPP_MAX in mdp5.xml.h */
+/* TODO: Add SSPP_MAX in mdp5.xml.h */
+#define SSPP_MAX	(SSPP_CURSOR1 + 1)
 
 /* represents a hw pipe, which is dynamically assigned to a plane */
 struct mdp5_hw_pipe {
@@ -27,7 +28,6 @@ struct mdp5_hw_pipe {
 	const char *name;
 	enum mdp5_pipe pipe;
 
-	spinlock_t pipe_lock;     /* protect REG_MDP5_PIPE_* registers */
 	uint32_t reg_offset;
 	uint32_t caps;
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
index 60a5451ae0b9..a38c5fe6cc19 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_plane.c
@@ -22,6 +22,8 @@
 struct mdp5_plane {
 	struct drm_plane base;
 
+	spinlock_t pipe_lock;     /* protect REG_MDP5_PIPE_* registers */
+
 	uint32_t nformats;
 	uint32_t formats[32];
 };
@@ -40,9 +42,6 @@ static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane,
 		uint32_t src_w, uint32_t src_h,
 		struct drm_modeset_acquire_ctx *ctx);
 
-static void set_scanout_locked(struct drm_plane *plane,
-		struct drm_framebuffer *fb);
-
 static struct mdp5_kms *get_kms(struct drm_plane *plane)
 {
 	struct msm_drm_private *priv = plane->dev->dev_private;
@@ -178,9 +177,14 @@ mdp5_plane_atomic_print_state(struct drm_printer *p,
 		const struct drm_plane_state *state)
 {
 	struct mdp5_plane_state *pstate = to_mdp5_plane_state(state);
+	struct mdp5_kms *mdp5_kms = get_kms(state->plane);
 
 	drm_printf(p, "\thwpipe=%s\n", pstate->hwpipe ?
 			pstate->hwpipe->name : "(null)");
+	if (mdp5_kms->caps & MDP_CAP_SRC_SPLIT)
+		drm_printf(p, "\tright-hwpipe=%s\n",
+			   pstate->r_hwpipe ? pstate->r_hwpipe->name :
+					      "(null)");
 	drm_printf(p, "\tpremultiplied=%u\n", pstate->premultiplied);
 	drm_printf(p, "\tzpos=%u\n", pstate->zpos);
 	drm_printf(p, "\talpha=%u\n", pstate->alpha);
@@ -300,7 +304,9 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 	struct drm_plane_state *old_state = plane->state;
 	struct mdp5_cfg *config = mdp5_cfg_get_config(get_kms(plane)->cfg);
 	bool new_hwpipe = false;
+	bool need_right_hwpipe = false;
 	uint32_t max_width, max_height;
+	bool out_of_bounds = false;
 	uint32_t caps = 0;
 	struct drm_rect clip;
 	int min_scale, max_scale;
@@ -313,7 +319,23 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 	max_height = config->hw->lm.max_height << 16;
 
 	/* Make sure source dimensions are within bounds. */
-	if ((state->src_w > max_width) || (state->src_h > max_height)) {
+	if (state->src_h > max_height)
+		out_of_bounds = true;
+
+	if (state->src_w > max_width) {
+		/* If source split is supported, we can go up to 2x
+		 * the max LM width, but we'd need to stage another
+		 * hwpipe to the right LM. So, the drm_plane would
+		 * consist of 2 hwpipes.
+		 */
+		if (config->hw->mdp.caps & MDP_CAP_SRC_SPLIT &&
+		    (state->src_w <= 2 * max_width))
+			need_right_hwpipe = true;
+		else
+			out_of_bounds = true;
+	}
+
+	if (out_of_bounds) {
 		struct drm_rect src = drm_plane_state_src(state);
 		DBG("Invalid source size "DRM_RECT_FP_FMT,
 				DRM_RECT_FP_ARG(&src));
@@ -364,6 +386,15 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 		if (!mdp5_state->hwpipe || (caps & ~mdp5_state->hwpipe->caps))
 			new_hwpipe = true;
 
+		/*
+		 * (re)allocte hw pipe if we're either requesting for 2 hw pipes
+		 * or we're switching from 2 hw pipes to 1 hw pipe because the
+		 * new src_w can be supported by 1 hw pipe itself.
+		 */
+		if ((need_right_hwpipe && !mdp5_state->r_hwpipe) ||
+		    (!need_right_hwpipe && mdp5_state->r_hwpipe))
+			new_hwpipe = true;
+
 		if (mdp5_kms->smp) {
 			const struct mdp_format *format =
 				to_mdp_format(msm_framebuffer_format(state->fb));
@@ -382,13 +413,36 @@ static int mdp5_plane_atomic_check_with_state(struct drm_crtc_state *crtc_state,
 			 * it available for other planes?
 			 */
 			struct mdp5_hw_pipe *old_hwpipe = mdp5_state->hwpipe;
+			struct mdp5_hw_pipe *old_right_hwpipe =
+							  mdp5_state->r_hwpipe;
+
 			mdp5_state->hwpipe = mdp5_pipe_assign(state->state,
 					plane, caps, blkcfg);
 			if (IS_ERR(mdp5_state->hwpipe)) {
 				DBG("%s: failed to assign hwpipe!", plane->name);
 				return PTR_ERR(mdp5_state->hwpipe);
 			}
+
+			if (need_right_hwpipe) {
+				mdp5_state->r_hwpipe =
+					mdp5_pipe_assign(state->state, plane,
+							 caps, blkcfg);
+				if (IS_ERR(mdp5_state->r_hwpipe)) {
+					DBG("%s: failed to assign right hwpipe",
+					    plane->name);
+					return PTR_ERR(mdp5_state->r_hwpipe);
+				}
+			} else {
+				/*
+				 * set it to NULL so that the driver knows we
+				 * don't have a right hwpipe when committing a
+				 * new state
+				 */
+				mdp5_state->r_hwpipe = NULL;
+			}
+
 			mdp5_pipe_release(state->state, old_hwpipe);
+			mdp5_pipe_release(state->state, old_right_hwpipe);
 		}
 	}
 
@@ -437,13 +491,10 @@ static const struct drm_plane_helper_funcs mdp5_plane_helper_funcs = {
 		.atomic_update = mdp5_plane_atomic_update,
 };
 
-static void set_scanout_locked(struct drm_plane *plane,
-		struct drm_framebuffer *fb)
+static void set_scanout_locked(struct mdp5_kms *mdp5_kms,
+			       enum mdp5_pipe pipe,
+			       struct drm_framebuffer *fb)
 {
-	struct mdp5_kms *mdp5_kms = get_kms(plane);
-	struct mdp5_hw_pipe *hwpipe = to_mdp5_plane_state(plane->state)->hwpipe;
-	enum mdp5_pipe pipe = hwpipe->pipe;
-
 	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_STRIDE_A(pipe),
 			MDP5_PIPE_SRC_STRIDE_A_P0(fb->pitches[0]) |
 			MDP5_PIPE_SRC_STRIDE_A_P1(fb->pitches[1]));
@@ -460,8 +511,6 @@ static void set_scanout_locked(struct drm_plane *plane,
 			msm_framebuffer_iova(fb, mdp5_kms->id, 2));
 	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC3_ADDR(pipe),
 			msm_framebuffer_iova(fb, mdp5_kms->id, 3));
-
-	plane->fb = fb;
 }
 
 /* Note: mdp5_plane->pipe_lock must be locked */
@@ -714,21 +763,129 @@ static void mdp5_write_pixel_ext(struct mdp5_kms *mdp5_kms, enum mdp5_pipe pipe,
 	}
 }
 
+struct pixel_ext {
+	int left[COMP_MAX];
+	int right[COMP_MAX];
+	int top[COMP_MAX];
+	int bottom[COMP_MAX];
+};
+
+struct phase_step {
+	u32 x[COMP_MAX];
+	u32 y[COMP_MAX];
+};
+
+static void mdp5_hwpipe_mode_set(struct mdp5_kms *mdp5_kms,
+				 struct mdp5_hw_pipe *hwpipe,
+				 struct drm_framebuffer *fb,
+				 struct phase_step *step,
+				 struct pixel_ext *pe,
+				 u32 scale_config, u32 hdecm, u32 vdecm,
+				 bool hflip, bool vflip,
+				 int crtc_x, int crtc_y,
+				 unsigned int crtc_w, unsigned int crtc_h,
+				 u32 src_img_w, u32 src_img_h,
+				 u32 src_x, u32 src_y,
+				 u32 src_w, u32 src_h)
+{
+	enum mdp5_pipe pipe = hwpipe->pipe;
+	bool has_pe = hwpipe->caps & MDP_PIPE_CAP_SW_PIX_EXT;
+	const struct mdp_format *format =
+			to_mdp_format(msm_framebuffer_format(fb));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_IMG_SIZE(pipe),
+			MDP5_PIPE_SRC_IMG_SIZE_WIDTH(src_img_w) |
+			MDP5_PIPE_SRC_IMG_SIZE_HEIGHT(src_img_h));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_SIZE(pipe),
+			MDP5_PIPE_SRC_SIZE_WIDTH(src_w) |
+			MDP5_PIPE_SRC_SIZE_HEIGHT(src_h));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_XY(pipe),
+			MDP5_PIPE_SRC_XY_X(src_x) |
+			MDP5_PIPE_SRC_XY_Y(src_y));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_OUT_SIZE(pipe),
+			MDP5_PIPE_OUT_SIZE_WIDTH(crtc_w) |
+			MDP5_PIPE_OUT_SIZE_HEIGHT(crtc_h));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_OUT_XY(pipe),
+			MDP5_PIPE_OUT_XY_X(crtc_x) |
+			MDP5_PIPE_OUT_XY_Y(crtc_y));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_FORMAT(pipe),
+			MDP5_PIPE_SRC_FORMAT_A_BPC(format->bpc_a) |
+			MDP5_PIPE_SRC_FORMAT_R_BPC(format->bpc_r) |
+			MDP5_PIPE_SRC_FORMAT_G_BPC(format->bpc_g) |
+			MDP5_PIPE_SRC_FORMAT_B_BPC(format->bpc_b) |
+			COND(format->alpha_enable, MDP5_PIPE_SRC_FORMAT_ALPHA_ENABLE) |
+			MDP5_PIPE_SRC_FORMAT_CPP(format->cpp - 1) |
+			MDP5_PIPE_SRC_FORMAT_UNPACK_COUNT(format->unpack_count - 1) |
+			COND(format->unpack_tight, MDP5_PIPE_SRC_FORMAT_UNPACK_TIGHT) |
+			MDP5_PIPE_SRC_FORMAT_FETCH_TYPE(format->fetch_type) |
+			MDP5_PIPE_SRC_FORMAT_CHROMA_SAMP(format->chroma_sample));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_UNPACK(pipe),
+			MDP5_PIPE_SRC_UNPACK_ELEM0(format->unpack[0]) |
+			MDP5_PIPE_SRC_UNPACK_ELEM1(format->unpack[1]) |
+			MDP5_PIPE_SRC_UNPACK_ELEM2(format->unpack[2]) |
+			MDP5_PIPE_SRC_UNPACK_ELEM3(format->unpack[3]));
+
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_OP_MODE(pipe),
+			(hflip ? MDP5_PIPE_SRC_OP_MODE_FLIP_LR : 0) |
+			(vflip ? MDP5_PIPE_SRC_OP_MODE_FLIP_UD : 0) |
+			COND(has_pe, MDP5_PIPE_SRC_OP_MODE_SW_PIX_EXT_OVERRIDE) |
+			MDP5_PIPE_SRC_OP_MODE_BWC(BWC_LOSSLESS));
+
+	/* not using secure mode: */
+	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_ADDR_SW_STATUS(pipe), 0);
+
+	if (hwpipe->caps & MDP_PIPE_CAP_SW_PIX_EXT)
+		mdp5_write_pixel_ext(mdp5_kms, pipe, format,
+				src_w, pe->left, pe->right,
+				src_h, pe->top, pe->bottom);
+
+	if (hwpipe->caps & MDP_PIPE_CAP_SCALE) {
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_PHASE_STEP_X(pipe),
+				step->x[COMP_0]);
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_PHASE_STEP_Y(pipe),
+				step->y[COMP_0]);
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CR_PHASE_STEP_X(pipe),
+				step->x[COMP_1_2]);
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CR_PHASE_STEP_Y(pipe),
+				step->y[COMP_1_2]);
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_DECIMATION(pipe),
+				MDP5_PIPE_DECIMATION_VERT(vdecm) |
+				MDP5_PIPE_DECIMATION_HORZ(hdecm));
+		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CONFIG(pipe),
+			   scale_config);
+	}
+
+	if (hwpipe->caps & MDP_PIPE_CAP_CSC) {
+		if (MDP_FORMAT_IS_YUV(format))
+			csc_enable(mdp5_kms, pipe,
+					mdp_get_default_csc_cfg(CSC_YUV2RGB));
+		else
+			csc_disable(mdp5_kms, pipe);
+	}
+
+	set_scanout_locked(mdp5_kms, pipe, fb);
+}
 
 static int mdp5_plane_mode_set(struct drm_plane *plane,
 		struct drm_crtc *crtc, struct drm_framebuffer *fb,
 		struct drm_rect *src, struct drm_rect *dest)
 {
+	struct mdp5_plane *mdp5_plane = to_mdp5_plane(plane);
 	struct drm_plane_state *pstate = plane->state;
 	struct mdp5_hw_pipe *hwpipe = to_mdp5_plane_state(pstate)->hwpipe;
 	struct mdp5_kms *mdp5_kms = get_kms(plane);
 	enum mdp5_pipe pipe = hwpipe->pipe;
+	struct mdp5_hw_pipe *right_hwpipe;
 	const struct mdp_format *format;
 	uint32_t nplanes, config = 0;
-	uint32_t phasex_step[COMP_MAX] = {0,}, phasey_step[COMP_MAX] = {0,};
-	bool pe = hwpipe->caps & MDP_PIPE_CAP_SW_PIX_EXT;
-	int pe_left[COMP_MAX], pe_right[COMP_MAX];
-	int pe_top[COMP_MAX], pe_bottom[COMP_MAX];
+	struct phase_step step = { 0 };
+	struct pixel_ext pe = { 0 };
 	uint32_t hdecm = 0, vdecm = 0;
 	uint32_t pix_format;
 	unsigned int rotation;
@@ -737,6 +894,9 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	unsigned int crtc_w, crtc_h;
 	uint32_t src_x, src_y;
 	uint32_t src_w, src_h;
+	uint32_t src_img_w, src_img_h;
+	uint32_t src_x_r;
+	int crtc_x_r;
 	unsigned long flags;
 	int ret;
 
@@ -765,23 +925,41 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	src_w = src_w >> 16;
 	src_h = src_h >> 16;
 
+	src_img_w = min(fb->width, src_w);
+	src_img_h = min(fb->height, src_h);
+
 	DBG("%s: FB[%u] %u,%u,%u,%u -> CRTC[%u] %d,%d,%u,%u", plane->name,
 			fb->base.id, src_x, src_y, src_w, src_h,
 			crtc->base.id, crtc_x, crtc_y, crtc_w, crtc_h);
 
-	ret = calc_scalex_steps(plane, pix_format, src_w, crtc_w, phasex_step);
+	right_hwpipe = to_mdp5_plane_state(pstate)->r_hwpipe;
+	if (right_hwpipe) {
+		/*
+		 * if the plane comprises of 2 hw pipes, assume that the width
+		 * is split equally across them. The only parameters that varies
+		 * between the 2 pipes are src_x and crtc_x
+		 */
+		crtc_w /= 2;
+		src_w /= 2;
+		src_img_w /= 2;
+
+		crtc_x_r = crtc_x + crtc_w;
+		src_x_r = src_x + src_w;
+	}
+
+	ret = calc_scalex_steps(plane, pix_format, src_w, crtc_w, step.x);
 	if (ret)
 		return ret;
 
-	ret = calc_scaley_steps(plane, pix_format, src_h, crtc_h, phasey_step);
+	ret = calc_scaley_steps(plane, pix_format, src_h, crtc_h, step.y);
 	if (ret)
 		return ret;
 
 	if (hwpipe->caps & MDP_PIPE_CAP_SW_PIX_EXT) {
-		calc_pixel_ext(format, src_w, crtc_w, phasex_step,
-					 pe_left, pe_right, true);
-		calc_pixel_ext(format, src_h, crtc_h, phasey_step,
-					pe_top, pe_bottom, false);
+		calc_pixel_ext(format, src_w, crtc_w, step.x,
+			       pe.left, pe.right, true);
+		calc_pixel_ext(format, src_h, crtc_h, step.y,
+			       pe.top, pe.bottom, false);
 	}
 
 	/* TODO calc hdecm, vdecm */
@@ -798,86 +976,23 @@ static int mdp5_plane_mode_set(struct drm_plane *plane,
 	hflip = !!(rotation & DRM_REFLECT_X);
 	vflip = !!(rotation & DRM_REFLECT_Y);
 
-	spin_lock_irqsave(&hwpipe->pipe_lock, flags);
-
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_IMG_SIZE(pipe),
-			MDP5_PIPE_SRC_IMG_SIZE_WIDTH(min(fb->width, src_w)) |
-			MDP5_PIPE_SRC_IMG_SIZE_HEIGHT(min(fb->height, src_h)));
-
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_SIZE(pipe),
-			MDP5_PIPE_SRC_SIZE_WIDTH(src_w) |
-			MDP5_PIPE_SRC_SIZE_HEIGHT(src_h));
+	spin_lock_irqsave(&mdp5_plane->pipe_lock, flags);
 
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_XY(pipe),
-			MDP5_PIPE_SRC_XY_X(src_x) |
-			MDP5_PIPE_SRC_XY_Y(src_y));
+	mdp5_hwpipe_mode_set(mdp5_kms, hwpipe, fb, &step, &pe,
+			     config, hdecm, vdecm, hflip, vflip,
+			     crtc_x, crtc_y, crtc_w, crtc_h,
+			     src_img_w, src_img_h,
+			     src_x, src_y, src_w, src_h);
+	if (right_hwpipe)
+		mdp5_hwpipe_mode_set(mdp5_kms, right_hwpipe, fb, &step, &pe,
+				     config, hdecm, vdecm, hflip, vflip,
+				     crtc_x_r, crtc_y, crtc_w, crtc_h,
+				     src_img_w, src_img_h,
+				     src_x_r, src_y, src_w, src_h);
 
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_OUT_SIZE(pipe),
-			MDP5_PIPE_OUT_SIZE_WIDTH(crtc_w) |
-			MDP5_PIPE_OUT_SIZE_HEIGHT(crtc_h));
+	spin_unlock_irqrestore(&mdp5_plane->pipe_lock, flags);
 
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_OUT_XY(pipe),
-			MDP5_PIPE_OUT_XY_X(crtc_x) |
-			MDP5_PIPE_OUT_XY_Y(crtc_y));
-
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_FORMAT(pipe),
-			MDP5_PIPE_SRC_FORMAT_A_BPC(format->bpc_a) |
-			MDP5_PIPE_SRC_FORMAT_R_BPC(format->bpc_r) |
-			MDP5_PIPE_SRC_FORMAT_G_BPC(format->bpc_g) |
-			MDP5_PIPE_SRC_FORMAT_B_BPC(format->bpc_b) |
-			COND(format->alpha_enable, MDP5_PIPE_SRC_FORMAT_ALPHA_ENABLE) |
-			MDP5_PIPE_SRC_FORMAT_CPP(format->cpp - 1) |
-			MDP5_PIPE_SRC_FORMAT_UNPACK_COUNT(format->unpack_count - 1) |
-			COND(format->unpack_tight, MDP5_PIPE_SRC_FORMAT_UNPACK_TIGHT) |
-			MDP5_PIPE_SRC_FORMAT_FETCH_TYPE(format->fetch_type) |
-			MDP5_PIPE_SRC_FORMAT_CHROMA_SAMP(format->chroma_sample));
-
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_UNPACK(pipe),
-			MDP5_PIPE_SRC_UNPACK_ELEM0(format->unpack[0]) |
-			MDP5_PIPE_SRC_UNPACK_ELEM1(format->unpack[1]) |
-			MDP5_PIPE_SRC_UNPACK_ELEM2(format->unpack[2]) |
-			MDP5_PIPE_SRC_UNPACK_ELEM3(format->unpack[3]));
-
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_OP_MODE(pipe),
-			(hflip ? MDP5_PIPE_SRC_OP_MODE_FLIP_LR : 0) |
-			(vflip ? MDP5_PIPE_SRC_OP_MODE_FLIP_UD : 0) |
-			COND(pe, MDP5_PIPE_SRC_OP_MODE_SW_PIX_EXT_OVERRIDE) |
-			MDP5_PIPE_SRC_OP_MODE_BWC(BWC_LOSSLESS));
-
-	/* not using secure mode: */
-	mdp5_write(mdp5_kms, REG_MDP5_PIPE_SRC_ADDR_SW_STATUS(pipe), 0);
-
-	if (hwpipe->caps & MDP_PIPE_CAP_SW_PIX_EXT)
-		mdp5_write_pixel_ext(mdp5_kms, pipe, format,
-				src_w, pe_left, pe_right,
-				src_h, pe_top, pe_bottom);
-
-	if (hwpipe->caps & MDP_PIPE_CAP_SCALE) {
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_PHASE_STEP_X(pipe),
-				phasex_step[COMP_0]);
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_PHASE_STEP_Y(pipe),
-				phasey_step[COMP_0]);
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CR_PHASE_STEP_X(pipe),
-				phasex_step[COMP_1_2]);
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CR_PHASE_STEP_Y(pipe),
-				phasey_step[COMP_1_2]);
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_DECIMATION(pipe),
-				MDP5_PIPE_DECIMATION_VERT(vdecm) |
-				MDP5_PIPE_DECIMATION_HORZ(hdecm));
-		mdp5_write(mdp5_kms, REG_MDP5_PIPE_SCALE_CONFIG(pipe), config);
-	}
-
-	if (hwpipe->caps & MDP_PIPE_CAP_CSC) {
-		if (MDP_FORMAT_IS_YUV(format))
-			csc_enable(mdp5_kms, pipe,
-					mdp_get_default_csc_cfg(CSC_YUV2RGB));
-		else
-			csc_disable(mdp5_kms, pipe);
-	}
-
-	set_scanout_locked(plane, fb);
-
-	spin_unlock_irqrestore(&hwpipe->pipe_lock, flags);
+	plane->fb = fb;
 
 	return ret;
 }
@@ -934,6 +1049,7 @@ static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane,
 
 	if (new_plane_state->visible) {
 		struct mdp5_ctl *ctl;
+		struct mdp5_pipeline *pipeline = mdp5_crtc_get_pipeline(crtc);
 
 		ret = mdp5_plane_mode_set(plane, crtc, fb,
 					  &new_plane_state->src,
@@ -942,7 +1058,7 @@ static int mdp5_update_cursor_plane_legacy(struct drm_plane *plane,
 
 		ctl = mdp5_crtc_get_ctl(crtc);
 
-		mdp5_ctl_commit(ctl, mdp5_plane_get_flush(plane));
+		mdp5_ctl_commit(ctl, pipeline, mdp5_plane_get_flush(plane));
 	}
 
 	*to_mdp5_plane_state(plane_state) =
@@ -959,6 +1075,10 @@ slow:
 					      src_x, src_y, src_w, src_h, ctx);
 }
 
+/*
+ * Use this func and the one below only after the atomic state has been
+ * successfully swapped
+ */
 enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane)
 {
 	struct mdp5_plane_state *pstate = to_mdp5_plane_state(plane->state);
@@ -969,14 +1089,30 @@ enum mdp5_pipe mdp5_plane_pipe(struct drm_plane *plane)
 	return pstate->hwpipe->pipe;
 }
 
+enum mdp5_pipe mdp5_plane_right_pipe(struct drm_plane *plane)
+{
+	struct mdp5_plane_state *pstate = to_mdp5_plane_state(plane->state);
+
+	if (!pstate->r_hwpipe)
+		return SSPP_NONE;
+
+	return pstate->r_hwpipe->pipe;
+}
+
 uint32_t mdp5_plane_get_flush(struct drm_plane *plane)
 {
 	struct mdp5_plane_state *pstate = to_mdp5_plane_state(plane->state);
+	u32 mask;
 
 	if (WARN_ON(!pstate->hwpipe))
 		return 0;
 
-	return pstate->hwpipe->flush_mask;
+	mask = pstate->hwpipe->flush_mask;
+
+	if (pstate->r_hwpipe)
+		mask |= pstate->r_hwpipe->flush_mask;
+
+	return mask;
 }
 
 /* initialize plane */
@@ -998,6 +1134,8 @@ struct drm_plane *mdp5_plane_init(struct drm_device *dev,
 	mdp5_plane->nformats = mdp_get_formats(mdp5_plane->formats,
 		ARRAY_SIZE(mdp5_plane->formats), false);
 
+	spin_lock_init(&mdp5_plane->pipe_lock);
+
 	if (type == DRM_PLANE_TYPE_CURSOR)
 		ret = drm_universal_plane_init(dev, plane, 0xff,
 				&mdp5_cursor_plane_funcs,
diff --git a/drivers/gpu/drm/msm/mdp/mdp_kms.h b/drivers/gpu/drm/msm/mdp/mdp_kms.h
index 7574cdfef418..1185487e7e5e 100644
--- a/drivers/gpu/drm/msm/mdp/mdp_kms.h
+++ b/drivers/gpu/drm/msm/mdp/mdp_kms.h
@@ -104,6 +104,7 @@ const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format);
 #define MDP_CAP_SMP		BIT(0)	/* Shared Memory Pool                 */
 #define MDP_CAP_DSC		BIT(1)	/* VESA Display Stream Compression    */
 #define MDP_CAP_CDM		BIT(2)	/* Chroma Down Module (HDMI 2.0 YUV)  */
+#define MDP_CAP_SRC_SPLIT	BIT(3)	/* Source Split of SSPPs */
 
 /* MDP pipe capabilities */
 #define MDP_PIPE_CAP_HFLIP			BIT(0)
@@ -114,6 +115,11 @@ const struct msm_format *mdp_get_format(struct msm_kms *kms, uint32_t format);
 #define MDP_PIPE_CAP_SW_PIX_EXT			BIT(5)
 #define MDP_PIPE_CAP_CURSOR			BIT(6)
 
+/* MDP layer mixer caps */
+#define MDP_LM_CAP_DISPLAY			BIT(0)
+#define MDP_LM_CAP_WB				BIT(1)
+#define MDP_LM_CAP_PAIR				BIT(2)
+
 static inline bool pipe_supports_yuv(uint32_t pipe_caps)
 {
 	return (pipe_caps & MDP_PIPE_CAP_SCALE) &&
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 4f35d4eb85d0..1855182c76ce 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -28,7 +28,9 @@ static int msm_gpu_show(struct drm_device *dev, struct seq_file *m)
 
 	if (gpu) {
 		seq_printf(m, "%s Status:\n", gpu->name);
+		pm_runtime_get_sync(&gpu->pdev->dev);
 		gpu->funcs->show(gpu, m);
+		pm_runtime_put_sync(&gpu->pdev->dev);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 9208e67be453..87b5695d4034 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -55,14 +55,13 @@ int msm_register_address_space(struct drm_device *dev,
 		struct msm_gem_address_space *aspace)
 {
 	struct msm_drm_private *priv = dev->dev_private;
-	int idx = priv->num_aspaces++;
 
-	if (WARN_ON(idx >= ARRAY_SIZE(priv->aspace)))
+	if (WARN_ON(priv->num_aspaces >= ARRAY_SIZE(priv->aspace)))
 		return -EINVAL;
 
-	priv->aspace[idx] = aspace;
+	priv->aspace[priv->num_aspaces] = aspace;
 
-	return idx;
+	return priv->num_aspaces++;
 }
 
 #ifdef CONFIG_DRM_MSM_REGISTER_LOGGING
@@ -265,6 +264,8 @@ static int msm_drm_uninit(struct device *dev)
 
 	if (gpu) {
 		mutex_lock(&ddev->struct_mutex);
+		// XXX what do we do here?
+		//pm_runtime_enable(&pdev->dev);
 		gpu->funcs->pm_suspend(gpu);
 		mutex_unlock(&ddev->struct_mutex);
 		gpu->funcs->destroy(gpu);
@@ -539,7 +540,7 @@ static int msm_open(struct drm_device *dev, struct drm_file *file)
 	return 0;
 }
 
-static void msm_preclose(struct drm_device *dev, struct drm_file *file)
+static void msm_postclose(struct drm_device *dev, struct drm_file *file)
 {
 	struct msm_drm_private *priv = dev->dev_private;
 	struct msm_file_private *ctx = file->driver_priv;
@@ -812,7 +813,7 @@ static struct drm_driver msm_driver = {
 				DRIVER_ATOMIC |
 				DRIVER_MODESET,
 	.open               = msm_open,
-	.preclose           = msm_preclose,
+	.postclose           = msm_postclose,
 	.lastclose          = msm_lastclose,
 	.irq_handler        = msm_irq,
 	.irq_preinstall     = msm_irq_preinstall,
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index b885c3d5ae4d..28b6f9ba5066 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -191,7 +191,8 @@ void msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
 int msm_gem_map_vma(struct msm_gem_address_space *aspace,
 		struct msm_gem_vma *vma, struct sg_table *sgt, int npages);
 
-void msm_gem_address_space_destroy(struct msm_gem_address_space *aspace);
+void msm_gem_address_space_put(struct msm_gem_address_space *aspace);
+
 struct msm_gem_address_space *
 msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
 		const char *name);
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index 59811f29607d..68e509b3b9e4 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -812,6 +812,12 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 
 	size = PAGE_ALIGN(size);
 
+	/* Disallow zero sized objects as they make the underlying
+	 * infrastructure grumpy
+	 */
+	if (size == 0)
+		return ERR_PTR(-EINVAL);
+
 	ret = msm_gem_new_impl(dev, size, flags, NULL, &obj);
 	if (ret)
 		goto fail;
diff --git a/drivers/gpu/drm/msm/msm_gem.h b/drivers/gpu/drm/msm/msm_gem.h
index 7d529516b332..1b4cf20043ea 100644
--- a/drivers/gpu/drm/msm/msm_gem.h
+++ b/drivers/gpu/drm/msm/msm_gem.h
@@ -18,6 +18,7 @@
 #ifndef __MSM_GEM_H__
 #define __MSM_GEM_H__
 
+#include <linux/kref.h>
 #include <linux/reservation.h>
 #include "msm_drv.h"
 
@@ -31,6 +32,7 @@ struct msm_gem_address_space {
 	 */
 	struct drm_mm mm;
 	struct msm_mmu *mmu;
+	struct kref kref;
 };
 
 struct msm_gem_vma {
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 1172fe7a9252..1c545ebe6a5a 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -404,6 +404,24 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (MSM_PIPE_FLAGS(args->flags) & ~MSM_SUBMIT_FLAGS)
 		return -EINVAL;
 
+	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
+		in_fence = sync_file_get_fence(args->fence_fd);
+
+		if (!in_fence)
+			return -EINVAL;
+
+		/* TODO if we get an array-fence due to userspace merging multiple
+		 * fences, we need a way to determine if all the backing fences
+		 * are from our own context..
+		 */
+
+		if (in_fence->context != gpu->fctx->context) {
+			ret = dma_fence_wait(in_fence, true);
+			if (ret)
+				return ret;
+		}
+	}
+
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
 		return ret;
@@ -431,27 +449,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto out;
 
-	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
-		in_fence = sync_file_get_fence(args->fence_fd);
-
-		if (!in_fence) {
-			ret = -EINVAL;
-			goto out;
-		}
-
-		/* TODO if we get an array-fence due to userspace merging multiple
-		 * fences, we need a way to determine if all the backing fences
-		 * are from our own context..
-		 */
-
-		if (in_fence->context != gpu->fctx->context) {
-			ret = dma_fence_wait(in_fence, true);
-			if (ret)
-				goto out;
-		}
-
-	}
-
 	if (!(args->fence & MSM_SUBMIT_NO_IMPLICIT)) {
 		ret = submit_fence_sync(submit);
 		if (ret)
diff --git a/drivers/gpu/drm/msm/msm_gem_vma.c b/drivers/gpu/drm/msm/msm_gem_vma.c
index b654eca7636a..f285d7e210db 100644
--- a/drivers/gpu/drm/msm/msm_gem_vma.c
+++ b/drivers/gpu/drm/msm/msm_gem_vma.c
@@ -19,6 +19,25 @@
 #include "msm_gem.h"
 #include "msm_mmu.h"
 
+static void
+msm_gem_address_space_destroy(struct kref *kref)
+{
+	struct msm_gem_address_space *aspace = container_of(kref,
+			struct msm_gem_address_space, kref);
+
+	drm_mm_takedown(&aspace->mm);
+	if (aspace->mmu)
+		aspace->mmu->funcs->destroy(aspace->mmu);
+	kfree(aspace);
+}
+
+
+void msm_gem_address_space_put(struct msm_gem_address_space *aspace)
+{
+	if (aspace)
+		kref_put(&aspace->kref, msm_gem_address_space_destroy);
+}
+
 void
 msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
 		struct msm_gem_vma *vma, struct sg_table *sgt)
@@ -34,6 +53,8 @@ msm_gem_unmap_vma(struct msm_gem_address_space *aspace,
 	drm_mm_remove_node(&vma->node);
 
 	vma->iova = 0;
+
+	msm_gem_address_space_put(aspace);
 }
 
 int
@@ -57,16 +78,10 @@ msm_gem_map_vma(struct msm_gem_address_space *aspace,
 				size, IOMMU_READ | IOMMU_WRITE);
 	}
 
-	return ret;
-}
+	/* Get a reference to the aspace to keep it around */
+	kref_get(&aspace->kref);
 
-void
-msm_gem_address_space_destroy(struct msm_gem_address_space *aspace)
-{
-	drm_mm_takedown(&aspace->mm);
-	if (aspace->mmu)
-		aspace->mmu->funcs->destroy(aspace->mmu);
-	kfree(aspace);
+	return ret;
 }
 
 struct msm_gem_address_space *
@@ -85,5 +100,7 @@ msm_gem_address_space_create(struct device *dev, struct iommu_domain *domain,
 	drm_mm_init(&aspace->mm, (domain->geometry.aperture_start >> PAGE_SHIFT),
 			(domain->geometry.aperture_end >> PAGE_SHIFT) - 1);
 
+	kref_init(&aspace->kref);
+
 	return aspace;
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 99e05aacbee1..97b9c38c6b3f 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -93,18 +93,18 @@ static int enable_clk(struct msm_gpu *gpu)
 {
 	int i;
 
-	if (gpu->grp_clks[0] && gpu->fast_rate)
-		clk_set_rate(gpu->grp_clks[0], gpu->fast_rate);
+	if (gpu->core_clk && gpu->fast_rate)
+		clk_set_rate(gpu->core_clk, gpu->fast_rate);
 
 	/* Set the RBBM timer rate to 19.2Mhz */
-	if (gpu->grp_clks[2])
-		clk_set_rate(gpu->grp_clks[2], 19200000);
+	if (gpu->rbbmtimer_clk)
+		clk_set_rate(gpu->rbbmtimer_clk, 19200000);
 
-	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
+	for (i = gpu->nr_clocks - 1; i >= 0; i--)
 		if (gpu->grp_clks[i])
 			clk_prepare(gpu->grp_clks[i]);
 
-	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
+	for (i = gpu->nr_clocks - 1; i >= 0; i--)
 		if (gpu->grp_clks[i])
 			clk_enable(gpu->grp_clks[i]);
 
@@ -115,19 +115,24 @@ static int disable_clk(struct msm_gpu *gpu)
 {
 	int i;
 
-	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
+	for (i = gpu->nr_clocks - 1; i >= 0; i--)
 		if (gpu->grp_clks[i])
 			clk_disable(gpu->grp_clks[i]);
 
-	for (i = ARRAY_SIZE(gpu->grp_clks) - 1; i >= 0; i--)
+	for (i = gpu->nr_clocks - 1; i >= 0; i--)
 		if (gpu->grp_clks[i])
 			clk_unprepare(gpu->grp_clks[i]);
 
-	if (gpu->grp_clks[0] && gpu->slow_rate)
-		clk_set_rate(gpu->grp_clks[0], gpu->slow_rate);
+	/*
+	 * Set the clock to a deliberately low rate. On older targets the clock
+	 * speed had to be non zero to avoid problems. On newer targets this
+	 * will be rounded down to zero anyway so it all works out.
+	 */
+	if (gpu->core_clk)
+		clk_set_rate(gpu->core_clk, 27000000);
 
-	if (gpu->grp_clks[2])
-		clk_set_rate(gpu->grp_clks[2], 0);
+	if (gpu->rbbmtimer_clk)
+		clk_set_rate(gpu->rbbmtimer_clk, 0);
 
 	return 0;
 }
@@ -152,18 +157,9 @@ static int disable_axi(struct msm_gpu *gpu)
 
 int msm_gpu_pm_resume(struct msm_gpu *gpu)
 {
-	struct drm_device *dev = gpu->dev;
 	int ret;
 
-	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
-
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
-	if (gpu->active_cnt++ > 0)
-		return 0;
-
-	if (WARN_ON(gpu->active_cnt <= 0))
-		return -EINVAL;
+	DBG("%s", gpu->name);
 
 	ret = enable_pwrrail(gpu);
 	if (ret)
@@ -177,23 +173,16 @@ int msm_gpu_pm_resume(struct msm_gpu *gpu)
 	if (ret)
 		return ret;
 
+	gpu->needs_hw_init = true;
+
 	return 0;
 }
 
 int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 {
-	struct drm_device *dev = gpu->dev;
 	int ret;
 
-	DBG("%s: active_cnt=%d", gpu->name, gpu->active_cnt);
-
-	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
-
-	if (--gpu->active_cnt > 0)
-		return 0;
-
-	if (WARN_ON(gpu->active_cnt < 0))
-		return -EINVAL;
+	DBG("%s", gpu->name);
 
 	ret = disable_axi(gpu);
 	if (ret)
@@ -210,53 +199,20 @@ int msm_gpu_pm_suspend(struct msm_gpu *gpu)
 	return 0;
 }
 
-/*
- * Inactivity detection (for suspend):
- */
-
-static void inactive_worker(struct work_struct *work)
+int msm_gpu_hw_init(struct msm_gpu *gpu)
 {
-	struct msm_gpu *gpu = container_of(work, struct msm_gpu, inactive_work);
-	struct drm_device *dev = gpu->dev;
-
-	if (gpu->inactive)
-		return;
-
-	DBG("%s: inactive!\n", gpu->name);
-	mutex_lock(&dev->struct_mutex);
-	if (!(msm_gpu_active(gpu) || gpu->inactive)) {
-		disable_axi(gpu);
-		disable_clk(gpu);
-		gpu->inactive = true;
-	}
-	mutex_unlock(&dev->struct_mutex);
-}
+	int ret;
 
-static void inactive_handler(unsigned long data)
-{
-	struct msm_gpu *gpu = (struct msm_gpu *)data;
-	struct msm_drm_private *priv = gpu->dev->dev_private;
+	if (!gpu->needs_hw_init)
+		return 0;
 
-	queue_work(priv->wq, &gpu->inactive_work);
-}
+	disable_irq(gpu->irq);
+	ret = gpu->funcs->hw_init(gpu);
+	if (!ret)
+		gpu->needs_hw_init = false;
+	enable_irq(gpu->irq);
 
-/* cancel inactive timer and make sure we are awake: */
-static void inactive_cancel(struct msm_gpu *gpu)
-{
-	DBG("%s", gpu->name);
-	del_timer(&gpu->inactive_timer);
-	if (gpu->inactive) {
-		enable_clk(gpu);
-		enable_axi(gpu);
-		gpu->inactive = false;
-	}
-}
-
-static void inactive_start(struct msm_gpu *gpu)
-{
-	DBG("%s", gpu->name);
-	mod_timer(&gpu->inactive_timer,
-			round_jiffies_up(jiffies + DRM_MSM_INACTIVE_JIFFIES));
+	return ret;
 }
 
 /*
@@ -296,8 +252,9 @@ static void recover_worker(struct work_struct *work)
 		/* retire completed submits, plus the one that hung: */
 		retire_submits(gpu);
 
-		inactive_cancel(gpu);
+		pm_runtime_get_sync(&gpu->pdev->dev);
 		gpu->funcs->recover(gpu);
+		pm_runtime_put_sync(&gpu->pdev->dev);
 
 		/* replay the remaining submits after the one that hung: */
 		list_for_each_entry(submit, &gpu->submit_list, node) {
@@ -400,6 +357,8 @@ void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
 {
 	unsigned long flags;
 
+	pm_runtime_get_sync(&gpu->pdev->dev);
+
 	spin_lock_irqsave(&gpu->perf_lock, flags);
 	/* we could dynamically enable/disable perfcntr registers too.. */
 	gpu->last_sample.active = msm_gpu_active(gpu);
@@ -413,6 +372,7 @@ void msm_gpu_perfcntr_start(struct msm_gpu *gpu)
 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu)
 {
 	gpu->perfcntr_active = false;
+	pm_runtime_put_sync(&gpu->pdev->dev);
 }
 
 /* returns -errno or # of cntrs sampled */
@@ -458,6 +418,8 @@ static void retire_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
 		drm_gem_object_unreference(&msm_obj->base);
 	}
 
+	pm_runtime_mark_last_busy(&gpu->pdev->dev);
+	pm_runtime_put_autosuspend(&gpu->pdev->dev);
 	msm_gem_submit_free(submit);
 }
 
@@ -492,9 +454,6 @@ static void retire_worker(struct work_struct *work)
 	mutex_lock(&dev->struct_mutex);
 	retire_submits(gpu);
 	mutex_unlock(&dev->struct_mutex);
-
-	if (!msm_gpu_active(gpu))
-		inactive_start(gpu);
 }
 
 /* call from irq handler to schedule work to retire bo's */
@@ -515,7 +474,9 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 
 	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
 
-	inactive_cancel(gpu);
+	pm_runtime_get_sync(&gpu->pdev->dev);
+
+	msm_gpu_hw_init(gpu);
 
 	list_add_tail(&submit->node, &gpu->submit_list);
 
@@ -559,16 +520,52 @@ static irqreturn_t irq_handler(int irq, void *data)
 	return gpu->funcs->irq(gpu);
 }
 
-static const char *clk_names[] = {
-	"core", "iface", "rbbmtimer", "mem", "mem_iface", "alt_mem_iface",
-};
+static struct clk *get_clock(struct device *dev, const char *name)
+{
+	struct clk *clk = devm_clk_get(dev, name);
+
+	return IS_ERR(clk) ? NULL : clk;
+}
+
+static int get_clocks(struct platform_device *pdev, struct msm_gpu *gpu)
+{
+	struct device *dev = &pdev->dev;
+	struct property *prop;
+	const char *name;
+	int i = 0;
+
+	gpu->nr_clocks = of_property_count_strings(dev->of_node, "clock-names");
+	if (gpu->nr_clocks < 1) {
+		gpu->nr_clocks = 0;
+		return 0;
+	}
+
+	gpu->grp_clks = devm_kcalloc(dev, sizeof(struct clk *), gpu->nr_clocks,
+		GFP_KERNEL);
+	if (!gpu->grp_clks)
+		return -ENOMEM;
+
+	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
+		gpu->grp_clks[i] = get_clock(dev, name);
+
+		/* Remember the key clocks that we need to control later */
+		if (!strcmp(name, "core"))
+			gpu->core_clk = gpu->grp_clks[i];
+		else if (!strcmp(name, "rbbmtimer"))
+			gpu->rbbmtimer_clk = gpu->grp_clks[i];
+
+		++i;
+	}
+
+	return 0;
+}
 
 int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
 		const char *name, const char *ioname, const char *irqname, int ringsz)
 {
 	struct iommu_domain *iommu;
-	int i, ret;
+	int ret;
 
 	if (WARN_ON(gpu->num_perfcntrs > ARRAY_SIZE(gpu->last_cntrs)))
 		gpu->num_perfcntrs = ARRAY_SIZE(gpu->last_cntrs);
@@ -576,7 +573,6 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 	gpu->dev = drm;
 	gpu->funcs = funcs;
 	gpu->name = name;
-	gpu->inactive = true;
 	gpu->fctx = msm_fence_context_alloc(drm, name);
 	if (IS_ERR(gpu->fctx)) {
 		ret = PTR_ERR(gpu->fctx);
@@ -586,19 +582,15 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 
 	INIT_LIST_HEAD(&gpu->active_list);
 	INIT_WORK(&gpu->retire_work, retire_worker);
-	INIT_WORK(&gpu->inactive_work, inactive_worker);
 	INIT_WORK(&gpu->recover_work, recover_worker);
 
 	INIT_LIST_HEAD(&gpu->submit_list);
 
-	setup_timer(&gpu->inactive_timer, inactive_handler,
-			(unsigned long)gpu);
 	setup_timer(&gpu->hangcheck_timer, hangcheck_handler,
 			(unsigned long)gpu);
 
 	spin_lock_init(&gpu->perf_lock);
 
-	BUG_ON(ARRAY_SIZE(clk_names) != ARRAY_SIZE(gpu->grp_clks));
 
 	/* Map registers: */
 	gpu->mmio = msm_ioremap(pdev, ioname, name);
@@ -622,13 +614,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		goto fail;
 	}
 
-	/* Acquire clocks: */
-	for (i = 0; i < ARRAY_SIZE(clk_names); i++) {
-		gpu->grp_clks[i] = msm_clk_get(pdev, clk_names[i]);
-		DBG("grp_clks[%s]: %p", clk_names[i], gpu->grp_clks[i]);
-		if (IS_ERR(gpu->grp_clks[i]))
-			gpu->grp_clks[i] = NULL;
-	}
+	ret = get_clocks(pdev, gpu);
+	if (ret)
+		goto fail;
 
 	gpu->ebi1_clk = msm_clk_get(pdev, "bus");
 	DBG("ebi1_clk: %p", gpu->ebi1_clk);
@@ -684,6 +672,9 @@ int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
 		goto fail;
 	}
 
+	gpu->pdev = pdev;
+	platform_set_drvdata(pdev, gpu);
+
 	bs_init(gpu);
 
 	return 0;
@@ -706,9 +697,6 @@ void msm_gpu_cleanup(struct msm_gpu *gpu)
 		msm_ringbuffer_destroy(gpu->rb);
 	}
 
-	if (gpu->aspace)
-		msm_gem_address_space_destroy(gpu->aspace);
-
 	if (gpu->fctx)
 		msm_fence_context_free(gpu->fctx);
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
index c4c39d3272c7..aa3241000455 100644
--- a/drivers/gpu/drm/msm/msm_gpu.h
+++ b/drivers/gpu/drm/msm/msm_gpu.h
@@ -64,6 +64,7 @@ struct msm_gpu_funcs {
 struct msm_gpu {
 	const char *name;
 	struct drm_device *dev;
+	struct platform_device *pdev;
 	const struct msm_gpu_funcs *funcs;
 
 	/* performance counters (hw & sw): */
@@ -88,9 +89,8 @@ struct msm_gpu {
 	/* fencing: */
 	struct msm_fence_context *fctx;
 
-	/* is gpu powered/active? */
-	int active_cnt;
-	bool inactive;
+	/* does gpu need hw_init? */
+	bool needs_hw_init;
 
 	/* worker for handling active-list retiring: */
 	struct work_struct retire_work;
@@ -103,8 +103,10 @@ struct msm_gpu {
 
 	/* Power Control: */
 	struct regulator *gpu_reg, *gpu_cx;
-	struct clk *ebi1_clk, *grp_clks[6];
-	uint32_t fast_rate, slow_rate, bus_freq;
+	struct clk **grp_clks;
+	int nr_clocks;
+	struct clk *ebi1_clk, *core_clk, *rbbmtimer_clk;
+	uint32_t fast_rate, bus_freq;
 
 #ifdef DOWNSTREAM_CONFIG_MSM_BUS_SCALING
 	struct msm_bus_scale_pdata *bus_scale_table;
@@ -114,9 +116,7 @@ struct msm_gpu {
 	/* Hang and Inactivity Detection:
 	 */
 #define DRM_MSM_INACTIVE_PERIOD   66 /* in ms (roughly four frames) */
-#define DRM_MSM_INACTIVE_JIFFIES  msecs_to_jiffies(DRM_MSM_INACTIVE_PERIOD)
-	struct timer_list inactive_timer;
-	struct work_struct inactive_work;
+
 #define DRM_MSM_HANGCHECK_PERIOD 500 /* in ms */
 #define DRM_MSM_HANGCHECK_JIFFIES msecs_to_jiffies(DRM_MSM_HANGCHECK_PERIOD)
 	struct timer_list hangcheck_timer;
@@ -196,6 +196,8 @@ static inline void gpu_write64(struct msm_gpu *gpu, u32 lo, u32 hi, u64 val)
 int msm_gpu_pm_suspend(struct msm_gpu *gpu);
 int msm_gpu_pm_resume(struct msm_gpu *gpu);
 
+int msm_gpu_hw_init(struct msm_gpu *gpu);
+
 void msm_gpu_perfcntr_start(struct msm_gpu *gpu);
 void msm_gpu_perfcntr_stop(struct msm_gpu *gpu);
 int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 7f5779daf5c8..b23d33622f37 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -38,78 +38,47 @@ static int msm_iommu_attach(struct msm_mmu *mmu, const char * const *names,
 			    int cnt)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
-	return iommu_attach_device(iommu->domain, mmu->dev);
+	int ret;
+
+	pm_runtime_get_sync(mmu->dev);
+	ret = iommu_attach_device(iommu->domain, mmu->dev);
+	pm_runtime_put_sync(mmu->dev);
+
+	return ret;
 }
 
 static void msm_iommu_detach(struct msm_mmu *mmu, const char * const *names,
 			     int cnt)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
+
+	pm_runtime_get_sync(mmu->dev);
 	iommu_detach_device(iommu->domain, mmu->dev);
+	pm_runtime_put_sync(mmu->dev);
 }
 
 static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
 		struct sg_table *sgt, unsigned len, int prot)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
-	struct iommu_domain *domain = iommu->domain;
-	struct scatterlist *sg;
-	unsigned long da = iova;
-	unsigned int i, j;
-	int ret;
-
-	if (!domain || !sgt)
-		return -EINVAL;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		dma_addr_t pa = sg_phys(sg) - sg->offset;
-		size_t bytes = sg->length + sg->offset;
-
-		VERB("map[%d]: %08lx %08lx(%zx)", i, da, (unsigned long)pa, bytes);
+	size_t ret;
 
-		ret = iommu_map(domain, da, pa, bytes, prot);
-		if (ret)
-			goto fail;
+//	pm_runtime_get_sync(mmu->dev);
+	ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
+//	pm_runtime_put_sync(mmu->dev);
+	WARN_ON(ret < 0);
 
-		da += bytes;
-	}
-
-	return 0;
-
-fail:
-	da = iova;
-
-	for_each_sg(sgt->sgl, sg, i, j) {
-		size_t bytes = sg->length + sg->offset;
-		iommu_unmap(domain, da, bytes);
-		da += bytes;
-	}
-	return ret;
+	return (ret == len) ? 0 : -EINVAL;
 }
 
 static int msm_iommu_unmap(struct msm_mmu *mmu, uint64_t iova,
 		struct sg_table *sgt, unsigned len)
 {
 	struct msm_iommu *iommu = to_msm_iommu(mmu);
-	struct iommu_domain *domain = iommu->domain;
-	struct scatterlist *sg;
-	unsigned long da = iova;
-	int i;
-
-	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
-		size_t bytes = sg->length + sg->offset;
-		size_t unmapped;
-
-		unmapped = iommu_unmap(domain, da, bytes);
-		if (unmapped < bytes)
-			return unmapped;
-
-		VERB("unmap[%d]: %08lx(%zx)", i, da, bytes);
-
-		BUG_ON(!PAGE_ALIGNED(bytes));
 
-		da += bytes;
-	}
+	pm_runtime_get_sync(mmu->dev);
+	iommu_unmap(iommu->domain, iova, len);
+	pm_runtime_put_sync(mmu->dev);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index 3df7322fd74e..0e81faab2c50 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -322,7 +322,7 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 	}
 
 	for (i = 0; i < submit->nr_cmds; i++) {
-		uint32_t iova = submit->cmd[i].iova;
+		uint64_t iova = submit->cmd[i].iova;
 		uint32_t szd  = submit->cmd[i].size; /* in dwords */
 
 		/* snapshot cmdstream bo's (if we haven't already): */
@@ -341,7 +341,7 @@ void msm_rd_dump_submit(struct msm_gem_submit *submit)
 		case MSM_SUBMIT_CMD_CTX_RESTORE_BUF:
 		case MSM_SUBMIT_CMD_BUF:
 			rd_write_section(rd, RD_CMDSTREAM_ADDR,
-					(uint32_t[2]){ iova, szd }, 8);
+				(uint32_t[3]){ iova, szd, iova >> 32 }, 12);
 			break;
 		}
 	}
diff --git a/drivers/gpu/drm/mxsfb/mxsfb_out.c b/drivers/gpu/drm/mxsfb/mxsfb_out.c
index b8e81422d4e2..f7d729aa09bd 100644
--- a/drivers/gpu/drm/mxsfb/mxsfb_out.c
+++ b/drivers/gpu/drm/mxsfb/mxsfb_out.c
@@ -19,6 +19,7 @@
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_cma_helper.h>
 #include <drm/drm_gem_cma_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_simple_kms_helper.h>
@@ -82,20 +83,15 @@ static const struct drm_connector_funcs mxsfb_panel_connector_funcs = {
 	.atomic_destroy_state	= drm_atomic_helper_connector_destroy_state,
 };
 
-static int mxsfb_attach_endpoint(struct drm_device *drm,
-				 const struct of_endpoint *ep)
+int mxsfb_create_output(struct drm_device *drm)
 {
 	struct mxsfb_drm_private *mxsfb = drm->dev_private;
-	struct device_node *np;
 	struct drm_panel *panel;
-	int ret = -EPROBE_DEFER;
-
-	np = of_graph_get_remote_port_parent(ep->local_node);
-	panel = of_drm_find_panel(np);
-	of_node_put(np);
+	int ret;
 
-	if (!panel)
-		return -EPROBE_DEFER;
+	ret = drm_of_find_panel_or_bridge(drm->dev->of_node, 0, 0, &panel, NULL);
+	if (ret)
+		return ret;
 
 	mxsfb->connector.dpms = DRM_MODE_DPMS_OFF;
 	mxsfb->connector.polled = 0;
@@ -109,27 +105,3 @@ static int mxsfb_attach_endpoint(struct drm_device *drm,
 
 	return ret;
 }
-
-int mxsfb_create_output(struct drm_device *drm)
-{
-	struct mxsfb_drm_private *mxsfb = drm->dev_private;
-	struct device_node *ep_np = NULL;
-	struct of_endpoint ep;
-	int ret;
-
-	for_each_endpoint_of_node(drm->dev->of_node, ep_np) {
-		ret = of_graph_parse_endpoint(ep_np, &ep);
-		if (!ret)
-			ret = mxsfb_attach_endpoint(drm, &ep);
-
-		if (ret) {
-			of_node_put(ep_np);
-			return ret;
-		}
-	}
-
-	if (!mxsfb->panel)
-		return -EPROBE_DEFER;
-
-	return 0;
-}
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 43ab560de7f9..4b4b0b496262 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -788,7 +788,8 @@ nv_crtc_disable(struct drm_crtc *crtc)
 
 static int
 nv_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		  uint32_t size)
+		  uint32_t size,
+		  struct drm_modeset_acquire_ctx *ctx)
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	int i;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 548f36d33924..e427f80344c4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1574,6 +1574,7 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 struct nvkm_vma *
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index c9910c8537ed..0e58537352fe 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -2210,25 +2210,16 @@ nv50_head_lut_load(struct drm_crtc *crtc)
 	}
 }
 
-static int
-nv50_head_mode_set_base_atomic(struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb, int x, int y,
-			       enum mode_set_atomic state)
-{
-	WARN_ON(1);
-	return 0;
-}
-
 static const struct drm_crtc_helper_funcs
 nv50_head_help = {
-	.mode_set_base_atomic = nv50_head_mode_set_base_atomic,
 	.load_lut = nv50_head_lut_load,
 	.atomic_check = nv50_head_atomic_check,
 };
 
 static int
 nv50_head_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		    uint32_t size)
+		    uint32_t size,
+		    struct drm_modeset_acquire_ctx *ctx)
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	u32 i;
diff --git a/drivers/gpu/drm/omapdrm/dss/dpi.c b/drivers/gpu/drm/omapdrm/dss/dpi.c
index 51d90a8a61cd..8a730a7afe76 100644
--- a/drivers/gpu/drm/omapdrm/dss/dpi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dpi.c
@@ -877,7 +877,7 @@ int dpi_init_port(struct platform_device *pdev, struct device_node *port)
 	if (!dpi)
 		return -ENOMEM;
 
-	ep = omapdss_of_get_next_endpoint(port, NULL);
+	ep = of_get_next_child(port, NULL);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index f50d6fc0d92e..910754bf8cf9 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -39,6 +39,7 @@
 #include <linux/debugfs.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <linux/of_platform.h>
 #include <linux/component.h>
 
@@ -5090,7 +5091,7 @@ static int dsi_probe_of(struct platform_device *pdev)
 	struct device_node *ep;
 	struct omap_dsi_pin_config pin_cfg;
 
-	ep = omapdss_of_get_first_endpoint(node);
+	ep = of_graph_get_endpoint_by_regs(node, 0, 0);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/dss-of.c b/drivers/gpu/drm/omapdrm/dss/dss-of.c
index b46606b0f014..c6b86f348a5c 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss-of.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss-of.c
@@ -16,76 +16,11 @@
 #include <linux/err.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <linux/seq_file.h>
 
 #include "omapdss.h"
 
-struct device_node *
-omapdss_of_get_next_port(const struct device_node *parent,
-			 struct device_node *prev)
-{
-	struct device_node *port = NULL;
-
-	if (!parent)
-		return NULL;
-
-	if (!prev) {
-		struct device_node *ports;
-		/*
-		 * It's the first call, we have to find a port subnode
-		 * within this node or within an optional 'ports' node.
-		 */
-		ports = of_get_child_by_name(parent, "ports");
-		if (ports)
-			parent = ports;
-
-		port = of_get_child_by_name(parent, "port");
-
-		/* release the 'ports' node */
-		of_node_put(ports);
-	} else {
-		struct device_node *ports;
-
-		ports = of_get_parent(prev);
-		if (!ports)
-			return NULL;
-
-		do {
-			port = of_get_next_child(ports, prev);
-			if (!port) {
-				of_node_put(ports);
-				return NULL;
-			}
-			prev = port;
-		} while (of_node_cmp(port->name, "port") != 0);
-
-		of_node_put(ports);
-	}
-
-	return port;
-}
-EXPORT_SYMBOL_GPL(omapdss_of_get_next_port);
-
-struct device_node *
-omapdss_of_get_next_endpoint(const struct device_node *parent,
-			     struct device_node *prev)
-{
-	struct device_node *ep = NULL;
-
-	if (!parent)
-		return NULL;
-
-	do {
-		ep = of_get_next_child(parent, prev);
-		if (!ep)
-			return NULL;
-		prev = ep;
-	} while (of_node_cmp(ep->name, "endpoint") != 0);
-
-	return ep;
-}
-EXPORT_SYMBOL_GPL(omapdss_of_get_next_endpoint);
-
 struct device_node *dss_of_port_get_parent_device(struct device_node *port)
 {
 	struct device_node *np;
@@ -124,37 +59,6 @@ u32 dss_of_port_get_port_number(struct device_node *port)
 }
 EXPORT_SYMBOL_GPL(dss_of_port_get_port_number);
 
-static struct device_node *omapdss_of_get_remote_port(const struct device_node *node)
-{
-	struct device_node *np;
-
-	np = of_parse_phandle(node, "remote-endpoint", 0);
-	if (!np)
-		return NULL;
-
-	np = of_get_next_parent(np);
-
-	return np;
-}
-
-struct device_node *
-omapdss_of_get_first_endpoint(const struct device_node *parent)
-{
-	struct device_node *port, *ep;
-
-	port = omapdss_of_get_next_port(parent, NULL);
-
-	if (!port)
-		return NULL;
-
-	ep = omapdss_of_get_next_endpoint(port, NULL);
-
-	of_node_put(port);
-
-	return ep;
-}
-EXPORT_SYMBOL_GPL(omapdss_of_get_first_endpoint);
-
 struct omap_dss_device *
 omapdss_of_find_source_for_first_ep(struct device_node *node)
 {
@@ -162,11 +66,11 @@ omapdss_of_find_source_for_first_ep(struct device_node *node)
 	struct device_node *src_port;
 	struct omap_dss_device *src;
 
-	ep = omapdss_of_get_first_endpoint(node);
+	ep = of_graph_get_endpoint_by_regs(node, 0, 0);
 	if (!ep)
 		return ERR_PTR(-EINVAL);
 
-	src_port = omapdss_of_get_remote_port(ep);
+	src_port = of_graph_get_remote_port(ep);
 	if (!src_port) {
 		of_node_put(ep);
 		return ERR_PTR(-EINVAL);
diff --git a/drivers/gpu/drm/omapdrm/dss/dss.c b/drivers/gpu/drm/omapdrm/dss/dss.c
index ceb483650f8c..fa99ec72d832 100644
--- a/drivers/gpu/drm/omapdrm/dss/dss.c
+++ b/drivers/gpu/drm/omapdrm/dss/dss.c
@@ -38,6 +38,7 @@
 #include <linux/mfd/syscon.h>
 #include <linux/regmap.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <linux/regulator/consumer.h>
 #include <linux/suspend.h>
 #include <linux/component.h>
@@ -1035,32 +1036,14 @@ static int dss_init_ports(struct platform_device *pdev)
 {
 	struct device_node *parent = pdev->dev.of_node;
 	struct device_node *port;
-	int r;
-
-	if (parent == NULL)
-		return 0;
+	int i;
 
-	port = omapdss_of_get_next_port(parent, NULL);
-	if (!port)
-		return 0;
-
-	if (dss.feat->num_ports == 0)
-		return 0;
-
-	do {
-		enum omap_display_type port_type;
-		u32 reg;
-
-		r = of_property_read_u32(port, "reg", &reg);
-		if (r)
-			reg = 0;
-
-		if (reg >= dss.feat->num_ports)
+	for (i = 0; i < dss.feat->num_ports; i++) {
+		port = of_graph_get_port_by_id(parent, i);
+		if (!port)
 			continue;
 
-		port_type = dss.feat->ports[reg];
-
-		switch (port_type) {
+		switch (dss.feat->ports[i]) {
 		case OMAP_DISPLAY_TYPE_DPI:
 			dpi_init_port(pdev, port);
 			break;
@@ -1070,7 +1053,7 @@ static int dss_init_ports(struct platform_device *pdev)
 		default:
 			break;
 		}
-	} while ((port = omapdss_of_get_next_port(parent, port)) != NULL);
+	}
 
 	return 0;
 }
@@ -1079,32 +1062,14 @@ static void dss_uninit_ports(struct platform_device *pdev)
 {
 	struct device_node *parent = pdev->dev.of_node;
 	struct device_node *port;
+	int i;
 
-	if (parent == NULL)
-		return;
-
-	port = omapdss_of_get_next_port(parent, NULL);
-	if (!port)
-		return;
-
-	if (dss.feat->num_ports == 0)
-		return;
-
-	do {
-		enum omap_display_type port_type;
-		u32 reg;
-		int r;
-
-		r = of_property_read_u32(port, "reg", &reg);
-		if (r)
-			reg = 0;
-
-		if (reg >= dss.feat->num_ports)
+	for (i = 0; i < dss.feat->num_ports; i++) {
+		port = of_graph_get_port_by_id(parent, i);
+		if (!port)
 			continue;
 
-		port_type = dss.feat->ports[reg];
-
-		switch (port_type) {
+		switch (dss.feat->ports[i]) {
 		case OMAP_DISPLAY_TYPE_DPI:
 			dpi_uninit_port(port);
 			break;
@@ -1114,7 +1079,7 @@ static void dss_uninit_ports(struct platform_device *pdev)
 		default:
 			break;
 		}
-	} while ((port = omapdss_of_get_next_port(parent, port)) != NULL);
+	}
 }
 
 static int dss_video_pll_probe(struct platform_device *pdev)
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi4.c b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
index e7162c16de2e..87c53034c634 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi4.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi4.c
@@ -34,6 +34,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/component.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <sound/omap-hdmi-audio.h>
 
 #include "omapdss.h"
@@ -546,7 +547,7 @@ static int hdmi_probe_of(struct platform_device *pdev)
 	struct device_node *ep;
 	int r;
 
-	ep = omapdss_of_get_first_endpoint(node);
+	ep = of_graph_get_endpoint_by_regs(node, 0, 0);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/hdmi5.c b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
index 678dfb02764a..d13dce7e8079 100644
--- a/drivers/gpu/drm/omapdrm/dss/hdmi5.c
+++ b/drivers/gpu/drm/omapdrm/dss/hdmi5.c
@@ -39,6 +39,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/component.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <sound/omap-hdmi-audio.h>
 
 #include "omapdss.h"
@@ -572,7 +573,7 @@ static int hdmi_probe_of(struct platform_device *pdev)
 	struct device_node *ep;
 	int r;
 
-	ep = omapdss_of_get_first_endpoint(node);
+	ep = of_graph_get_endpoint_by_regs(node, 0, 0);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
index 63c2684f889b..b19dae1fd6c5 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss.h
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -830,17 +830,6 @@ static inline bool omapdss_device_is_enabled(struct omap_dss_device *dssdev)
 	return dssdev->state == OMAP_DSS_DISPLAY_ACTIVE;
 }
 
-struct device_node *
-omapdss_of_get_next_port(const struct device_node *parent,
-			 struct device_node *prev);
-
-struct device_node *
-omapdss_of_get_next_endpoint(const struct device_node *parent,
-			     struct device_node *prev);
-
-struct device_node *
-omapdss_of_get_first_endpoint(const struct device_node *parent);
-
 struct omap_dss_device *
 omapdss_of_find_source_for_first_ep(struct device_node *node);
 
diff --git a/drivers/gpu/drm/omapdrm/dss/sdi.c b/drivers/gpu/drm/omapdrm/dss/sdi.c
index b3bda2d3c08d..0620b9f8c231 100644
--- a/drivers/gpu/drm/omapdrm/dss/sdi.c
+++ b/drivers/gpu/drm/omapdrm/dss/sdi.c
@@ -414,7 +414,7 @@ int sdi_init_port(struct platform_device *pdev, struct device_node *port)
 	u32 datapairs;
 	int r;
 
-	ep = omapdss_of_get_next_endpoint(port, NULL);
+	ep = of_get_next_child(port, NULL);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/dss/venc.c b/drivers/gpu/drm/omapdrm/dss/venc.c
index d74f7fcc2e46..19d14957f566 100644
--- a/drivers/gpu/drm/omapdrm/dss/venc.c
+++ b/drivers/gpu/drm/omapdrm/dss/venc.c
@@ -35,6 +35,7 @@
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
+#include <linux/of_graph.h>
 #include <linux/component.h>
 
 #include "omapdss.h"
@@ -818,7 +819,7 @@ static int venc_probe_of(struct platform_device *pdev)
 	u32 channels;
 	int r;
 
-	ep = omapdss_of_get_first_endpoint(node);
+	ep = of_graph_get_endpoint_by_regs(node, 0, 0);
 	if (!ep)
 		return 0;
 
diff --git a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
index ee5883f59be5..0dbe0306953d 100644
--- a/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem_dmabuf.c
@@ -160,10 +160,10 @@ static struct dma_buf_ops omap_dmabuf_ops = {
 	.release = omap_gem_dmabuf_release,
 	.begin_cpu_access = omap_gem_dmabuf_begin_cpu_access,
 	.end_cpu_access = omap_gem_dmabuf_end_cpu_access,
-	.kmap_atomic = omap_gem_dmabuf_kmap_atomic,
-	.kunmap_atomic = omap_gem_dmabuf_kunmap_atomic,
-	.kmap = omap_gem_dmabuf_kmap,
-	.kunmap = omap_gem_dmabuf_kunmap,
+	.map_atomic = omap_gem_dmabuf_kmap_atomic,
+	.unmap_atomic = omap_gem_dmabuf_kunmap_atomic,
+	.map = omap_gem_dmabuf_kmap,
+	.unmap = omap_gem_dmabuf_kunmap,
 	.mmap = omap_gem_dmabuf_mmap,
 };
 
diff --git a/drivers/gpu/drm/panel/Kconfig b/drivers/gpu/drm/panel/Kconfig
index 5dc2106da2bc..3e29a9903303 100644
--- a/drivers/gpu/drm/panel/Kconfig
+++ b/drivers/gpu/drm/panel/Kconfig
@@ -62,6 +62,12 @@ config DRM_PANEL_PANASONIC_VVX10F034N00
 	  WUXGA (1920x1200) Novatek NT1397-based DSI panel as found in some
 	  Xperia Z2 tablets
 
+config DRM_PANEL_SAMSUNG_S6E3HA2
+	tristate "Samsung S6E3HA2 DSI video mode panel"
+	depends on OF
+	depends on DRM_MIPI_DSI
+	select VIDEOMODE_HELPERS
+
 config DRM_PANEL_SAMSUNG_S6E8AA0
 	tristate "Samsung S6E8AA0 DSI video mode panel"
 	depends on OF
@@ -91,4 +97,11 @@ config DRM_PANEL_SHARP_LS043T1LE01
 	  Say Y here if you want to enable support for Sharp LS043T1LE01 qHD
 	  (540x960) DSI panel as found on the Qualcomm APQ8074 Dragonboard
 
+config DRM_PANEL_SITRONIX_ST7789V
+	tristate "Sitronix ST7789V panel"
+	depends on OF && SPI
+	help
+	  Say Y here if you want to enable support for the Sitronix
+	  ST7789V controller for 240x320 LCD panels
+
 endmenu
diff --git a/drivers/gpu/drm/panel/Makefile b/drivers/gpu/drm/panel/Makefile
index 20b5060d1f47..292b3c77aede 100644
--- a/drivers/gpu/drm/panel/Makefile
+++ b/drivers/gpu/drm/panel/Makefile
@@ -4,6 +4,8 @@ obj-$(CONFIG_DRM_PANEL_JDI_LT070ME05000) += panel-jdi-lt070me05000.o
 obj-$(CONFIG_DRM_PANEL_LG_LG4573) += panel-lg-lg4573.o
 obj-$(CONFIG_DRM_PANEL_PANASONIC_VVX10F034N00) += panel-panasonic-vvx10f034n00.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_LD9040) += panel-samsung-ld9040.o
+obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E3HA2) += panel-samsung-s6e3ha2.o
 obj-$(CONFIG_DRM_PANEL_SAMSUNG_S6E8AA0) += panel-samsung-s6e8aa0.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LQ101R1SX01) += panel-sharp-lq101r1sx01.o
 obj-$(CONFIG_DRM_PANEL_SHARP_LS043T1LE01) += panel-sharp-ls043t1le01.o
+obj-$(CONFIG_DRM_PANEL_SITRONIX_ST7789V) += panel-sitronix-st7789v.o
diff --git a/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
new file mode 100644
index 000000000000..4cc08d7b3de4
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-samsung-s6e3ha2.c
@@ -0,0 +1,739 @@
+/*
+ * MIPI-DSI based s6e3ha2 AMOLED 5.7 inch panel driver.
+ *
+ * Copyright (c) 2016 Samsung Electronics Co., Ltd.
+ * Donghwa Lee <dh09.lee@samsung.com>
+ * Hyungwon Hwang <human.hwang@samsung.com>
+ * Hoegeun Kwon <hoegeun.kwon@samsung.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <drm/drmP.h>
+#include <drm/drm_mipi_dsi.h>
+#include <drm/drm_panel.h>
+#include <linux/backlight.h>
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+
+#define S6E3HA2_MIN_BRIGHTNESS		0
+#define S6E3HA2_MAX_BRIGHTNESS		100
+#define S6E3HA2_DEFAULT_BRIGHTNESS	80
+
+#define S6E3HA2_NUM_GAMMA_STEPS		46
+#define S6E3HA2_GAMMA_CMD_CNT		35
+#define S6E3HA2_VINT_STATUS_MAX		10
+
+static const u8 gamma_tbl[S6E3HA2_NUM_GAMMA_STEPS][S6E3HA2_GAMMA_CMD_CNT] = {
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x82, 0x83,
+	  0x85, 0x88, 0x8b, 0x8b, 0x84, 0x88, 0x82, 0x82, 0x89, 0x86, 0x8c,
+	  0x94, 0x84, 0xb1, 0xaf, 0x8e, 0xcf, 0xad, 0xc9, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x84, 0x84,
+	  0x85, 0x87, 0x8b, 0x8a, 0x84, 0x88, 0x82, 0x82, 0x89, 0x86, 0x8a,
+	  0x93, 0x84, 0xb0, 0xae, 0x8e, 0xc9, 0xa8, 0xc5, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x8a, 0x8a, 0x84, 0x88, 0x81, 0x84, 0x8a, 0x88, 0x8a,
+	  0x91, 0x84, 0xb1, 0xae, 0x8b, 0xd5, 0xb2, 0xcc, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x8a, 0x8a, 0x84, 0x87, 0x81, 0x84, 0x8a, 0x87, 0x8a,
+	  0x91, 0x85, 0xae, 0xac, 0x8a, 0xc3, 0xa3, 0xc0, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x85, 0x85,
+	  0x86, 0x85, 0x88, 0x89, 0x84, 0x89, 0x82, 0x84, 0x87, 0x85, 0x8b,
+	  0x91, 0x88, 0xad, 0xab, 0x8a, 0xb7, 0x9b, 0xb6, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x89, 0x8a, 0x84, 0x89, 0x83, 0x83, 0x86, 0x84, 0x8b,
+	  0x90, 0x84, 0xb0, 0xae, 0x8b, 0xce, 0xad, 0xc8, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x87, 0x89, 0x8a, 0x83, 0x87, 0x82, 0x85, 0x88, 0x87, 0x89,
+	  0x8f, 0x84, 0xac, 0xaa, 0x89, 0xb1, 0x98, 0xaf, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x88, 0x89, 0x84, 0x88, 0x83, 0x82, 0x85, 0x84, 0x8c,
+	  0x91, 0x86, 0xac, 0xaa, 0x89, 0xc2, 0xa5, 0xbd, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x85, 0x87, 0x89, 0x8a, 0x83, 0x87, 0x82, 0x85, 0x88, 0x87, 0x88,
+	  0x8b, 0x82, 0xad, 0xaa, 0x8a, 0xc2, 0xa5, 0xbd, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x89, 0x87, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x87, 0x89, 0x84, 0x88, 0x83, 0x82, 0x85, 0x84, 0x8a,
+	  0x8e, 0x84, 0xae, 0xac, 0x89, 0xda, 0xb7, 0xd0, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x85, 0x86, 0x87, 0x89, 0x84, 0x88, 0x83, 0x80, 0x83, 0x82, 0x8b,
+	  0x8e, 0x85, 0xac, 0xaa, 0x89, 0xc8, 0xaa, 0xc1, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x85, 0x86, 0x87, 0x89, 0x81, 0x85, 0x81, 0x84, 0x86, 0x84, 0x8c,
+	  0x8c, 0x84, 0xa9, 0xa8, 0x87, 0xa3, 0x92, 0xa1, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x85, 0x86, 0x87, 0x89, 0x84, 0x86, 0x83, 0x80, 0x83, 0x81, 0x8c,
+	  0x8d, 0x84, 0xaa, 0xaa, 0x89, 0xce, 0xaf, 0xc5, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x85, 0x86, 0x87, 0x89, 0x81, 0x83, 0x80, 0x83, 0x85, 0x85, 0x8c,
+	  0x8c, 0x84, 0xa8, 0xa8, 0x88, 0xb5, 0x9f, 0xb0, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x86, 0x86, 0x87, 0x88, 0x81, 0x83, 0x80, 0x83, 0x85, 0x85, 0x8c,
+	  0x8b, 0x84, 0xab, 0xa8, 0x86, 0xd4, 0xb4, 0xc9, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x86, 0x86, 0x87, 0x88, 0x81, 0x83, 0x80, 0x84, 0x84, 0x85, 0x8b,
+	  0x8a, 0x83, 0xa6, 0xa5, 0x84, 0xbb, 0xa4, 0xb3, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x84, 0x84,
+	  0x86, 0x85, 0x86, 0x86, 0x82, 0x85, 0x81, 0x82, 0x83, 0x84, 0x8e,
+	  0x8b, 0x83, 0xa4, 0xa3, 0x8a, 0xa1, 0x93, 0x9d, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x87, 0x87, 0x82, 0x85, 0x81, 0x82, 0x82, 0x84, 0x8e,
+	  0x8b, 0x83, 0xa4, 0xa2, 0x86, 0xc1, 0xa9, 0xb7, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x87, 0x87, 0x82, 0x85, 0x81, 0x82, 0x82, 0x84, 0x8d,
+	  0x89, 0x82, 0xa2, 0xa1, 0x84, 0xa7, 0x98, 0xa1, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xb8, 0x00, 0xc3, 0x00, 0xb1, 0x88, 0x86, 0x87, 0x83, 0x83,
+	  0x85, 0x86, 0x87, 0x87, 0x82, 0x85, 0x81, 0x83, 0x83, 0x85, 0x8c,
+	  0x87, 0x7f, 0xa2, 0x9d, 0x88, 0x8d, 0x88, 0x8b, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xbb, 0x00, 0xc5, 0x00, 0xb4, 0x87, 0x86, 0x86, 0x84, 0x83,
+	  0x86, 0x87, 0x87, 0x87, 0x80, 0x82, 0x7f, 0x86, 0x86, 0x88, 0x8a,
+	  0x84, 0x7e, 0x9d, 0x9c, 0x82, 0x8d, 0x88, 0x8b, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xbd, 0x00, 0xc7, 0x00, 0xb7, 0x87, 0x85, 0x85, 0x84, 0x83,
+	  0x86, 0x86, 0x86, 0x88, 0x81, 0x83, 0x80, 0x83, 0x84, 0x85, 0x8a,
+	  0x85, 0x7e, 0x9c, 0x9b, 0x85, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xc0, 0x00, 0xca, 0x00, 0xbb, 0x87, 0x86, 0x85, 0x83, 0x83,
+	  0x85, 0x86, 0x86, 0x88, 0x81, 0x83, 0x80, 0x84, 0x85, 0x86, 0x89,
+	  0x83, 0x7d, 0x9c, 0x99, 0x87, 0x7b, 0x7b, 0x7c, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xc4, 0x00, 0xcd, 0x00, 0xbe, 0x87, 0x86, 0x85, 0x83, 0x83,
+	  0x86, 0x85, 0x85, 0x87, 0x81, 0x82, 0x80, 0x82, 0x82, 0x83, 0x8a,
+	  0x85, 0x7f, 0x9f, 0x9b, 0x86, 0xb4, 0xa1, 0xac, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xc7, 0x00, 0xd0, 0x00, 0xc2, 0x87, 0x85, 0x85, 0x83, 0x82,
+	  0x85, 0x85, 0x85, 0x86, 0x82, 0x83, 0x80, 0x82, 0x82, 0x84, 0x87,
+	  0x86, 0x80, 0x9e, 0x9a, 0x87, 0xa7, 0x98, 0xa1, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xca, 0x00, 0xd2, 0x00, 0xc5, 0x87, 0x85, 0x84, 0x82, 0x82,
+	  0x84, 0x85, 0x85, 0x86, 0x81, 0x82, 0x7f, 0x82, 0x82, 0x84, 0x88,
+	  0x86, 0x81, 0x9d, 0x98, 0x86, 0x8d, 0x88, 0x8b, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xce, 0x00, 0xd6, 0x00, 0xca, 0x86, 0x85, 0x84, 0x83, 0x83,
+	  0x85, 0x84, 0x84, 0x85, 0x81, 0x82, 0x80, 0x81, 0x81, 0x82, 0x89,
+	  0x86, 0x81, 0x9c, 0x97, 0x86, 0xa7, 0x98, 0xa1, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xd1, 0x00, 0xd9, 0x00, 0xce, 0x86, 0x84, 0x83, 0x83, 0x82,
+	  0x85, 0x85, 0x85, 0x86, 0x81, 0x83, 0x81, 0x82, 0x82, 0x83, 0x86,
+	  0x83, 0x7f, 0x99, 0x95, 0x86, 0xbb, 0xa4, 0xb3, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xd4, 0x00, 0xdb, 0x00, 0xd1, 0x86, 0x85, 0x83, 0x83, 0x82,
+	  0x85, 0x84, 0x84, 0x85, 0x80, 0x83, 0x82, 0x80, 0x80, 0x81, 0x87,
+	  0x84, 0x81, 0x98, 0x93, 0x85, 0xae, 0x9c, 0xa8, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xd8, 0x00, 0xde, 0x00, 0xd6, 0x86, 0x84, 0x83, 0x81, 0x81,
+	  0x83, 0x85, 0x85, 0x85, 0x82, 0x83, 0x81, 0x81, 0x81, 0x83, 0x86,
+	  0x84, 0x80, 0x98, 0x91, 0x85, 0x7b, 0x7b, 0x7c, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xdc, 0x00, 0xe2, 0x00, 0xda, 0x85, 0x84, 0x83, 0x82, 0x82,
+	  0x84, 0x84, 0x84, 0x85, 0x81, 0x82, 0x82, 0x80, 0x80, 0x81, 0x83,
+	  0x82, 0x7f, 0x99, 0x93, 0x86, 0x94, 0x8b, 0x92, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xdf, 0x00, 0xe5, 0x00, 0xde, 0x85, 0x84, 0x82, 0x82, 0x82,
+	  0x84, 0x83, 0x83, 0x84, 0x81, 0x81, 0x80, 0x83, 0x82, 0x84, 0x82,
+	  0x81, 0x7f, 0x99, 0x92, 0x86, 0x7b, 0x7b, 0x7c, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x83, 0x83, 0x84, 0x80, 0x81, 0x80, 0x83, 0x83, 0x84, 0x80,
+	  0x81, 0x7c, 0x99, 0x92, 0x87, 0xa1, 0x93, 0x9d, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x85, 0x84, 0x83, 0x81, 0x81,
+	  0x82, 0x82, 0x82, 0x83, 0x80, 0x81, 0x80, 0x81, 0x80, 0x82, 0x83,
+	  0x82, 0x80, 0x91, 0x8d, 0x83, 0x9a, 0x90, 0x96, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x83, 0x83, 0x84, 0x80, 0x81, 0x80, 0x81, 0x80, 0x82, 0x83,
+	  0x81, 0x7f, 0x91, 0x8c, 0x82, 0x8d, 0x88, 0x8b, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x83, 0x83, 0x83, 0x82, 0x82, 0x81, 0x81, 0x80, 0x82, 0x82,
+	  0x82, 0x7f, 0x94, 0x89, 0x84, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x83, 0x83, 0x83, 0x82, 0x82, 0x81, 0x81, 0x80, 0x82, 0x83,
+	  0x82, 0x7f, 0x91, 0x85, 0x81, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x83, 0x83, 0x83, 0x80, 0x80, 0x7f, 0x83, 0x82, 0x84, 0x83,
+	  0x82, 0x7f, 0x90, 0x84, 0x81, 0x9a, 0x90, 0x96, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe4, 0x00, 0xe9, 0x00, 0xe3, 0x84, 0x83, 0x82, 0x80, 0x80,
+	  0x82, 0x83, 0x83, 0x83, 0x80, 0x80, 0x7f, 0x80, 0x80, 0x81, 0x81,
+	  0x82, 0x83, 0x7e, 0x80, 0x7c, 0xa4, 0x97, 0x9f, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xe9, 0x00, 0xec, 0x00, 0xe8, 0x84, 0x83, 0x82, 0x81, 0x81,
+	  0x82, 0x82, 0x82, 0x83, 0x7f, 0x7f, 0x7f, 0x81, 0x80, 0x82, 0x83,
+	  0x83, 0x84, 0x79, 0x7c, 0x79, 0xb1, 0xa0, 0xaa, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xed, 0x00, 0xf0, 0x00, 0xec, 0x83, 0x83, 0x82, 0x80, 0x80,
+	  0x81, 0x82, 0x82, 0x82, 0x7f, 0x7f, 0x7e, 0x81, 0x81, 0x82, 0x80,
+	  0x81, 0x81, 0x84, 0x84, 0x83, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xf1, 0x00, 0xf4, 0x00, 0xf1, 0x83, 0x82, 0x82, 0x80, 0x80,
+	  0x81, 0x82, 0x82, 0x82, 0x80, 0x80, 0x80, 0x80, 0x80, 0x81, 0x7d,
+	  0x7e, 0x7f, 0x84, 0x84, 0x83, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xf6, 0x00, 0xf7, 0x00, 0xf5, 0x82, 0x82, 0x81, 0x80, 0x80,
+	  0x80, 0x82, 0x82, 0x82, 0x80, 0x80, 0x80, 0x7f, 0x7f, 0x7f, 0x82,
+	  0x82, 0x82, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x00, 0xfa, 0x00, 0xfb, 0x00, 0xfa, 0x81, 0x81, 0x81, 0x80, 0x80,
+	  0x80, 0x82, 0x82, 0x82, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+	  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80,
+	  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+	  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 },
+	{ 0x01, 0x00, 0x01, 0x00, 0x01, 0x00, 0x80, 0x80, 0x80, 0x80, 0x80,
+	  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80,
+	  0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00, 0x00, 0x00,
+	  0x00, 0x00 }
+};
+
+unsigned char vint_table[S6E3HA2_VINT_STATUS_MAX] = {
+	0x18, 0x19, 0x1a, 0x1b, 0x1c,
+	0x1d, 0x1e, 0x1f, 0x20, 0x21
+};
+
+struct s6e3ha2 {
+	struct device *dev;
+	struct drm_panel panel;
+	struct backlight_device *bl_dev;
+
+	struct regulator_bulk_data supplies[2];
+	struct gpio_desc *reset_gpio;
+	struct gpio_desc *enable_gpio;
+};
+
+static int s6e3ha2_dcs_write(struct s6e3ha2 *ctx, const void *data, size_t len)
+{
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+
+	return mipi_dsi_dcs_write_buffer(dsi, data, len);
+}
+
+#define s6e3ha2_dcs_write_seq_static(ctx, seq...) do {	\
+	static const u8 d[] = { seq };			\
+	int ret;					\
+	ret = s6e3ha2_dcs_write(ctx, d, ARRAY_SIZE(d));	\
+	if (ret < 0)					\
+		return ret;				\
+} while (0)
+
+#define s6e3ha2_call_write_func(ret, func) do {	\
+	ret = (func);				\
+	if (ret < 0)				\
+		return ret;			\
+} while (0)
+
+static int s6e3ha2_test_key_on_f0(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf0, 0x5a, 0x5a);
+	return 0;
+}
+
+static int s6e3ha2_test_key_off_f0(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf0, 0xa5, 0xa5);
+	return 0;
+}
+
+static int s6e3ha2_test_key_on_fc(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfc, 0x5a, 0x5a);
+	return 0;
+}
+
+static int s6e3ha2_test_key_off_fc(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfc, 0xa5, 0xa5);
+	return 0;
+}
+
+static int s6e3ha2_single_dsi_set(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf2, 0x67);
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf9, 0x09);
+	return 0;
+}
+
+static int s6e3ha2_freq_calibration(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfd, 0x1c);
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfe, 0x20, 0x39);
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfe, 0xa0);
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfe, 0x20);
+	s6e3ha2_dcs_write_seq_static(ctx, 0xce, 0x03, 0x3b, 0x12, 0x62, 0x40,
+				0x80, 0xc0, 0x28, 0x28, 0x28, 0x28, 0x39, 0xc5);
+	return 0;
+}
+
+static int s6e3ha2_aor_control(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb2, 0x03, 0x10);
+	return 0;
+}
+
+static int s6e3ha2_caps_elvss_set(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb6, 0x9c, 0x0a);
+	return 0;
+}
+
+static int s6e3ha2_acl_off(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0x55, 0x00);
+	return 0;
+}
+
+static int s6e3ha2_acl_off_opr(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb5, 0x40);
+	return 0;
+}
+
+static int s6e3ha2_test_global(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb0, 0x07);
+	return 0;
+}
+
+static int s6e3ha2_test(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb8, 0x19);
+	return 0;
+}
+
+static int s6e3ha2_touch_hsync_on1(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xbd, 0x33, 0x11, 0x02,
+					0x16, 0x02, 0x16);
+	return 0;
+}
+
+static int s6e3ha2_pentile_control(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xc0, 0x00, 0x00, 0xd8, 0xd8);
+	return 0;
+}
+
+static int s6e3ha2_poc_global(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb0, 0x20);
+	return 0;
+}
+
+static int s6e3ha2_poc_setting(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xfe, 0x08);
+	return 0;
+}
+
+static int s6e3ha2_pcd_set_off(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xcc, 0x40, 0x51);
+	return 0;
+}
+
+static int s6e3ha2_err_fg_set(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xed, 0x44);
+	return 0;
+}
+
+static int s6e3ha2_hbm_off(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0x53, 0x00);
+	return 0;
+}
+
+static int s6e3ha2_te_start_setting(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xb9, 0x10, 0x09, 0xff, 0x00, 0x09);
+	return 0;
+}
+
+static int s6e3ha2_gamma_update(struct s6e3ha2 *ctx)
+{
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf7, 0x03);
+	ndelay(100); /* need for 100ns delay */
+	s6e3ha2_dcs_write_seq_static(ctx, 0xf7, 0x00);
+	return 0;
+}
+
+static int s6e3ha2_get_brightness(struct backlight_device *bl_dev)
+{
+	return bl_dev->props.brightness;
+}
+
+static int s6e3ha2_set_vint(struct s6e3ha2 *ctx)
+{
+	struct backlight_device *bl_dev = ctx->bl_dev;
+	unsigned int brightness = bl_dev->props.brightness;
+	unsigned char data[] = { 0xf4, 0x8b,
+			vint_table[brightness * (S6E3HA2_VINT_STATUS_MAX - 1) /
+			S6E3HA2_MAX_BRIGHTNESS] };
+
+	return s6e3ha2_dcs_write(ctx, data, ARRAY_SIZE(data));
+}
+
+static unsigned int s6e3ha2_get_brightness_index(unsigned int brightness)
+{
+	return (brightness * (S6E3HA2_NUM_GAMMA_STEPS - 1)) /
+		S6E3HA2_MAX_BRIGHTNESS;
+}
+
+static int s6e3ha2_update_gamma(struct s6e3ha2 *ctx, unsigned int brightness)
+{
+	struct backlight_device *bl_dev = ctx->bl_dev;
+	unsigned int index = s6e3ha2_get_brightness_index(brightness);
+	u8 data[S6E3HA2_GAMMA_CMD_CNT + 1] = { 0xca, };
+	int ret;
+
+	memcpy(data + 1, gamma_tbl + index, S6E3HA2_GAMMA_CMD_CNT);
+	s6e3ha2_call_write_func(ret,
+				s6e3ha2_dcs_write(ctx, data, ARRAY_SIZE(data)));
+
+	s6e3ha2_call_write_func(ret, s6e3ha2_gamma_update(ctx));
+	bl_dev->props.brightness = brightness;
+
+	return 0;
+}
+
+static int s6e3ha2_set_brightness(struct backlight_device *bl_dev)
+{
+	struct s6e3ha2 *ctx = bl_get_data(bl_dev);
+	unsigned int brightness = bl_dev->props.brightness;
+	int ret;
+
+	if (brightness < S6E3HA2_MIN_BRIGHTNESS ||
+		brightness > bl_dev->props.max_brightness) {
+		dev_err(ctx->dev, "Invalid brightness: %u\n", brightness);
+		return -EINVAL;
+	}
+
+	if (bl_dev->props.power > FB_BLANK_NORMAL)
+		return -EPERM;
+
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_on_f0(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_update_gamma(ctx, brightness));
+	s6e3ha2_call_write_func(ret, s6e3ha2_aor_control(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_set_vint(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_off_f0(ctx));
+
+	return 0;
+}
+
+static const struct backlight_ops s6e3ha2_bl_ops = {
+	.get_brightness = s6e3ha2_get_brightness,
+	.update_status = s6e3ha2_set_brightness,
+};
+
+static int s6e3ha2_panel_init(struct s6e3ha2 *ctx)
+{
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	int ret;
+
+	s6e3ha2_call_write_func(ret, mipi_dsi_dcs_exit_sleep_mode(dsi));
+	usleep_range(5000, 6000);
+
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_on_f0(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_single_dsi_set(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_on_fc(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_freq_calibration(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_off_fc(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_off_f0(ctx));
+
+	return 0;
+}
+
+static int s6e3ha2_power_off(struct s6e3ha2 *ctx)
+{
+	return regulator_bulk_disable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+}
+
+static int s6e3ha2_disable(struct drm_panel *panel)
+{
+	struct s6e3ha2 *ctx = container_of(panel, struct s6e3ha2, panel);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	int ret;
+
+	s6e3ha2_call_write_func(ret, mipi_dsi_dcs_enter_sleep_mode(dsi));
+	s6e3ha2_call_write_func(ret, mipi_dsi_dcs_set_display_off(dsi));
+
+	msleep(40);
+	ctx->bl_dev->props.power = FB_BLANK_NORMAL;
+
+	return 0;
+}
+
+static int s6e3ha2_unprepare(struct drm_panel *panel)
+{
+	struct s6e3ha2 *ctx = container_of(panel, struct s6e3ha2, panel);
+
+	return s6e3ha2_power_off(ctx);
+}
+
+static int s6e3ha2_power_on(struct s6e3ha2 *ctx)
+{
+	int ret;
+
+	ret = regulator_bulk_enable(ARRAY_SIZE(ctx->supplies), ctx->supplies);
+	if (ret < 0)
+		return ret;
+
+	msleep(120);
+
+	gpiod_set_value(ctx->enable_gpio, 0);
+	usleep_range(5000, 6000);
+	gpiod_set_value(ctx->enable_gpio, 1);
+
+	gpiod_set_value(ctx->reset_gpio, 1);
+	usleep_range(5000, 6000);
+	gpiod_set_value(ctx->reset_gpio, 0);
+	usleep_range(5000, 6000);
+
+	return 0;
+}
+static int s6e3ha2_prepare(struct drm_panel *panel)
+{
+	struct s6e3ha2 *ctx = container_of(panel, struct s6e3ha2, panel);
+	int ret;
+
+	ret = s6e3ha2_power_on(ctx);
+	if (ret < 0)
+		return ret;
+
+	ret = s6e3ha2_panel_init(ctx);
+	if (ret < 0)
+		goto err;
+
+	ctx->bl_dev->props.power = FB_BLANK_NORMAL;
+
+	return 0;
+
+err:
+	s6e3ha2_power_off(ctx);
+	return ret;
+}
+
+static int s6e3ha2_enable(struct drm_panel *panel)
+{
+	struct s6e3ha2 *ctx = container_of(panel, struct s6e3ha2, panel);
+	struct mipi_dsi_device *dsi = to_mipi_dsi_device(ctx->dev);
+	int ret;
+
+	/* common setting */
+	s6e3ha2_call_write_func(ret,
+		mipi_dsi_dcs_set_tear_on(dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK));
+
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_on_f0(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_on_fc(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_touch_hsync_on1(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_pentile_control(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_poc_global(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_poc_setting(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_off_fc(ctx));
+
+	/* pcd setting off for TB */
+	s6e3ha2_call_write_func(ret, s6e3ha2_pcd_set_off(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_err_fg_set(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_te_start_setting(ctx));
+
+	/* brightness setting */
+	s6e3ha2_call_write_func(ret, s6e3ha2_set_brightness(ctx->bl_dev));
+	s6e3ha2_call_write_func(ret, s6e3ha2_aor_control(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_caps_elvss_set(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_gamma_update(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_acl_off(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_acl_off_opr(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_hbm_off(ctx));
+
+	/* elvss temp compensation */
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_global(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test(ctx));
+	s6e3ha2_call_write_func(ret, s6e3ha2_test_key_off_f0(ctx));
+
+	s6e3ha2_call_write_func(ret, mipi_dsi_dcs_set_display_on(dsi));
+	ctx->bl_dev->props.power = FB_BLANK_UNBLANK;
+
+	return 0;
+}
+
+static const struct drm_display_mode default_mode = {
+	.clock = 222372,
+	.hdisplay = 1440,
+	.hsync_start = 1440 + 1,
+	.hsync_end = 1440 + 1 + 1,
+	.htotal = 1440 + 1 + 1 + 1,
+	.vdisplay = 2560,
+	.vsync_start = 2560 + 1,
+	.vsync_end = 2560 + 1 + 1,
+	.vtotal = 2560 + 1 + 1 + 15,
+	.vrefresh = 60,
+	.flags = 0,
+};
+
+static int s6e3ha2_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &default_mode);
+	if (!mode) {
+		DRM_ERROR("failed to add mode %ux%ux@%u\n",
+				default_mode.hdisplay, default_mode.vdisplay,
+				default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	connector->display_info.width_mm = 71;
+	connector->display_info.height_mm = 125;
+
+	return 1;
+}
+
+static const struct drm_panel_funcs s6e3ha2_drm_funcs = {
+	.disable = s6e3ha2_disable,
+	.unprepare = s6e3ha2_unprepare,
+	.prepare = s6e3ha2_prepare,
+	.enable = s6e3ha2_enable,
+	.get_modes = s6e3ha2_get_modes,
+};
+
+static int s6e3ha2_probe(struct mipi_dsi_device *dsi)
+{
+	struct device *dev = &dsi->dev;
+	struct s6e3ha2 *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	mipi_dsi_set_drvdata(dsi, ctx);
+
+	ctx->dev = dev;
+
+	dsi->lanes = 4;
+	dsi->format = MIPI_DSI_FMT_RGB888;
+	dsi->mode_flags = MIPI_DSI_CLOCK_NON_CONTINUOUS;
+
+	ctx->supplies[0].supply = "vdd3";
+	ctx->supplies[1].supply = "vci";
+
+	ret = devm_regulator_bulk_get(dev, ARRAY_SIZE(ctx->supplies),
+				      ctx->supplies);
+	if (ret < 0) {
+		dev_err(dev, "failed to get regulators: %d\n", ret);
+		return ret;
+	}
+
+	ctx->reset_gpio = devm_gpiod_get(dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset_gpio)) {
+		dev_err(dev, "cannot get reset-gpios %ld\n",
+			PTR_ERR(ctx->reset_gpio));
+		return PTR_ERR(ctx->reset_gpio);
+	}
+
+	ctx->enable_gpio = devm_gpiod_get(dev, "enable", GPIOD_OUT_HIGH);
+	if (IS_ERR(ctx->enable_gpio)) {
+		dev_err(dev, "cannot get enable-gpios %ld\n",
+			PTR_ERR(ctx->enable_gpio));
+		return PTR_ERR(ctx->enable_gpio);
+	}
+
+	ctx->bl_dev = backlight_device_register("s6e3ha2", dev, ctx,
+						&s6e3ha2_bl_ops, NULL);
+	if (IS_ERR(ctx->bl_dev)) {
+		dev_err(dev, "failed to register backlight device\n");
+		return PTR_ERR(ctx->bl_dev);
+	}
+
+	ctx->bl_dev->props.max_brightness = S6E3HA2_MAX_BRIGHTNESS;
+	ctx->bl_dev->props.brightness = S6E3HA2_DEFAULT_BRIGHTNESS;
+	ctx->bl_dev->props.power = FB_BLANK_POWERDOWN;
+
+	drm_panel_init(&ctx->panel);
+	ctx->panel.dev = dev;
+	ctx->panel.funcs = &s6e3ha2_drm_funcs;
+
+	ret = drm_panel_add(&ctx->panel);
+	if (ret < 0)
+		goto unregister_backlight;
+
+	ret = mipi_dsi_attach(dsi);
+	if (ret < 0)
+		goto remove_panel;
+
+	return ret;
+
+remove_panel:
+	drm_panel_remove(&ctx->panel);
+
+unregister_backlight:
+	backlight_device_unregister(ctx->bl_dev);
+
+	return ret;
+}
+
+static int s6e3ha2_remove(struct mipi_dsi_device *dsi)
+{
+	struct s6e3ha2 *ctx = mipi_dsi_get_drvdata(dsi);
+
+	mipi_dsi_detach(dsi);
+	drm_panel_remove(&ctx->panel);
+	backlight_device_unregister(ctx->bl_dev);
+
+	return 0;
+}
+
+static const struct of_device_id s6e3ha2_of_match[] = {
+	{ .compatible = "samsung,s6e3ha2" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, s6e3ha2_of_match);
+
+static struct mipi_dsi_driver s6e3ha2_driver = {
+	.probe = s6e3ha2_probe,
+	.remove = s6e3ha2_remove,
+	.driver = {
+		.name = "panel-samsung-s6e3ha2",
+		.of_match_table = s6e3ha2_of_match,
+	},
+};
+module_mipi_dsi_driver(s6e3ha2_driver);
+
+MODULE_AUTHOR("Donghwa Lee <dh09.lee@samsung.com>");
+MODULE_AUTHOR("Hyungwon Hwang <human.hwang@samsung.com>");
+MODULE_AUTHOR("Hoegeun Kwon <hoegeun.kwon@samsung.com>");
+MODULE_DESCRIPTION("MIPI-DSI based s6e3ha2 AMOLED Panel Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/panel/panel-simple.c b/drivers/gpu/drm/panel/panel-simple.c
index 89eb0422821c..c4566ce8fda7 100644
--- a/drivers/gpu/drm/panel/panel-simple.c
+++ b/drivers/gpu/drm/panel/panel-simple.c
@@ -386,6 +386,31 @@ static void panel_simple_shutdown(struct device *dev)
 	panel_simple_disable(&panel->base);
 }
 
+static const struct drm_display_mode ampire_am_480272h3tmqw_t01h_mode = {
+	.clock = 9000,
+	.hdisplay = 480,
+	.hsync_start = 480 + 2,
+	.hsync_end = 480 + 2 + 41,
+	.htotal = 480 + 2 + 41 + 2,
+	.vdisplay = 272,
+	.vsync_start = 272 + 2,
+	.vsync_end = 272 + 2 + 10,
+	.vtotal = 272 + 2 + 10 + 2,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_PHSYNC | DRM_MODE_FLAG_PVSYNC,
+};
+
+static const struct panel_desc ampire_am_480272h3tmqw_t01h = {
+	.modes = &ampire_am_480272h3tmqw_t01h_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 105,
+		.height = 67,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+};
+
 static const struct drm_display_mode ampire_am800480r3tmqwa1h_mode = {
 	.clock = 33333,
 	.hdisplay = 800,
@@ -1806,8 +1831,36 @@ static const struct panel_desc urt_umsh_8596md_parallel = {
 	.bus_format = MEDIA_BUS_FMT_RGB666_1X18,
 };
 
+static const struct drm_display_mode winstar_wf35ltiacd_mode = {
+	.clock = 6410,
+	.hdisplay = 320,
+	.hsync_start = 320 + 20,
+	.hsync_end = 320 + 20 + 30,
+	.htotal = 320 + 20 + 30 + 38,
+	.vdisplay = 240,
+	.vsync_start = 240 + 4,
+	.vsync_end = 240 + 4 + 3,
+	.vtotal = 240 + 4 + 3 + 15,
+	.vrefresh = 60,
+	.flags = DRM_MODE_FLAG_NVSYNC | DRM_MODE_FLAG_NHSYNC,
+};
+
+static const struct panel_desc winstar_wf35ltiacd = {
+	.modes = &winstar_wf35ltiacd_mode,
+	.num_modes = 1,
+	.bpc = 8,
+	.size = {
+		.width = 70,
+		.height = 53,
+	},
+	.bus_format = MEDIA_BUS_FMT_RGB888_1X24,
+};
+
 static const struct of_device_id platform_of_match[] = {
 	{
+		.compatible = "ampire,am-480272h3tmqw-t01h",
+		.data = &ampire_am_480272h3tmqw_t01h,
+	}, {
 		.compatible = "ampire,am800480r3tmqwa1h",
 		.data = &ampire_am800480r3tmqwa1h,
 	}, {
@@ -1994,6 +2047,9 @@ static const struct of_device_id platform_of_match[] = {
 		.compatible = "urt,umsh-8596md-20t",
 		.data = &urt_umsh_8596md_parallel,
 	}, {
+		.compatible = "winstar,wf35ltiacd",
+		.data = &winstar_wf35ltiacd,
+	}, {
 		/* sentinel */
 	}
 };
diff --git a/drivers/gpu/drm/panel/panel-sitronix-st7789v.c b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
new file mode 100644
index 000000000000..358c64ef1922
--- /dev/null
+++ b/drivers/gpu/drm/panel/panel-sitronix-st7789v.c
@@ -0,0 +1,449 @@
+/*
+ * Copyright (C) 2017 Free Electrons
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ */
+
+#include <linux/gpio/consumer.h>
+#include <linux/regulator/consumer.h>
+#include <linux/spi/spi.h>
+
+#include <drm/drmP.h>
+#include <drm/drm_panel.h>
+
+#include <video/mipi_display.h>
+
+#define ST7789V_COLMOD_RGB_FMT_18BITS		(6 << 4)
+#define ST7789V_COLMOD_CTRL_FMT_18BITS		(6 << 0)
+
+#define ST7789V_RAMCTRL_CMD		0xb0
+#define ST7789V_RAMCTRL_RM_RGB			BIT(4)
+#define ST7789V_RAMCTRL_DM_RGB			BIT(0)
+#define ST7789V_RAMCTRL_MAGIC			(3 << 6)
+#define ST7789V_RAMCTRL_EPF(n)			(((n) & 3) << 4)
+
+#define ST7789V_RGBCTRL_CMD		0xb1
+#define ST7789V_RGBCTRL_WO			BIT(7)
+#define ST7789V_RGBCTRL_RCM(n)			(((n) & 3) << 5)
+#define ST7789V_RGBCTRL_VSYNC_HIGH		BIT(3)
+#define ST7789V_RGBCTRL_HSYNC_HIGH		BIT(2)
+#define ST7789V_RGBCTRL_PCLK_HIGH		BIT(1)
+#define ST7789V_RGBCTRL_VBP(n)			((n) & 0x7f)
+#define ST7789V_RGBCTRL_HBP(n)			((n) & 0x1f)
+
+#define ST7789V_PORCTRL_CMD		0xb2
+#define ST7789V_PORCTRL_IDLE_BP(n)		(((n) & 0xf) << 4)
+#define ST7789V_PORCTRL_IDLE_FP(n)		((n) & 0xf)
+#define ST7789V_PORCTRL_PARTIAL_BP(n)		(((n) & 0xf) << 4)
+#define ST7789V_PORCTRL_PARTIAL_FP(n)		((n) & 0xf)
+
+#define ST7789V_GCTRL_CMD		0xb7
+#define ST7789V_GCTRL_VGHS(n)			(((n) & 7) << 4)
+#define ST7789V_GCTRL_VGLS(n)			((n) & 7)
+
+#define ST7789V_VCOMS_CMD		0xbb
+
+#define ST7789V_LCMCTRL_CMD		0xc0
+#define ST7789V_LCMCTRL_XBGR			BIT(5)
+#define ST7789V_LCMCTRL_XMX			BIT(3)
+#define ST7789V_LCMCTRL_XMH			BIT(2)
+
+#define ST7789V_VDVVRHEN_CMD		0xc2
+#define ST7789V_VDVVRHEN_CMDEN			BIT(0)
+
+#define ST7789V_VRHS_CMD		0xc3
+
+#define ST7789V_VDVS_CMD		0xc4
+
+#define ST7789V_FRCTRL2_CMD		0xc6
+
+#define ST7789V_PWCTRL1_CMD		0xd0
+#define ST7789V_PWCTRL1_MAGIC			0xa4
+#define ST7789V_PWCTRL1_AVDD(n)			(((n) & 3) << 6)
+#define ST7789V_PWCTRL1_AVCL(n)			(((n) & 3) << 4)
+#define ST7789V_PWCTRL1_VDS(n)			((n) & 3)
+
+#define ST7789V_PVGAMCTRL_CMD		0xe0
+#define ST7789V_PVGAMCTRL_JP0(n)		(((n) & 3) << 4)
+#define ST7789V_PVGAMCTRL_JP1(n)		(((n) & 3) << 4)
+#define ST7789V_PVGAMCTRL_VP0(n)		((n) & 0xf)
+#define ST7789V_PVGAMCTRL_VP1(n)		((n) & 0x3f)
+#define ST7789V_PVGAMCTRL_VP2(n)		((n) & 0x3f)
+#define ST7789V_PVGAMCTRL_VP4(n)		((n) & 0x1f)
+#define ST7789V_PVGAMCTRL_VP6(n)		((n) & 0x1f)
+#define ST7789V_PVGAMCTRL_VP13(n)		((n) & 0xf)
+#define ST7789V_PVGAMCTRL_VP20(n)		((n) & 0x7f)
+#define ST7789V_PVGAMCTRL_VP27(n)		((n) & 7)
+#define ST7789V_PVGAMCTRL_VP36(n)		(((n) & 7) << 4)
+#define ST7789V_PVGAMCTRL_VP43(n)		((n) & 0x7f)
+#define ST7789V_PVGAMCTRL_VP50(n)		((n) & 0xf)
+#define ST7789V_PVGAMCTRL_VP57(n)		((n) & 0x1f)
+#define ST7789V_PVGAMCTRL_VP59(n)		((n) & 0x1f)
+#define ST7789V_PVGAMCTRL_VP61(n)		((n) & 0x3f)
+#define ST7789V_PVGAMCTRL_VP62(n)		((n) & 0x3f)
+#define ST7789V_PVGAMCTRL_VP63(n)		(((n) & 0xf) << 4)
+
+#define ST7789V_NVGAMCTRL_CMD		0xe1
+#define ST7789V_NVGAMCTRL_JN0(n)		(((n) & 3) << 4)
+#define ST7789V_NVGAMCTRL_JN1(n)		(((n) & 3) << 4)
+#define ST7789V_NVGAMCTRL_VN0(n)		((n) & 0xf)
+#define ST7789V_NVGAMCTRL_VN1(n)		((n) & 0x3f)
+#define ST7789V_NVGAMCTRL_VN2(n)		((n) & 0x3f)
+#define ST7789V_NVGAMCTRL_VN4(n)		((n) & 0x1f)
+#define ST7789V_NVGAMCTRL_VN6(n)		((n) & 0x1f)
+#define ST7789V_NVGAMCTRL_VN13(n)		((n) & 0xf)
+#define ST7789V_NVGAMCTRL_VN20(n)		((n) & 0x7f)
+#define ST7789V_NVGAMCTRL_VN27(n)		((n) & 7)
+#define ST7789V_NVGAMCTRL_VN36(n)		(((n) & 7) << 4)
+#define ST7789V_NVGAMCTRL_VN43(n)		((n) & 0x7f)
+#define ST7789V_NVGAMCTRL_VN50(n)		((n) & 0xf)
+#define ST7789V_NVGAMCTRL_VN57(n)		((n) & 0x1f)
+#define ST7789V_NVGAMCTRL_VN59(n)		((n) & 0x1f)
+#define ST7789V_NVGAMCTRL_VN61(n)		((n) & 0x3f)
+#define ST7789V_NVGAMCTRL_VN62(n)		((n) & 0x3f)
+#define ST7789V_NVGAMCTRL_VN63(n)		(((n) & 0xf) << 4)
+
+#define ST7789V_TEST(val, func)			\
+	do {					\
+		if ((val = (func)))		\
+			return val;		\
+	} while (0)
+
+struct st7789v {
+	struct drm_panel panel;
+	struct spi_device *spi;
+	struct gpio_desc *reset;
+	struct backlight_device *backlight;
+	struct regulator *power;
+};
+
+enum st7789v_prefix {
+	ST7789V_COMMAND = 0,
+	ST7789V_DATA = 1,
+};
+
+static inline struct st7789v *panel_to_st7789v(struct drm_panel *panel)
+{
+	return container_of(panel, struct st7789v, panel);
+}
+
+static int st7789v_spi_write(struct st7789v *ctx, enum st7789v_prefix prefix,
+			     u8 data)
+{
+	struct spi_transfer xfer = { };
+	struct spi_message msg;
+	u16 txbuf = ((prefix & 1) << 8) | data;
+
+	spi_message_init(&msg);
+
+	xfer.tx_buf = &txbuf;
+	xfer.bits_per_word = 9;
+	xfer.len = sizeof(txbuf);
+
+	spi_message_add_tail(&xfer, &msg);
+	return spi_sync(ctx->spi, &msg);
+}
+
+static int st7789v_write_command(struct st7789v *ctx, u8 cmd)
+{
+	return st7789v_spi_write(ctx, ST7789V_COMMAND, cmd);
+}
+
+static int st7789v_write_data(struct st7789v *ctx, u8 cmd)
+{
+	return st7789v_spi_write(ctx, ST7789V_DATA, cmd);
+}
+
+static const struct drm_display_mode default_mode = {
+	.clock = 7000,
+	.hdisplay = 240,
+	.hsync_start = 240 + 38,
+	.hsync_end = 240 + 38 + 10,
+	.htotal = 240 + 38 + 10 + 10,
+	.vdisplay = 320,
+	.vsync_start = 320 + 8,
+	.vsync_end = 320 + 8 + 4,
+	.vtotal = 320 + 8 + 4 + 4,
+	.vrefresh = 60,
+};
+
+static int st7789v_get_modes(struct drm_panel *panel)
+{
+	struct drm_connector *connector = panel->connector;
+	struct drm_display_mode *mode;
+
+	mode = drm_mode_duplicate(panel->drm, &default_mode);
+	if (!mode) {
+		dev_err(panel->drm->dev, "failed to add mode %ux%ux@%u\n",
+			default_mode.hdisplay, default_mode.vdisplay,
+			default_mode.vrefresh);
+		return -ENOMEM;
+	}
+
+	drm_mode_set_name(mode);
+
+	mode->type = DRM_MODE_TYPE_DRIVER | DRM_MODE_TYPE_PREFERRED;
+	drm_mode_probed_add(connector, mode);
+
+	panel->connector->display_info.width_mm = 61;
+	panel->connector->display_info.height_mm = 103;
+
+	return 1;
+}
+
+static int st7789v_prepare(struct drm_panel *panel)
+{
+	struct st7789v *ctx = panel_to_st7789v(panel);
+	int ret;
+
+	ret = regulator_enable(ctx->power);
+	if (ret)
+		return ret;
+
+	gpiod_set_value(ctx->reset, 1);
+	msleep(30);
+	gpiod_set_value(ctx->reset, 0);
+	msleep(120);
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, MIPI_DCS_EXIT_SLEEP_MODE));
+
+	/* We need to wait 120ms after a sleep out command */
+	msleep(120);
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx,
+						MIPI_DCS_SET_ADDRESS_MODE));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx,
+						MIPI_DCS_SET_PIXEL_FORMAT));
+	ST7789V_TEST(ret, st7789v_write_data(ctx,
+					     (MIPI_DCS_PIXEL_FMT_18BIT << 4) |
+					     (MIPI_DCS_PIXEL_FMT_18BIT)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_PORCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0xc));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0xc));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PORCTRL_IDLE_BP(3) |
+					     ST7789V_PORCTRL_IDLE_FP(3)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx,
+					     ST7789V_PORCTRL_PARTIAL_BP(3) |
+					     ST7789V_PORCTRL_PARTIAL_FP(3)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_GCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_GCTRL_VGLS(5) |
+					     ST7789V_GCTRL_VGHS(3)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_VCOMS_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0x2b));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_LCMCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_LCMCTRL_XMH |
+					     ST7789V_LCMCTRL_XMX |
+					     ST7789V_LCMCTRL_XBGR));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_VDVVRHEN_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_VDVVRHEN_CMDEN));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_VRHS_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0xf));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_VDVS_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0x20));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_FRCTRL2_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, 0xf));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_PWCTRL1_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PWCTRL1_MAGIC));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PWCTRL1_AVDD(2) |
+					     ST7789V_PWCTRL1_AVCL(2) |
+					     ST7789V_PWCTRL1_VDS(1)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_PVGAMCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP63(0xd)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP1(0xca)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP2(0xe)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP4(8)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP6(9)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP13(7)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP20(0x2d)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP27(0xb) |
+					     ST7789V_PVGAMCTRL_VP36(3)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP43(0x3d)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_JP1(3) |
+					     ST7789V_PVGAMCTRL_VP50(4)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP57(0xa)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP59(0xa)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP61(0x1b)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_PVGAMCTRL_VP62(0x28)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_NVGAMCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN63(0xd)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN1(0xca)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN2(0xf)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN4(8)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN6(8)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN13(7)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN20(0x2e)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN27(0xc) |
+					     ST7789V_NVGAMCTRL_VN36(5)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN43(0x40)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_JN1(3) |
+					     ST7789V_NVGAMCTRL_VN50(4)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN57(9)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN59(0xb)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN61(0x1b)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_NVGAMCTRL_VN62(0x28)));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, MIPI_DCS_ENTER_INVERT_MODE));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_RAMCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_RAMCTRL_DM_RGB |
+					     ST7789V_RAMCTRL_RM_RGB));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_RAMCTRL_EPF(3) |
+					     ST7789V_RAMCTRL_MAGIC));
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, ST7789V_RGBCTRL_CMD));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_RGBCTRL_WO |
+					     ST7789V_RGBCTRL_RCM(2) |
+					     ST7789V_RGBCTRL_VSYNC_HIGH |
+					     ST7789V_RGBCTRL_HSYNC_HIGH |
+					     ST7789V_RGBCTRL_PCLK_HIGH));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_RGBCTRL_VBP(8)));
+	ST7789V_TEST(ret, st7789v_write_data(ctx, ST7789V_RGBCTRL_HBP(20)));
+
+	return 0;
+}
+
+static int st7789v_enable(struct drm_panel *panel)
+{
+	struct st7789v *ctx = panel_to_st7789v(panel);
+
+	if (ctx->backlight) {
+		ctx->backlight->props.state &= ~BL_CORE_FBBLANK;
+		ctx->backlight->props.power = FB_BLANK_UNBLANK;
+		backlight_update_status(ctx->backlight);
+	}
+
+	return st7789v_write_command(ctx, MIPI_DCS_SET_DISPLAY_ON);
+}
+
+static int st7789v_disable(struct drm_panel *panel)
+{
+	struct st7789v *ctx = panel_to_st7789v(panel);
+	int ret;
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, MIPI_DCS_SET_DISPLAY_OFF));
+
+	if (ctx->backlight) {
+		ctx->backlight->props.power = FB_BLANK_POWERDOWN;
+		ctx->backlight->props.state |= BL_CORE_FBBLANK;
+		backlight_update_status(ctx->backlight);
+	}
+
+	return 0;
+}
+
+static int st7789v_unprepare(struct drm_panel *panel)
+{
+	struct st7789v *ctx = panel_to_st7789v(panel);
+	int ret;
+
+	ST7789V_TEST(ret, st7789v_write_command(ctx, MIPI_DCS_ENTER_SLEEP_MODE));
+
+	regulator_disable(ctx->power);
+
+	return 0;
+}
+
+static const struct drm_panel_funcs st7789v_drm_funcs = {
+	.disable	= st7789v_disable,
+	.enable		= st7789v_enable,
+	.get_modes	= st7789v_get_modes,
+	.prepare	= st7789v_prepare,
+	.unprepare	= st7789v_unprepare,
+};
+
+static int st7789v_probe(struct spi_device *spi)
+{
+	struct device_node *backlight;
+	struct st7789v *ctx;
+	int ret;
+
+	ctx = devm_kzalloc(&spi->dev, sizeof(*ctx), GFP_KERNEL);
+	if (!ctx)
+		return -ENOMEM;
+
+	spi_set_drvdata(spi, ctx);
+	ctx->spi = spi;
+
+	ctx->panel.dev = &spi->dev;
+	ctx->panel.funcs = &st7789v_drm_funcs;
+
+	ctx->power = devm_regulator_get(&spi->dev, "power");
+	if (IS_ERR(ctx->power))
+		return PTR_ERR(ctx->power);
+
+	ctx->reset = devm_gpiod_get(&spi->dev, "reset", GPIOD_OUT_LOW);
+	if (IS_ERR(ctx->reset)) {
+		dev_err(&spi->dev, "Couldn't get our reset line\n");
+		return PTR_ERR(ctx->reset);
+	}
+
+	backlight = of_parse_phandle(spi->dev.of_node, "backlight", 0);
+	if (backlight) {
+		ctx->backlight = of_find_backlight_by_node(backlight);
+		of_node_put(backlight);
+
+		if (!ctx->backlight)
+			return -EPROBE_DEFER;
+	}
+
+	ret = drm_panel_add(&ctx->panel);
+	if (ret < 0)
+		goto err_free_backlight;
+
+	return 0;
+
+err_free_backlight:
+	if (ctx->backlight)
+		put_device(&ctx->backlight->dev);
+
+	return ret;
+}
+
+static int st7789v_remove(struct spi_device *spi)
+{
+	struct st7789v *ctx = spi_get_drvdata(spi);
+
+	drm_panel_detach(&ctx->panel);
+	drm_panel_remove(&ctx->panel);
+
+	if (ctx->backlight)
+		put_device(&ctx->backlight->dev);
+
+	return 0;
+}
+
+static const struct of_device_id st7789v_of_match[] = {
+	{ .compatible = "sitronix,st7789v" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, st7789v_of_match);
+
+static struct spi_driver st7789v_driver = {
+	.probe = st7789v_probe,
+	.remove = st7789v_remove,
+	.driver = {
+		.name = "st7789v",
+		.of_match_table = st7789v_of_match,
+	},
+};
+module_spi_driver(st7789v_driver);
+
+MODULE_AUTHOR("Maxime Ripard <maxime.ripard@free-electrons.com>");
+MODULE_DESCRIPTION("Sitronix st7789v LCD Driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/gpu/drm/qxl/qxl_ttm.c b/drivers/gpu/drm/qxl/qxl_ttm.c
index 7d1cab57c89e..0fdedee4509d 100644
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@@ -393,6 +393,7 @@ static struct ttm_bo_driver qxl_bo_driver = {
 	.verify_access = &qxl_verify_access,
 	.io_mem_reserve = &qxl_ttm_io_mem_reserve,
 	.io_mem_free = &qxl_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &qxl_bo_move_notify,
 };
 
diff --git a/drivers/gpu/drm/radeon/r100.c b/drivers/gpu/drm/radeon/r100.c
index aefca0b03f38..c31e660e35db 100644
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@@ -3295,7 +3295,7 @@ void r100_bandwidth_update(struct radeon_device *rdev)
 		mem_trp = ((temp >> 8) & 0x7) + 1;
 		mem_tras = ((temp >> 11) & 0xf) + 4;
 	} else if (rdev->family == CHIP_RV350 ||
-		   rdev->family <= CHIP_RV380) {
+		   rdev->family == CHIP_RV380) {
 		/* rv3x0 */
 		mem_trcd = (temp & 0x7) + 3;
 		mem_trp = ((temp >> 8) & 0x7) + 3;
diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 2e400dc414e3..c1c8e2208a21 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -499,6 +499,7 @@ struct radeon_bo {
 	u32				tiling_flags;
 	u32				pitch;
 	int				surface_reg;
+	unsigned			prime_shared_count;
 	/* list of all virtual address to which this bo
 	 * is associated to
 	 */
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index a8442f7196d6..df6b58c08544 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -164,6 +164,16 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			p->relocs[i].allowed_domains = domain;
 		}
 
+		/* Objects shared as dma-bufs cannot be moved to VRAM */
+		if (p->relocs[i].robj->prime_shared_count) {
+			p->relocs[i].allowed_domains &= ~RADEON_GEM_DOMAIN_VRAM;
+			if (!p->relocs[i].allowed_domains) {
+				DRM_ERROR("BO associated with dma-buf cannot "
+					  "be moved to VRAM\n");
+				return -EINVAL;
+			}
+		}
+
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].tv.shared = !r->write_domain;
 
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index 146297a702ab..17d3dafc8319 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -232,7 +232,8 @@ void radeon_crtc_fb_gamma_get(struct drm_crtc *crtc, u16 *red, u16 *green,
 }
 
 static int radeon_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				 u16 *blue, uint32_t size)
+				 u16 *blue, uint32_t size,
+				 struct drm_modeset_acquire_ctx *ctx)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
 	int i;
@@ -1354,6 +1355,12 @@ radeon_user_framebuffer_create(struct drm_device *dev,
 		return ERR_PTR(-ENOENT);
 	}
 
+	/* Handle is imported dma-buf, so cannot be migrated to VRAM for scanout */
+	if (obj->import_attach) {
+		DRM_DEBUG_KMS("Cannot create framebuffer from imported dma_buf\n");
+		return ERR_PTR(-EINVAL);
+	}
+
 	radeon_fb = kzalloc(sizeof(*radeon_fb), GFP_KERNEL);
 	if (radeon_fb == NULL) {
 		drm_gem_object_unreference_unlocked(obj);
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 9b0b123ce079..dddb372de2b9 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -120,6 +120,10 @@ static int radeon_gem_set_domain(struct drm_gem_object *gobj,
 			return r;
 		}
 	}
+	if (domain == RADEON_GEM_DOMAIN_VRAM && robj->prime_shared_count) {
+		/* A BO that is associated with a dma-buf cannot be sensibly migrated to VRAM */
+		return -EINVAL;
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_object.c b/drivers/gpu/drm/radeon/radeon_object.c
index 74b276060c20..bec2ec056de4 100644
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@@ -352,6 +352,11 @@ int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
 
 		return 0;
 	}
+	if (bo->prime_shared_count && domain == RADEON_GEM_DOMAIN_VRAM) {
+		/* A BO shared as a dma-buf cannot be sensibly migrated to VRAM */
+		return -EINVAL;
+	}
+
 	radeon_ttm_placement_from_domain(bo, domain);
 	for (i = 0; i < bo->placement.num_placement; i++) {
 		/* force to pin into visible video ram */
diff --git a/drivers/gpu/drm/radeon/radeon_prime.c b/drivers/gpu/drm/radeon/radeon_prime.c
index f3609c97496b..7110d403322c 100644
--- a/drivers/gpu/drm/radeon/radeon_prime.c
+++ b/drivers/gpu/drm/radeon/radeon_prime.c
@@ -77,6 +77,7 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
 	list_add_tail(&bo->list, &rdev->gem.objects);
 	mutex_unlock(&rdev->gem.mutex);
 
+	bo->prime_shared_count = 1;
 	return &bo->gem_base;
 }
 
@@ -91,6 +92,9 @@ int radeon_gem_prime_pin(struct drm_gem_object *obj)
 
 	/* pin buffer into GTT */
 	ret = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, NULL);
+	if (likely(ret == 0))
+		bo->prime_shared_count++;
+
 	radeon_bo_unreserve(bo);
 	return ret;
 }
@@ -105,6 +109,8 @@ void radeon_gem_prime_unpin(struct drm_gem_object *obj)
 		return;
 
 	radeon_bo_unpin(bo);
+	if (bo->prime_shared_count)
+		bo->prime_shared_count--;
 	radeon_bo_unreserve(bo);
 }
 
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 684f1703aa5c..8b7623b5a624 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -213,8 +213,8 @@ static void radeon_evict_flags(struct ttm_buffer_object *bo,
 			rbo->placement.num_busy_placement = 0;
 			for (i = 0; i < rbo->placement.num_placement; i++) {
 				if (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) {
-					if (rbo->placements[0].fpfn < fpfn)
-						rbo->placements[0].fpfn = fpfn;
+					if (rbo->placements[i].fpfn < fpfn)
+						rbo->placements[i].fpfn = fpfn;
 				} else {
 					rbo->placement.busy_placement =
 						&rbo->placements[i];
@@ -873,6 +873,7 @@ static struct ttm_bo_driver radeon_bo_driver = {
 	.fault_reserve_notify = &radeon_bo_fault_reserve_notify,
 	.io_mem_reserve = &radeon_ttm_io_mem_reserve,
 	.io_mem_free = &radeon_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
 
 int radeon_ttm_init(struct radeon_device *rdev)
diff --git a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
index 91ebe5c2c7a0..d8fa7a9c9240 100644
--- a/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
+++ b/drivers/gpu/drm/rockchip/analogix_dp-rockchip.c
@@ -417,7 +417,8 @@ static void rockchip_dp_unbind(struct device *dev, struct device *master,
 
 	rockchip_drm_psr_unregister(&dp->encoder);
 
-	return analogix_dp_unbind(dev, master, data);
+	analogix_dp_unbind(dev, master, data);
+	clk_disable_unprepare(dp->pclk);
 }
 
 static const struct component_ops rockchip_dp_component_ops = {
@@ -428,31 +429,13 @@ static const struct component_ops rockchip_dp_component_ops = {
 static int rockchip_dp_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
-	struct device_node *panel_node, *port, *endpoint;
 	struct drm_panel *panel = NULL;
 	struct rockchip_dp_device *dp;
+	int ret;
 
-	port = of_graph_get_port_by_id(dev->of_node, 1);
-	if (port) {
-		endpoint = of_get_child_by_name(port, "endpoint");
-		of_node_put(port);
-		if (!endpoint) {
-			dev_err(dev, "no output endpoint found\n");
-			return -EINVAL;
-		}
-
-		panel_node = of_graph_get_remote_port_parent(endpoint);
-		of_node_put(endpoint);
-		if (!panel_node) {
-			dev_err(dev, "no output node found\n");
-			return -EINVAL;
-		}
-
-		panel = of_drm_find_panel(panel_node);
-		of_node_put(panel_node);
-		if (!panel)
-			return -EPROBE_DEFER;
-	}
+	ret = drm_of_find_panel_or_bridge(dev->of_node, 1, 0, &panel, NULL);
+	if (ret)
+		return ret;
 
 	dp = devm_kzalloc(dev, sizeof(*dp), GFP_KERNEL);
 	if (!dp)
diff --git a/drivers/gpu/drm/rockchip/cdn-dp-core.c b/drivers/gpu/drm/rockchip/cdn-dp-core.c
index 4e55d63c3ef3..a2169dd3d26b 100644
--- a/drivers/gpu/drm/rockchip/cdn-dp-core.c
+++ b/drivers/gpu/drm/rockchip/cdn-dp-core.c
@@ -1053,6 +1053,7 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data)
 	dp->connected = false;
 	dp->active = false;
 	dp->active_port = -1;
+	dp->fw_loaded = false;
 
 	INIT_WORK(&dp->event_work, cdn_dp_pd_event_work);
 
@@ -1091,8 +1092,6 @@ static int cdn_dp_bind(struct device *dev, struct device *master, void *data)
 		goto err_free_connector;
 	}
 
-	cdn_dp_audio_codec_init(dp, dev);
-
 	for (i = 0; i < dp->ports; i++) {
 		port = dp->port[i];
 
@@ -1127,13 +1126,13 @@ static void cdn_dp_unbind(struct device *dev, struct device *master, void *data)
 	struct drm_connector *connector = &dp->connector;
 
 	cancel_work_sync(&dp->event_work);
-	platform_device_unregister(dp->audio_pdev);
 	cdn_dp_encoder_disable(encoder);
 	encoder->funcs->destroy(encoder);
 	connector->funcs->destroy(connector);
 
 	pm_runtime_disable(dev);
-	release_firmware(dp->fw);
+	if (dp->fw_loaded)
+		release_firmware(dp->fw);
 	kfree(dp->edid);
 	dp->edid = NULL;
 }
@@ -1219,6 +1218,8 @@ static int cdn_dp_probe(struct platform_device *pdev)
 	mutex_init(&dp->lock);
 	dev_set_drvdata(dev, dp);
 
+	cdn_dp_audio_codec_init(dp, dev);
+
 	return component_add(dev, &cdn_dp_component_ops);
 }
 
@@ -1226,6 +1227,7 @@ static int cdn_dp_remove(struct platform_device *pdev)
 {
 	struct cdn_dp_device *dp = platform_get_drvdata(pdev);
 
+	platform_device_unregister(dp->audio_pdev);
 	cdn_dp_suspend(dp->dev);
 	component_del(&pdev->dev, &cdn_dp_component_ops);
 
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index cd7d02e1f758..c6b1b7f3a2a3 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -136,21 +136,24 @@ static int rockchip_drm_bind(struct device *dev)
 	INIT_LIST_HEAD(&private->psr_list);
 	spin_lock_init(&private->psr_list_lock);
 
+	ret = rockchip_drm_init_iommu(drm_dev);
+	if (ret)
+		goto err_free;
+
 	drm_mode_config_init(drm_dev);
 
 	rockchip_drm_mode_config_init(drm_dev);
 
-	ret = rockchip_drm_init_iommu(drm_dev);
-	if (ret)
-		goto err_config_cleanup;
-
 	/* Try to bind all sub drivers. */
 	ret = component_bind_all(dev, drm_dev);
 	if (ret)
-		goto err_iommu_cleanup;
+		goto err_mode_config_cleanup;
 
-	/* init kms poll for handling hpd */
-	drm_kms_helper_poll_init(drm_dev);
+	ret = drm_vblank_init(drm_dev, drm_dev->mode_config.num_crtc);
+	if (ret)
+		goto err_unbind_all;
+
+	drm_mode_config_reset(drm_dev);
 
 	/*
 	 * enable drm irq mode.
@@ -158,15 +161,12 @@ static int rockchip_drm_bind(struct device *dev)
 	 */
 	drm_dev->irq_enabled = true;
 
-	ret = drm_vblank_init(drm_dev, ROCKCHIP_MAX_CRTC);
-	if (ret)
-		goto err_kms_helper_poll_fini;
-
-	drm_mode_config_reset(drm_dev);
+	/* init kms poll for handling hpd */
+	drm_kms_helper_poll_init(drm_dev);
 
 	ret = rockchip_drm_fbdev_init(drm_dev);
 	if (ret)
-		goto err_vblank_cleanup;
+		goto err_kms_helper_poll_fini;
 
 	ret = drm_dev_register(drm_dev, 0);
 	if (ret)
@@ -175,17 +175,17 @@ static int rockchip_drm_bind(struct device *dev)
 	return 0;
 err_fbdev_fini:
 	rockchip_drm_fbdev_fini(drm_dev);
-err_vblank_cleanup:
-	drm_vblank_cleanup(drm_dev);
 err_kms_helper_poll_fini:
 	drm_kms_helper_poll_fini(drm_dev);
+	drm_vblank_cleanup(drm_dev);
+err_unbind_all:
 	component_unbind_all(dev, drm_dev);
-err_iommu_cleanup:
-	rockchip_iommu_cleanup(drm_dev);
-err_config_cleanup:
+err_mode_config_cleanup:
 	drm_mode_config_cleanup(drm_dev);
-	drm_dev->dev_private = NULL;
+	rockchip_iommu_cleanup(drm_dev);
 err_free:
+	drm_dev->dev_private = NULL;
+	dev_set_drvdata(dev, NULL);
 	drm_dev_unref(drm_dev);
 	return ret;
 }
@@ -194,16 +194,20 @@ static void rockchip_drm_unbind(struct device *dev)
 {
 	struct drm_device *drm_dev = dev_get_drvdata(dev);
 
+	drm_dev_unregister(drm_dev);
+
 	rockchip_drm_fbdev_fini(drm_dev);
-	drm_vblank_cleanup(drm_dev);
 	drm_kms_helper_poll_fini(drm_dev);
+
+	drm_atomic_helper_shutdown(drm_dev);
+	drm_vblank_cleanup(drm_dev);
 	component_unbind_all(dev, drm_dev);
-	rockchip_iommu_cleanup(drm_dev);
 	drm_mode_config_cleanup(drm_dev);
+	rockchip_iommu_cleanup(drm_dev);
+
 	drm_dev->dev_private = NULL;
-	drm_dev_unregister(drm_dev);
-	drm_dev_unref(drm_dev);
 	dev_set_drvdata(dev, NULL);
+	drm_dev_unref(drm_dev);
 }
 
 static void rockchip_drm_lastclose(struct drm_device *dev)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
index 2151e1cee4b4..3f7a82d1e095 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_vop.c
@@ -506,7 +506,7 @@ static int vop_enable(struct drm_crtc *crtc)
 	ret = pm_runtime_get_sync(vop->dev);
 	if (ret < 0) {
 		dev_err(vop->dev, "failed to get pm runtime: %d\n", ret);
-		goto err_put_pm_runtime;
+		return ret;
 	}
 
 	ret = clk_enable(vop->hclk);
@@ -1405,10 +1405,16 @@ static int vop_initial(struct vop *vop)
 		return PTR_ERR(vop->dclk);
 	}
 
+	ret = pm_runtime_get_sync(vop->dev);
+	if (ret < 0) {
+		dev_err(vop->dev, "failed to get pm runtime: %d\n", ret);
+		return ret;
+	}
+
 	ret = clk_prepare(vop->dclk);
 	if (ret < 0) {
 		dev_err(vop->dev, "failed to prepare dclk\n");
-		return ret;
+		goto err_put_pm_runtime;
 	}
 
 	/* Enable both the hclk and aclk to setup the vop */
@@ -1468,6 +1474,8 @@ static int vop_initial(struct vop *vop)
 
 	vop->is_enabled = false;
 
+	pm_runtime_put_sync(vop->dev);
+
 	return 0;
 
 err_disable_aclk:
@@ -1476,6 +1484,8 @@ err_disable_hclk:
 	clk_disable_unprepare(vop->hclk);
 err_unprepare_dclk:
 	clk_unprepare(vop->dclk);
+err_put_pm_runtime:
+	pm_runtime_put_sync(vop->dev);
 	return ret;
 }
 
@@ -1576,12 +1586,6 @@ static int vop_bind(struct device *dev, struct device *master, void *data)
 	if (!vop->regsbak)
 		return -ENOMEM;
 
-	ret = vop_initial(vop);
-	if (ret < 0) {
-		dev_err(&pdev->dev, "cannot initial vop dev - err %d\n", ret);
-		return ret;
-	}
-
 	irq = platform_get_irq(pdev, 0);
 	if (irq < 0) {
 		dev_err(dev, "cannot find irq for vop\n");
@@ -1608,8 +1612,17 @@ static int vop_bind(struct device *dev, struct device *master, void *data)
 
 	pm_runtime_enable(&pdev->dev);
 
+	ret = vop_initial(vop);
+	if (ret < 0) {
+		dev_err(&pdev->dev, "cannot initial vop dev - err %d\n", ret);
+		goto err_disable_pm_runtime;
+	}
+
 	return 0;
 
+err_disable_pm_runtime:
+	pm_runtime_disable(&pdev->dev);
+	vop_destroy_crtc(vop);
 err_enable_irq:
 	enable_irq(vop->irq); /* To balance out the disable_irq above */
 	return ret;
@@ -1621,6 +1634,10 @@ static void vop_unbind(struct device *dev, struct device *master, void *data)
 
 	pm_runtime_disable(dev);
 	vop_destroy_crtc(vop);
+
+	clk_unprepare(vop->aclk);
+	clk_unprepare(vop->hclk);
+	clk_unprepare(vop->dclk);
 }
 
 const struct component_ops vop_component_ops = {
diff --git a/drivers/gpu/drm/sti/sti_gdp.c b/drivers/gpu/drm/sti/sti_gdp.c
index 86279f5022c2..88f16cdf6a4b 100644
--- a/drivers/gpu/drm/sti/sti_gdp.c
+++ b/drivers/gpu/drm/sti/sti_gdp.c
@@ -66,7 +66,9 @@ static struct gdp_format_to_str {
 #define GAM_GDP_ALPHARANGE_255  BIT(5)
 #define GAM_GDP_AGC_FULL_RANGE  0x00808080
 #define GAM_GDP_PPT_IGNORE      (BIT(1) | BIT(0))
-#define GAM_GDP_SIZE_MAX        0x7FF
+
+#define GAM_GDP_SIZE_MAX_WIDTH  3840
+#define GAM_GDP_SIZE_MAX_HEIGHT 2160
 
 #define GDP_NODE_NB_BANK        2
 #define GDP_NODE_PER_FIELD      2
@@ -632,8 +634,8 @@ static int sti_gdp_atomic_check(struct drm_plane *drm_plane,
 	/* src_x are in 16.16 format */
 	src_x = state->src_x >> 16;
 	src_y = state->src_y >> 16;
-	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
-	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX_WIDTH);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX_HEIGHT);
 
 	format = sti_gdp_fourcc2format(fb->format->format);
 	if (format == -1) {
@@ -741,8 +743,8 @@ static void sti_gdp_atomic_update(struct drm_plane *drm_plane,
 	/* src_x are in 16.16 format */
 	src_x = state->src_x >> 16;
 	src_y = state->src_y >> 16;
-	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX);
-	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX);
+	src_w = clamp_val(state->src_w >> 16, 0, GAM_GDP_SIZE_MAX_WIDTH);
+	src_h = clamp_val(state->src_h >> 16, 0, GAM_GDP_SIZE_MAX_HEIGHT);
 
 	list = sti_gdp_get_free_nodes(gdp);
 	top_field = list->top_field;
diff --git a/drivers/gpu/drm/sun4i/Makefile b/drivers/gpu/drm/sun4i/Makefile
index d625a82a6e5f..59b757350a1f 100644
--- a/drivers/gpu/drm/sun4i/Makefile
+++ b/drivers/gpu/drm/sun4i/Makefile
@@ -1,11 +1,11 @@
-sun4i-drm-y += sun4i_crtc.o
 sun4i-drm-y += sun4i_drv.o
 sun4i-drm-y += sun4i_framebuffer.o
-sun4i-drm-y += sun4i_layer.o
 
 sun4i-tcon-y += sun4i_tcon.o
 sun4i-tcon-y += sun4i_rgb.o
 sun4i-tcon-y += sun4i_dotclock.o
+sun4i-tcon-y += sun4i_crtc.o
+sun4i-tcon-y += sun4i_layer.o
 
 obj-$(CONFIG_DRM_SUN4I)		+= sun4i-drm.o sun4i-tcon.o
 obj-$(CONFIG_DRM_SUN4I)		+= sun4i_backend.o
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index 08ce15070f80..d660741ba475 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -24,7 +24,7 @@
 #include "sun4i_backend.h"
 #include "sun4i_drv.h"
 
-static u32 sunxi_rgb2yuv_coef[12] = {
+static const u32 sunxi_rgb2yuv_coef[12] = {
 	0x00000107, 0x00000204, 0x00000064, 0x00000108,
 	0x00003f69, 0x00003ed6, 0x000001c1, 0x00000808,
 	0x000001c1, 0x00003e88, 0x00003fb8, 0x00000808
diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.c b/drivers/gpu/drm/sun4i/sun4i_crtc.c
index a5d546a68e16..3c876c3a356a 100644
--- a/drivers/gpu/drm/sun4i/sun4i_crtc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_crtc.c
@@ -19,6 +19,7 @@
 #include <linux/clk-provider.h>
 #include <linux/ioport.h>
 #include <linux/of_address.h>
+#include <linux/of_graph.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 
@@ -27,6 +28,7 @@
 #include "sun4i_backend.h"
 #include "sun4i_crtc.h"
 #include "sun4i_drv.h"
+#include "sun4i_layer.h"
 #include "sun4i_tcon.h"
 
 static void sun4i_crtc_atomic_begin(struct drm_crtc *crtc,
@@ -50,12 +52,11 @@ static void sun4i_crtc_atomic_flush(struct drm_crtc *crtc,
 				    struct drm_crtc_state *old_state)
 {
 	struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
-	struct sun4i_drv *drv = scrtc->drv;
 	struct drm_pending_vblank_event *event = crtc->state->event;
 
 	DRM_DEBUG_DRIVER("Committing plane changes\n");
 
-	sun4i_backend_commit(drv->backend);
+	sun4i_backend_commit(scrtc->backend);
 
 	if (event) {
 		crtc->state->event = NULL;
@@ -72,11 +73,10 @@ static void sun4i_crtc_atomic_flush(struct drm_crtc *crtc,
 static void sun4i_crtc_disable(struct drm_crtc *crtc)
 {
 	struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
-	struct sun4i_drv *drv = scrtc->drv;
 
 	DRM_DEBUG_DRIVER("Disabling the CRTC\n");
 
-	sun4i_tcon_disable(drv->tcon);
+	sun4i_tcon_disable(scrtc->tcon);
 
 	if (crtc->state->event && !crtc->state->active) {
 		spin_lock_irq(&crtc->dev->event_lock);
@@ -90,11 +90,10 @@ static void sun4i_crtc_disable(struct drm_crtc *crtc)
 static void sun4i_crtc_enable(struct drm_crtc *crtc)
 {
 	struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
-	struct sun4i_drv *drv = scrtc->drv;
 
 	DRM_DEBUG_DRIVER("Enabling the CRTC\n");
 
-	sun4i_tcon_enable(drv->tcon);
+	sun4i_tcon_enable(scrtc->tcon);
 }
 
 static const struct drm_crtc_helper_funcs sun4i_crtc_helper_funcs = {
@@ -107,11 +106,10 @@ static const struct drm_crtc_helper_funcs sun4i_crtc_helper_funcs = {
 static int sun4i_crtc_enable_vblank(struct drm_crtc *crtc)
 {
 	struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
-	struct sun4i_drv *drv = scrtc->drv;
 
 	DRM_DEBUG_DRIVER("Enabling VBLANK on crtc %p\n", crtc);
 
-	sun4i_tcon_enable_vblank(drv->tcon, true);
+	sun4i_tcon_enable_vblank(scrtc->tcon, true);
 
 	return 0;
 }
@@ -119,11 +117,10 @@ static int sun4i_crtc_enable_vblank(struct drm_crtc *crtc)
 static void sun4i_crtc_disable_vblank(struct drm_crtc *crtc)
 {
 	struct sun4i_crtc *scrtc = drm_crtc_to_sun4i_crtc(crtc);
-	struct sun4i_drv *drv = scrtc->drv;
 
 	DRM_DEBUG_DRIVER("Disabling VBLANK on crtc %p\n", crtc);
 
-	sun4i_tcon_enable_vblank(drv->tcon, false);
+	sun4i_tcon_enable_vblank(scrtc->tcon, false);
 }
 
 static const struct drm_crtc_funcs sun4i_crtc_funcs = {
@@ -137,28 +134,67 @@ static const struct drm_crtc_funcs sun4i_crtc_funcs = {
 	.disable_vblank		= sun4i_crtc_disable_vblank,
 };
 
-struct sun4i_crtc *sun4i_crtc_init(struct drm_device *drm)
+struct sun4i_crtc *sun4i_crtc_init(struct drm_device *drm,
+				   struct sun4i_backend *backend,
+				   struct sun4i_tcon *tcon)
 {
-	struct sun4i_drv *drv = drm->dev_private;
 	struct sun4i_crtc *scrtc;
-	int ret;
+	struct drm_plane *primary = NULL, *cursor = NULL;
+	int ret, i;
 
 	scrtc = devm_kzalloc(drm->dev, sizeof(*scrtc), GFP_KERNEL);
 	if (!scrtc)
+		return ERR_PTR(-ENOMEM);
+	scrtc->backend = backend;
+	scrtc->tcon = tcon;
+
+	/* Create our layers */
+	scrtc->layers = sun4i_layers_init(drm, scrtc->backend);
+	if (IS_ERR(scrtc->layers)) {
+		dev_err(drm->dev, "Couldn't create the planes\n");
 		return NULL;
-	scrtc->drv = drv;
+	}
+
+	/* find primary and cursor planes for drm_crtc_init_with_planes */
+	for (i = 0; scrtc->layers[i]; i++) {
+		struct sun4i_layer *layer = scrtc->layers[i];
+
+		switch (layer->plane.type) {
+		case DRM_PLANE_TYPE_PRIMARY:
+			primary = &layer->plane;
+			break;
+		case DRM_PLANE_TYPE_CURSOR:
+			cursor = &layer->plane;
+			break;
+		default:
+			break;
+		}
+	}
 
 	ret = drm_crtc_init_with_planes(drm, &scrtc->crtc,
-					drv->primary,
-					NULL,
+					primary,
+					cursor,
 					&sun4i_crtc_funcs,
 					NULL);
 	if (ret) {
 		dev_err(drm->dev, "Couldn't init DRM CRTC\n");
-		return NULL;
+		return ERR_PTR(ret);
 	}
 
 	drm_crtc_helper_add(&scrtc->crtc, &sun4i_crtc_helper_funcs);
 
+	/* Set crtc.port to output port node of the tcon */
+	scrtc->crtc.port = of_graph_get_port_by_id(scrtc->tcon->dev->of_node,
+						   1);
+
+	/* Set possible_crtcs to this crtc for overlay planes */
+	for (i = 0; scrtc->layers[i]; i++) {
+		uint32_t possible_crtcs = BIT(drm_crtc_index(&scrtc->crtc));
+		struct sun4i_layer *layer = scrtc->layers[i];
+
+		if (layer->plane.type == DRM_PLANE_TYPE_OVERLAY)
+			layer->plane.possible_crtcs = possible_crtcs;
+	}
+
 	return scrtc;
 }
diff --git a/drivers/gpu/drm/sun4i/sun4i_crtc.h b/drivers/gpu/drm/sun4i/sun4i_crtc.h
index dec8ce4d9b25..230cb8f0d601 100644
--- a/drivers/gpu/drm/sun4i/sun4i_crtc.h
+++ b/drivers/gpu/drm/sun4i/sun4i_crtc.h
@@ -17,7 +17,9 @@ struct sun4i_crtc {
 	struct drm_crtc			crtc;
 	struct drm_pending_vblank_event	*event;
 
-	struct sun4i_drv		*drv;
+	struct sun4i_backend		*backend;
+	struct sun4i_tcon		*tcon;
+	struct sun4i_layer		**layers;
 };
 
 static inline struct sun4i_crtc *drm_crtc_to_sun4i_crtc(struct drm_crtc *crtc)
@@ -25,6 +27,8 @@ static inline struct sun4i_crtc *drm_crtc_to_sun4i_crtc(struct drm_crtc *crtc)
 	return container_of(crtc, struct sun4i_crtc, crtc);
 }
 
-struct sun4i_crtc *sun4i_crtc_init(struct drm_device *drm);
+struct sun4i_crtc *sun4i_crtc_init(struct drm_device *drm,
+				   struct sun4i_backend *backend,
+				   struct sun4i_tcon *tcon);
 
 #endif /* _SUN4I_CRTC_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.c b/drivers/gpu/drm/sun4i/sun4i_drv.c
index 329ea56106a5..8ddd72cd5873 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.c
@@ -12,6 +12,7 @@
 
 #include <linux/component.h>
 #include <linux/of_graph.h>
+#include <linux/of_reserved_mem.h>
 
 #include <drm/drmP.h>
 #include <drm/drm_crtc_helper.h>
@@ -20,10 +21,9 @@
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_of.h>
 
-#include "sun4i_crtc.h"
 #include "sun4i_drv.h"
 #include "sun4i_framebuffer.h"
-#include "sun4i_layer.h"
+#include "sun4i_tcon.h"
 
 DEFINE_DRM_GEM_CMA_FOPS(sun4i_drv_fops);
 
@@ -92,30 +92,25 @@ static int sun4i_drv_bind(struct device *dev)
 	}
 	drm->dev_private = drv;
 
-	drm_vblank_init(drm, 1);
+	ret = of_reserved_mem_device_init(dev);
+	if (ret && ret != -ENODEV) {
+		dev_err(drm->dev, "Couldn't claim our memory region\n");
+		goto free_drm;
+	}
+
+	/* drm_vblank_init calls kcalloc, which can fail */
+	ret = drm_vblank_init(drm, 1);
+	if (ret)
+		goto free_mem_region;
+
 	drm_mode_config_init(drm);
 
 	ret = component_bind_all(drm->dev, drm);
 	if (ret) {
 		dev_err(drm->dev, "Couldn't bind all pipelines components\n");
-		goto free_drm;
-	}
-
-	/* Create our layers */
-	drv->layers = sun4i_layers_init(drm);
-	if (IS_ERR(drv->layers)) {
-		dev_err(drm->dev, "Couldn't create the planes\n");
-		ret = PTR_ERR(drv->layers);
-		goto free_drm;
+		goto cleanup_mode_config;
 	}
 
-	/* Create our CRTC */
-	drv->crtc = sun4i_crtc_init(drm);
-	if (!drv->crtc) {
-		dev_err(drm->dev, "Couldn't create the CRTC\n");
-		ret = -EINVAL;
-		goto free_drm;
-	}
 	drm->irq_enabled = true;
 
 	/* Remove early framebuffers (ie. simplefb) */
@@ -126,7 +121,7 @@ static int sun4i_drv_bind(struct device *dev)
 	if (IS_ERR(drv->fbdev)) {
 		dev_err(drm->dev, "Couldn't create our framebuffer\n");
 		ret = PTR_ERR(drv->fbdev);
-		goto free_drm;
+		goto cleanup_mode_config;
 	}
 
 	/* Enable connectors polling */
@@ -134,10 +129,18 @@ static int sun4i_drv_bind(struct device *dev)
 
 	ret = drm_dev_register(drm, 0);
 	if (ret)
-		goto free_drm;
+		goto finish_poll;
 
 	return 0;
 
+finish_poll:
+	drm_kms_helper_poll_fini(drm);
+	sun4i_framebuffer_free(drm);
+cleanup_mode_config:
+	drm_mode_config_cleanup(drm);
+	drm_vblank_cleanup(drm);
+free_mem_region:
+	of_reserved_mem_device_release(dev);
 free_drm:
 	drm_dev_unref(drm);
 	return ret;
@@ -150,7 +153,9 @@ static void sun4i_drv_unbind(struct device *dev)
 	drm_dev_unregister(drm);
 	drm_kms_helper_poll_fini(drm);
 	sun4i_framebuffer_free(drm);
+	drm_mode_config_cleanup(drm);
 	drm_vblank_cleanup(drm);
+	of_reserved_mem_device_release(dev);
 	drm_dev_unref(drm);
 }
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_drv.h b/drivers/gpu/drm/sun4i/sun4i_drv.h
index 597353eab728..5df50126ff52 100644
--- a/drivers/gpu/drm/sun4i/sun4i_drv.h
+++ b/drivers/gpu/drm/sun4i/sun4i_drv.h
@@ -18,13 +18,9 @@
 
 struct sun4i_drv {
 	struct sun4i_backend	*backend;
-	struct sun4i_crtc	*crtc;
 	struct sun4i_tcon	*tcon;
 
-	struct drm_plane	*primary;
 	struct drm_fbdev_cma	*fbdev;
-
-	struct sun4i_layer	**layers;
 };
 
 #endif /* _SUN4I_DRV_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
index 2c3beff8b53e..9872e0fc03b0 100644
--- a/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_framebuffer.c
@@ -48,5 +48,4 @@ void sun4i_framebuffer_free(struct drm_device *drm)
 	struct sun4i_drv *drv = drm->dev_private;
 
 	drm_fbdev_cma_fini(drv->fbdev);
-	drm_mode_config_cleanup(drm);
 }
diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.c b/drivers/gpu/drm/sun4i/sun4i_layer.c
index 5d53c977bca5..f26bde5b9117 100644
--- a/drivers/gpu/drm/sun4i/sun4i_layer.c
+++ b/drivers/gpu/drm/sun4i/sun4i_layer.c
@@ -16,7 +16,6 @@
 #include <drm/drmP.h>
 
 #include "sun4i_backend.h"
-#include "sun4i_drv.h"
 #include "sun4i_layer.h"
 
 struct sun4i_plane_desc {
@@ -36,8 +35,7 @@ static void sun4i_backend_layer_atomic_disable(struct drm_plane *plane,
 					       struct drm_plane_state *old_state)
 {
 	struct sun4i_layer *layer = plane_to_sun4i_layer(plane);
-	struct sun4i_drv *drv = layer->drv;
-	struct sun4i_backend *backend = drv->backend;
+	struct sun4i_backend *backend = layer->backend;
 
 	sun4i_backend_layer_enable(backend, layer->id, false);
 }
@@ -46,8 +44,7 @@ static void sun4i_backend_layer_atomic_update(struct drm_plane *plane,
 					      struct drm_plane_state *old_state)
 {
 	struct sun4i_layer *layer = plane_to_sun4i_layer(plane);
-	struct sun4i_drv *drv = layer->drv;
-	struct sun4i_backend *backend = drv->backend;
+	struct sun4i_backend *backend = layer->backend;
 
 	sun4i_backend_update_layer_coord(backend, layer->id, plane);
 	sun4i_backend_update_layer_formats(backend, layer->id, plane);
@@ -104,9 +101,9 @@ static const struct sun4i_plane_desc sun4i_backend_planes[] = {
 };
 
 static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm,
+						struct sun4i_backend *backend,
 						const struct sun4i_plane_desc *plane)
 {
-	struct sun4i_drv *drv = drm->dev_private;
 	struct sun4i_layer *layer;
 	int ret;
 
@@ -114,7 +111,8 @@ static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm,
 	if (!layer)
 		return ERR_PTR(-ENOMEM);
 
-	ret = drm_universal_plane_init(drm, &layer->plane, BIT(0),
+	/* possible crtcs are set later */
+	ret = drm_universal_plane_init(drm, &layer->plane, 0,
 				       &sun4i_backend_layer_funcs,
 				       plane->formats, plane->nformats,
 				       plane->type, NULL);
@@ -125,22 +123,19 @@ static struct sun4i_layer *sun4i_layer_init_one(struct drm_device *drm,
 
 	drm_plane_helper_add(&layer->plane,
 			     &sun4i_backend_layer_helper_funcs);
-	layer->drv = drv;
-
-	if (plane->type == DRM_PLANE_TYPE_PRIMARY)
-		drv->primary = &layer->plane;
+	layer->backend = backend;
 
 	return layer;
 }
 
-struct sun4i_layer **sun4i_layers_init(struct drm_device *drm)
+struct sun4i_layer **sun4i_layers_init(struct drm_device *drm,
+				       struct sun4i_backend *backend)
 {
-	struct sun4i_drv *drv = drm->dev_private;
 	struct sun4i_layer **layers;
 	int i;
 
-	layers = devm_kcalloc(drm->dev, ARRAY_SIZE(sun4i_backend_planes),
-			      sizeof(**layers), GFP_KERNEL);
+	layers = devm_kcalloc(drm->dev, ARRAY_SIZE(sun4i_backend_planes) + 1,
+			      sizeof(*layers), GFP_KERNEL);
 	if (!layers)
 		return ERR_PTR(-ENOMEM);
 
@@ -167,9 +162,9 @@ struct sun4i_layer **sun4i_layers_init(struct drm_device *drm)
 	 */
 	for (i = 0; i < ARRAY_SIZE(sun4i_backend_planes); i++) {
 		const struct sun4i_plane_desc *plane = &sun4i_backend_planes[i];
-		struct sun4i_layer *layer = layers[i];
+		struct sun4i_layer *layer;
 
-		layer = sun4i_layer_init_one(drm, plane);
+		layer = sun4i_layer_init_one(drm, backend, plane);
 		if (IS_ERR(layer)) {
 			dev_err(drm->dev, "Couldn't initialize %s plane\n",
 				i ? "overlay" : "primary");
@@ -178,11 +173,12 @@ struct sun4i_layer **sun4i_layers_init(struct drm_device *drm)
 
 		DRM_DEBUG_DRIVER("Assigning %s plane to pipe %d\n",
 				 i ? "overlay" : "primary", plane->pipe);
-		regmap_update_bits(drv->backend->regs, SUN4I_BACKEND_ATTCTL_REG0(i),
+		regmap_update_bits(backend->regs, SUN4I_BACKEND_ATTCTL_REG0(i),
 				   SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL_MASK,
 				   SUN4I_BACKEND_ATTCTL_REG0_LAY_PIPESEL(plane->pipe));
 
 		layer->id = i;
+		layers[i] = layer;
 	};
 
 	return layers;
diff --git a/drivers/gpu/drm/sun4i/sun4i_layer.h b/drivers/gpu/drm/sun4i/sun4i_layer.h
index a2f65d7a3f4e..4be1f0919df2 100644
--- a/drivers/gpu/drm/sun4i/sun4i_layer.h
+++ b/drivers/gpu/drm/sun4i/sun4i_layer.h
@@ -16,6 +16,7 @@
 struct sun4i_layer {
 	struct drm_plane	plane;
 	struct sun4i_drv	*drv;
+	struct sun4i_backend	*backend;
 	int			id;
 };
 
@@ -25,6 +26,7 @@ plane_to_sun4i_layer(struct drm_plane *plane)
 	return container_of(plane, struct sun4i_layer, plane);
 }
 
-struct sun4i_layer **sun4i_layers_init(struct drm_device *drm);
+struct sun4i_layer **sun4i_layers_init(struct drm_device *drm,
+				       struct sun4i_backend *backend);
 
 #endif /* _SUN4I_LAYER_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.c b/drivers/gpu/drm/sun4i/sun4i_rgb.c
index 757208f51731..67f0b91a99de 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.c
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.c
@@ -15,9 +15,10 @@
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
-#include "sun4i_drv.h"
+#include "sun4i_crtc.h"
 #include "sun4i_tcon.h"
 #include "sun4i_rgb.h"
 
@@ -25,7 +26,7 @@ struct sun4i_rgb {
 	struct drm_connector	connector;
 	struct drm_encoder	encoder;
 
-	struct sun4i_drv	*drv;
+	struct sun4i_tcon	*tcon;
 };
 
 static inline struct sun4i_rgb *
@@ -46,8 +47,7 @@ static int sun4i_rgb_get_modes(struct drm_connector *connector)
 {
 	struct sun4i_rgb *rgb =
 		drm_connector_to_sun4i_rgb(connector);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 
 	return drm_panel_get_modes(tcon->panel);
 }
@@ -56,8 +56,7 @@ static int sun4i_rgb_mode_valid(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
 	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 	u32 hsync = mode->hsync_end - mode->hsync_start;
 	u32 vsync = mode->vsync_end - mode->vsync_start;
 	unsigned long rate = mode->clock * 1000;
@@ -114,8 +113,7 @@ static void
 sun4i_rgb_connector_destroy(struct drm_connector *connector)
 {
 	struct sun4i_rgb *rgb = drm_connector_to_sun4i_rgb(connector);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 
 	drm_panel_detach(tcon->panel);
 	drm_connector_cleanup(connector);
@@ -140,8 +138,7 @@ static int sun4i_rgb_atomic_check(struct drm_encoder *encoder,
 static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder)
 {
 	struct sun4i_rgb *rgb = drm_encoder_to_sun4i_rgb(encoder);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 
 	DRM_DEBUG_DRIVER("Enabling RGB output\n");
 
@@ -157,8 +154,7 @@ static void sun4i_rgb_encoder_enable(struct drm_encoder *encoder)
 static void sun4i_rgb_encoder_disable(struct drm_encoder *encoder)
 {
 	struct sun4i_rgb *rgb = drm_encoder_to_sun4i_rgb(encoder);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 
 	DRM_DEBUG_DRIVER("Disabling RGB output\n");
 
@@ -176,8 +172,7 @@ static void sun4i_rgb_encoder_mode_set(struct drm_encoder *encoder,
 				       struct drm_display_mode *adjusted_mode)
 {
 	struct sun4i_rgb *rgb = drm_encoder_to_sun4i_rgb(encoder);
-	struct sun4i_drv *drv = rgb->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_tcon *tcon = rgb->tcon;
 
 	sun4i_tcon0_mode_set(tcon, mode);
 
@@ -203,10 +198,8 @@ static struct drm_encoder_funcs sun4i_rgb_enc_funcs = {
 	.destroy	= sun4i_rgb_enc_destroy,
 };
 
-int sun4i_rgb_init(struct drm_device *drm)
+int sun4i_rgb_init(struct drm_device *drm, struct sun4i_tcon *tcon)
 {
-	struct sun4i_drv *drv = drm->dev_private;
-	struct sun4i_tcon *tcon = drv->tcon;
 	struct drm_encoder *encoder;
 	struct drm_bridge *bridge;
 	struct sun4i_rgb *rgb;
@@ -215,12 +208,12 @@ int sun4i_rgb_init(struct drm_device *drm)
 	rgb = devm_kzalloc(drm->dev, sizeof(*rgb), GFP_KERNEL);
 	if (!rgb)
 		return -ENOMEM;
-	rgb->drv = drv;
+	rgb->tcon = tcon;
 	encoder = &rgb->encoder;
 
-	tcon->panel = sun4i_tcon_find_panel(tcon->dev->of_node);
-	bridge = sun4i_tcon_find_bridge(tcon->dev->of_node);
-	if (IS_ERR(tcon->panel) && IS_ERR(bridge)) {
+	ret = drm_of_find_panel_or_bridge(tcon->dev->of_node, 1, 0,
+					  &tcon->panel, &bridge);
+	if (ret) {
 		dev_info(drm->dev, "No panel or bridge found... RGB output disabled\n");
 		return 0;
 	}
@@ -238,9 +231,9 @@ int sun4i_rgb_init(struct drm_device *drm)
 	}
 
 	/* The RGB encoder can only work with the TCON channel 0 */
-	rgb->encoder.possible_crtcs = BIT(0);
+	rgb->encoder.possible_crtcs = BIT(drm_crtc_index(&tcon->crtc->crtc));
 
-	if (!IS_ERR(tcon->panel)) {
+	if (tcon->panel) {
 		drm_connector_helper_add(&rgb->connector,
 					 &sun4i_rgb_con_helper_funcs);
 		ret = drm_connector_init(drm, &rgb->connector,
@@ -261,7 +254,7 @@ int sun4i_rgb_init(struct drm_device *drm)
 		}
 	}
 
-	if (!IS_ERR(bridge)) {
+	if (bridge) {
 		ret = drm_bridge_attach(encoder, bridge, NULL);
 		if (ret) {
 			dev_err(drm->dev, "Couldn't attach our bridge\n");
diff --git a/drivers/gpu/drm/sun4i/sun4i_rgb.h b/drivers/gpu/drm/sun4i/sun4i_rgb.h
index 7c4da4c8acdd..40c18f4a6c7e 100644
--- a/drivers/gpu/drm/sun4i/sun4i_rgb.h
+++ b/drivers/gpu/drm/sun4i/sun4i_rgb.h
@@ -13,6 +13,6 @@
 #ifndef _SUN4I_RGB_H_
 #define _SUN4I_RGB_H_
 
-int sun4i_rgb_init(struct drm_device *drm);
+int sun4i_rgb_init(struct drm_device *drm, struct sun4i_tcon *tcon);
 
 #endif /* _SUN4I_RGB_H_ */
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.c b/drivers/gpu/drm/sun4i/sun4i_tcon.c
index ea2906f87cb9..9a83a85529ac 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.c
@@ -15,13 +15,12 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_modes.h>
-#include <drm/drm_panel.h>
+#include <drm/drm_of.h>
 
 #include <linux/component.h>
 #include <linux/ioport.h>
 #include <linux/of_address.h>
 #include <linux/of_device.h>
-#include <linux/of_graph.h>
 #include <linux/of_irq.h>
 #include <linux/regmap.h>
 #include <linux/reset.h>
@@ -143,7 +142,7 @@ void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon,
 
 	/*
 	 * This is called a backporch in the register documentation,
-	 * but it really is the front porch + hsync
+	 * but it really is the back porch + hsync
 	 */
 	bp = mode->crtc_htotal - mode->crtc_hsync_start;
 	DRM_DEBUG_DRIVER("Setting horizontal total %d, backporch %d\n",
@@ -156,7 +155,7 @@ void sun4i_tcon0_mode_set(struct sun4i_tcon *tcon,
 
 	/*
 	 * This is called a backporch in the register documentation,
-	 * but it really is the front porch + hsync
+	 * but it really is the back porch + hsync
 	 */
 	bp = mode->crtc_vtotal - mode->crtc_vsync_start;
 	DRM_DEBUG_DRIVER("Setting vertical total %d, backporch %d\n",
@@ -290,8 +289,7 @@ static irqreturn_t sun4i_tcon_handler(int irq, void *private)
 {
 	struct sun4i_tcon *tcon = private;
 	struct drm_device *drm = tcon->drm;
-	struct sun4i_drv *drv = drm->dev_private;
-	struct sun4i_crtc *scrtc = drv->crtc;
+	struct sun4i_crtc *scrtc = tcon->crtc;
 	unsigned int status;
 
 	regmap_read(tcon->regs, SUN4I_TCON_GINT0_REG, &status);
@@ -336,12 +334,11 @@ static int sun4i_tcon_init_clocks(struct device *dev,
 		}
 	}
 
-	return sun4i_dclk_create(dev, tcon);
+	return 0;
 }
 
 static void sun4i_tcon_free_clocks(struct sun4i_tcon *tcon)
 {
-	sun4i_dclk_free(tcon);
 	clk_disable_unprepare(tcon->clk);
 }
 
@@ -405,74 +402,6 @@ static int sun4i_tcon_init_regmap(struct device *dev,
 	return 0;
 }
 
-struct drm_panel *sun4i_tcon_find_panel(struct device_node *node)
-{
-	struct device_node *port, *remote, *child;
-	struct device_node *end_node = NULL;
-
-	/* Inputs are listed first, then outputs */
-	port = of_graph_get_port_by_id(node, 1);
-
-	/*
-	 * Our first output is the RGB interface where the panel will
-	 * be connected.
-	 */
-	for_each_child_of_node(port, child) {
-		u32 reg;
-
-		of_property_read_u32(child, "reg", &reg);
-		if (reg == 0)
-			end_node = child;
-	}
-
-	if (!end_node) {
-		DRM_DEBUG_DRIVER("Missing panel endpoint\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	remote = of_graph_get_remote_port_parent(end_node);
-	if (!remote) {
-		DRM_DEBUG_DRIVER("Unable to parse remote node\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	return of_drm_find_panel(remote) ?: ERR_PTR(-EPROBE_DEFER);
-}
-
-struct drm_bridge *sun4i_tcon_find_bridge(struct device_node *node)
-{
-	struct device_node *port, *remote, *child;
-	struct device_node *end_node = NULL;
-
-	/* Inputs are listed first, then outputs */
-	port = of_graph_get_port_by_id(node, 1);
-
-	/*
-	 * Our first output is the RGB interface where the panel will
-	 * be connected.
-	 */
-	for_each_child_of_node(port, child) {
-		u32 reg;
-
-		of_property_read_u32(child, "reg", &reg);
-		if (reg == 0)
-			end_node = child;
-	}
-
-	if (!end_node) {
-		DRM_DEBUG_DRIVER("Missing bridge endpoint\n");
-		return ERR_PTR(-ENODEV);
-	}
-
-	remote = of_graph_get_remote_port_parent(end_node);
-	if (!remote) {
-		DRM_DEBUG_DRIVER("Enable to parse remote node\n");
-		return ERR_PTR(-EINVAL);
-	}
-
-	return of_drm_find_bridge(remote) ?: ERR_PTR(-EPROBE_DEFER);
-}
-
 static int sun4i_tcon_bind(struct device *dev, struct device *master,
 			   void *data)
 {
@@ -506,30 +435,45 @@ static int sun4i_tcon_bind(struct device *dev, struct device *master,
 		return ret;
 	}
 
+	ret = sun4i_tcon_init_clocks(dev, tcon);
+	if (ret) {
+		dev_err(dev, "Couldn't init our TCON clocks\n");
+		goto err_assert_reset;
+	}
+
 	ret = sun4i_tcon_init_regmap(dev, tcon);
 	if (ret) {
 		dev_err(dev, "Couldn't init our TCON regmap\n");
-		goto err_assert_reset;
+		goto err_free_clocks;
 	}
 
-	ret = sun4i_tcon_init_clocks(dev, tcon);
+	ret = sun4i_dclk_create(dev, tcon);
 	if (ret) {
-		dev_err(dev, "Couldn't init our TCON clocks\n");
-		goto err_assert_reset;
+		dev_err(dev, "Couldn't create our TCON dot clock\n");
+		goto err_free_clocks;
 	}
 
 	ret = sun4i_tcon_init_irq(dev, tcon);
 	if (ret) {
 		dev_err(dev, "Couldn't init our TCON interrupts\n");
+		goto err_free_dotclock;
+	}
+
+	tcon->crtc = sun4i_crtc_init(drm, drv->backend, tcon);
+	if (IS_ERR(tcon->crtc)) {
+		dev_err(dev, "Couldn't create our CRTC\n");
+		ret = PTR_ERR(tcon->crtc);
 		goto err_free_clocks;
 	}
 
-	ret = sun4i_rgb_init(drm);
+	ret = sun4i_rgb_init(drm, tcon);
 	if (ret < 0)
 		goto err_free_clocks;
 
 	return 0;
 
+err_free_dotclock:
+	sun4i_dclk_free(tcon);
 err_free_clocks:
 	sun4i_tcon_free_clocks(tcon);
 err_assert_reset:
@@ -542,6 +486,7 @@ static void sun4i_tcon_unbind(struct device *dev, struct device *master,
 {
 	struct sun4i_tcon *tcon = dev_get_drvdata(dev);
 
+	sun4i_dclk_free(tcon);
 	sun4i_tcon_free_clocks(tcon);
 }
 
@@ -555,22 +500,11 @@ static int sun4i_tcon_probe(struct platform_device *pdev)
 	struct device_node *node = pdev->dev.of_node;
 	struct drm_bridge *bridge;
 	struct drm_panel *panel;
+	int ret;
 
-	/*
-	 * Neither the bridge or the panel is ready.
-	 * Defer the probe.
-	 */
-	panel = sun4i_tcon_find_panel(node);
-	bridge = sun4i_tcon_find_bridge(node);
-
-	/*
-	 * If we don't have a panel endpoint, just go on
-	 */
-	if ((PTR_ERR(panel) == -EPROBE_DEFER) &&
-	    (PTR_ERR(bridge) == -EPROBE_DEFER)) {
-		DRM_DEBUG_DRIVER("Still waiting for our panel/bridge. Deferring...\n");
-		return -EPROBE_DEFER;
-	}
+	ret = drm_of_find_panel_or_bridge(node, 1, 0, &panel, &bridge);
+	if (ret == -EPROBE_DEFER)
+		return ret;
 
 	return component_add(&pdev->dev, &sun4i_tcon_ops);
 }
diff --git a/drivers/gpu/drm/sun4i/sun4i_tcon.h b/drivers/gpu/drm/sun4i/sun4i_tcon.h
index 166064bafe2e..f636343a935d 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tcon.h
+++ b/drivers/gpu/drm/sun4i/sun4i_tcon.h
@@ -169,6 +169,9 @@ struct sun4i_tcon {
 
 	/* Platform adjustments */
 	const struct sun4i_tcon_quirks	*quirks;
+
+	/* Associated crtc */
+	struct sun4i_crtc		*crtc;
 };
 
 struct drm_bridge *sun4i_tcon_find_bridge(struct device_node *node);
diff --git a/drivers/gpu/drm/sun4i/sun4i_tv.c b/drivers/gpu/drm/sun4i/sun4i_tv.c
index c6f47222e8fc..49c49431a053 100644
--- a/drivers/gpu/drm/sun4i/sun4i_tv.c
+++ b/drivers/gpu/drm/sun4i/sun4i_tv.c
@@ -19,9 +19,11 @@
 #include <drm/drmP.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_of.h>
 #include <drm/drm_panel.h>
 
 #include "sun4i_backend.h"
+#include "sun4i_crtc.h"
 #include "sun4i_drv.h"
 #include "sun4i_tcon.h"
 
@@ -349,8 +351,9 @@ static int sun4i_tv_atomic_check(struct drm_encoder *encoder,
 static void sun4i_tv_disable(struct drm_encoder *encoder)
 {
 	struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder);
-	struct sun4i_drv *drv = tv->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc);
+	struct sun4i_tcon *tcon = crtc->tcon;
+	struct sun4i_backend *backend = crtc->backend;
 
 	DRM_DEBUG_DRIVER("Disabling the TV Output\n");
 
@@ -359,18 +362,19 @@ static void sun4i_tv_disable(struct drm_encoder *encoder)
 	regmap_update_bits(tv->regs, SUN4I_TVE_EN_REG,
 			   SUN4I_TVE_EN_ENABLE,
 			   0);
-	sun4i_backend_disable_color_correction(drv->backend);
+	sun4i_backend_disable_color_correction(backend);
 }
 
 static void sun4i_tv_enable(struct drm_encoder *encoder)
 {
 	struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder);
-	struct sun4i_drv *drv = tv->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc);
+	struct sun4i_tcon *tcon = crtc->tcon;
+	struct sun4i_backend *backend = crtc->backend;
 
 	DRM_DEBUG_DRIVER("Enabling the TV Output\n");
 
-	sun4i_backend_apply_color_correction(drv->backend);
+	sun4i_backend_apply_color_correction(backend);
 
 	regmap_update_bits(tv->regs, SUN4I_TVE_EN_REG,
 			   SUN4I_TVE_EN_ENABLE,
@@ -384,8 +388,8 @@ static void sun4i_tv_mode_set(struct drm_encoder *encoder,
 			      struct drm_display_mode *adjusted_mode)
 {
 	struct sun4i_tv *tv = drm_encoder_to_sun4i_tv(encoder);
-	struct sun4i_drv *drv = tv->drv;
-	struct sun4i_tcon *tcon = drv->tcon;
+	struct sun4i_crtc *crtc = drm_crtc_to_sun4i_crtc(encoder->crtc);
+	struct sun4i_tcon *tcon = crtc->tcon;
 	const struct tv_mode *tv_mode = sun4i_tv_find_tv_by_mode(mode);
 
 	sun4i_tcon1_mode_set(tcon, mode);
@@ -623,7 +627,12 @@ static int sun4i_tv_bind(struct device *dev, struct device *master,
 		goto err_disable_clk;
 	}
 
-	tv->encoder.possible_crtcs = BIT(0);
+	tv->encoder.possible_crtcs = drm_of_find_possible_crtcs(drm,
+								dev->of_node);
+	if (!tv->encoder.possible_crtcs) {
+		ret = -EPROBE_DEFER;
+		goto err_disable_clk;
+	}
 
 	drm_connector_helper_add(&tv->connector,
 				 &sun4i_tv_comp_connector_helper_funcs);
diff --git a/drivers/gpu/drm/tegra/gem.c b/drivers/gpu/drm/tegra/gem.c
index 17e62ecb5d4d..8672f5d2f237 100644
--- a/drivers/gpu/drm/tegra/gem.c
+++ b/drivers/gpu/drm/tegra/gem.c
@@ -619,10 +619,10 @@ static const struct dma_buf_ops tegra_gem_prime_dmabuf_ops = {
 	.map_dma_buf = tegra_gem_prime_map_dma_buf,
 	.unmap_dma_buf = tegra_gem_prime_unmap_dma_buf,
 	.release = tegra_gem_prime_release,
-	.kmap_atomic = tegra_gem_prime_kmap_atomic,
-	.kunmap_atomic = tegra_gem_prime_kunmap_atomic,
-	.kmap = tegra_gem_prime_kmap,
-	.kunmap = tegra_gem_prime_kunmap,
+	.map_atomic = tegra_gem_prime_kmap_atomic,
+	.unmap_atomic = tegra_gem_prime_kunmap_atomic,
+	.map = tegra_gem_prime_kmap,
+	.unmap = tegra_gem_prime_kunmap,
 	.mmap = tegra_gem_prime_mmap,
 	.vmap = tegra_gem_prime_vmap,
 	.vunmap = tegra_gem_prime_vunmap,
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
index afd2a7b2aff7..d524ed0d5146 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_crtc.c
@@ -23,6 +23,7 @@
 #include <linux/workqueue.h>
 #include <linux/completion.h>
 #include <linux/dma-mapping.h>
+#include <linux/of_graph.h>
 
 #include "tilcdc_drv.h"
 #include "tilcdc_regs.h"
@@ -1035,16 +1036,7 @@ int tilcdc_crtc_create(struct drm_device *dev)
 	drm_crtc_helper_add(crtc, &tilcdc_crtc_helper_funcs);
 
 	if (priv->is_componentized) {
-		struct device_node *ports =
-			of_get_child_by_name(dev->dev->of_node, "ports");
-
-		if (ports) {
-			crtc->port = of_get_child_by_name(ports, "port");
-			of_node_put(ports);
-		} else {
-			crtc->port =
-				of_get_child_by_name(dev->dev->of_node, "port");
-		}
+		crtc->port = of_graph_get_port_by_id(dev->dev->of_node, 0);
 		if (!crtc->port) { /* This should never happen */
 			dev_err(dev->dev, "Port node not found in %s\n",
 				dev->dev->of_node->full_name);
diff --git a/drivers/gpu/drm/tilcdc/tilcdc_external.c b/drivers/gpu/drm/tilcdc/tilcdc_external.c
index b0dd5e8634ae..711c7b3289d3 100644
--- a/drivers/gpu/drm/tilcdc/tilcdc_external.c
+++ b/drivers/gpu/drm/tilcdc/tilcdc_external.c
@@ -185,39 +185,6 @@ int tilcdc_attach_bridge(struct drm_device *ddev, struct drm_bridge *bridge)
 	return ret;
 }
 
-static int tilcdc_node_has_port(struct device_node *dev_node)
-{
-	struct device_node *node;
-
-	node = of_get_child_by_name(dev_node, "ports");
-	if (!node)
-		node = of_get_child_by_name(dev_node, "port");
-	if (!node)
-		return 0;
-	of_node_put(node);
-
-	return 1;
-}
-
-static
-struct device_node *tilcdc_get_remote_node(struct device_node *node)
-{
-	struct device_node *ep;
-	struct device_node *parent;
-
-	if (!tilcdc_node_has_port(node))
-		return NULL;
-
-	ep = of_graph_get_next_endpoint(node, NULL);
-	if (!ep)
-		return NULL;
-
-	parent = of_graph_get_remote_port_parent(ep);
-	of_node_put(ep);
-
-	return parent;
-}
-
 int tilcdc_attach_external_device(struct drm_device *ddev)
 {
 	struct tilcdc_drm_private *priv = ddev->dev_private;
@@ -225,7 +192,7 @@ int tilcdc_attach_external_device(struct drm_device *ddev)
 	struct drm_bridge *bridge;
 	int ret;
 
-	remote_node = tilcdc_get_remote_node(ddev->dev->of_node);
+	remote_node = of_graph_get_remote_node(ddev->dev->of_node, 0, 0);
 	if (!remote_node)
 		return 0;
 
@@ -264,35 +231,16 @@ int tilcdc_get_external_components(struct device *dev,
 				   struct component_match **match)
 {
 	struct device_node *node;
-	struct device_node *ep = NULL;
-	int count = 0;
-	int ret = 0;
-
-	if (!tilcdc_node_has_port(dev->of_node))
-		return 0;
 
-	while ((ep = of_graph_get_next_endpoint(dev->of_node, ep))) {
-		node = of_graph_get_remote_port_parent(ep);
-		if (!node || !of_device_is_available(node)) {
-			of_node_put(node);
-			continue;
-		}
-
-		dev_dbg(dev, "Subdevice node '%s' found\n", node->name);
-
-		if (of_device_is_compatible(node, "nxp,tda998x")) {
-			if (match)
-				drm_of_component_match_add(dev, match,
-							   dev_match_of, node);
-			ret = 1;
-		}
+	node = of_graph_get_remote_node(dev->of_node, 0, 0);
 
+	if (!of_device_is_compatible(node, "nxp,tda998x")) {
 		of_node_put(node);
-		if (count++ > 1) {
-			dev_err(dev, "Only one port is supported\n");
-			return -EINVAL;
-		}
+		return 0;
 	}
 
-	return ret;
+	if (match)
+		drm_of_component_match_add(dev, match, dev_match_of, node);
+	of_node_put(node);
+	return 1;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index 412240a3ba90..e44626a2e698 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -1020,37 +1020,44 @@ out_unlock:
 	return ret;
 }
 
-bool ttm_bo_mem_compat(struct ttm_placement *placement,
-		       struct ttm_mem_reg *mem,
-		       uint32_t *new_flags)
+static bool ttm_bo_places_compat(const struct ttm_place *places,
+				 unsigned num_placement,
+				 struct ttm_mem_reg *mem,
+				 uint32_t *new_flags)
 {
-	int i;
-
-	for (i = 0; i < placement->num_placement; i++) {
-		const struct ttm_place *heap = &placement->placement[i];
-		if (mem->mm_node &&
-		    (mem->start < heap->fpfn ||
-		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
-			continue;
+	unsigned i;
 
-		*new_flags = heap->flags;
-		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
-		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
-			return true;
-	}
+	for (i = 0; i < num_placement; i++) {
+		const struct ttm_place *heap = &places[i];
 
-	for (i = 0; i < placement->num_busy_placement; i++) {
-		const struct ttm_place *heap = &placement->busy_placement[i];
-		if (mem->mm_node &&
-		    (mem->start < heap->fpfn ||
+		if (mem->mm_node && (mem->start < heap->fpfn ||
 		     (heap->lpfn != 0 && (mem->start + mem->num_pages) > heap->lpfn)))
 			continue;
 
 		*new_flags = heap->flags;
 		if ((*new_flags & mem->placement & TTM_PL_MASK_CACHING) &&
-		    (*new_flags & mem->placement & TTM_PL_MASK_MEM))
+		    (*new_flags & mem->placement & TTM_PL_MASK_MEM) &&
+		    (!(*new_flags & TTM_PL_FLAG_CONTIGUOUS) ||
+		     (mem->placement & TTM_PL_FLAG_CONTIGUOUS)))
 			return true;
 	}
+	return false;
+}
+
+bool ttm_bo_mem_compat(struct ttm_placement *placement,
+		       struct ttm_mem_reg *mem,
+		       uint32_t *new_flags)
+{
+	if (ttm_bo_places_compat(placement->placement, placement->num_placement,
+				 mem, new_flags))
+		return true;
+
+	if ((placement->busy_placement != placement->placement ||
+	     placement->num_busy_placement > placement->num_placement) &&
+	    ttm_bo_places_compat(placement->busy_placement,
+				 placement->num_busy_placement,
+				 mem, new_flags))
+		return true;
 
 	return false;
 }
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 35ffb3754feb..9f53df95f35c 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -231,7 +231,7 @@ static int ttm_bo_vm_fault(struct vm_fault *vmf)
 	 */
 	for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
 		if (bo->mem.bus.is_iomem)
-			pfn = ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT) + page_offset;
+			pfn = bdev->driver->io_mem_pfn(bo, page_offset);
 		else {
 			page = ttm->pages[page_offset];
 			if (unlikely(!page && i == 0)) {
@@ -324,6 +324,14 @@ static struct ttm_buffer_object *ttm_bo_vm_lookup(struct ttm_bo_device *bdev,
 	return bo;
 }
 
+unsigned long ttm_bo_default_io_mem_pfn(struct ttm_buffer_object *bo,
+					unsigned long page_offset)
+{
+	return ((bo->mem.bus.base + bo->mem.bus.offset) >> PAGE_SHIFT)
+		+ page_offset;
+}
+EXPORT_SYMBOL(ttm_bo_default_io_mem_pfn);
+
 int ttm_bo_mmap(struct file *filp, struct vm_area_struct *vma,
 		struct ttm_bo_device *bdev)
 {
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index fdb451e3ec01..26a7ad0f4789 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -179,7 +179,7 @@ int ttm_base_object_init(struct ttm_object_file *tfile,
 	if (unlikely(ret != 0))
 		goto out_err0;
 
-	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
 	if (unlikely(ret != 0))
 		goto out_err1;
 
@@ -318,7 +318,8 @@ EXPORT_SYMBOL(ttm_ref_object_exists);
 
 int ttm_ref_object_add(struct ttm_object_file *tfile,
 		       struct ttm_base_object *base,
-		       enum ttm_ref_type ref_type, bool *existed)
+		       enum ttm_ref_type ref_type, bool *existed,
+		       bool require_existed)
 {
 	struct drm_open_hash *ht = &tfile->ref_hash[ref_type];
 	struct ttm_ref_object *ref;
@@ -345,6 +346,9 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
 		}
 
 		rcu_read_unlock();
+		if (require_existed)
+			return -EPERM;
+
 		ret = ttm_mem_global_alloc(mem_glob, sizeof(*ref),
 					   false, false);
 		if (unlikely(ret != 0))
@@ -449,10 +453,10 @@ void ttm_object_file_release(struct ttm_object_file **p_tfile)
 		ttm_ref_object_release(&ref->kref);
 	}
 
+	spin_unlock(&tfile->lock);
 	for (i = 0; i < TTM_REF_NUM; ++i)
 		drm_ht_remove(&tfile->ref_hash[i]);
 
-	spin_unlock(&tfile->lock);
 	ttm_object_file_unref(&tfile);
 }
 EXPORT_SYMBOL(ttm_object_file_release);
@@ -529,9 +533,7 @@ void ttm_object_device_release(struct ttm_object_device **p_tdev)
 
 	*p_tdev = NULL;
 
-	spin_lock(&tdev->object_lock);
 	drm_ht_remove(&tdev->object_hash);
-	spin_unlock(&tdev->object_lock);
 
 	kfree(tdev);
 }
@@ -635,7 +637,7 @@ int ttm_prime_fd_to_handle(struct ttm_object_file *tfile,
 	prime = (struct ttm_prime_object *) dma_buf->priv;
 	base = &prime->base;
 	*handle = base->hash.key;
-	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+	ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL, false);
 
 	dma_buf_put(dma_buf);
 
diff --git a/drivers/gpu/drm/udl/udl_dmabuf.c b/drivers/gpu/drm/udl/udl_dmabuf.c
index ac90ffdb5912..ed0e636243b2 100644
--- a/drivers/gpu/drm/udl/udl_dmabuf.c
+++ b/drivers/gpu/drm/udl/udl_dmabuf.c
@@ -191,10 +191,10 @@ static struct dma_buf_ops udl_dmabuf_ops = {
 	.detach			= udl_detach_dma_buf,
 	.map_dma_buf		= udl_map_dma_buf,
 	.unmap_dma_buf		= udl_unmap_dma_buf,
-	.kmap			= udl_dmabuf_kmap,
-	.kmap_atomic		= udl_dmabuf_kmap_atomic,
-	.kunmap			= udl_dmabuf_kunmap,
-	.kunmap_atomic		= udl_dmabuf_kunmap_atomic,
+	.map			= udl_dmabuf_kmap,
+	.map_atomic		= udl_dmabuf_kmap_atomic,
+	.unmap			= udl_dmabuf_kunmap,
+	.unmap_atomic		= udl_dmabuf_kunmap_atomic,
 	.mmap			= udl_dmabuf_mmap,
 	.release		= drm_gem_dmabuf_release,
 };
diff --git a/drivers/gpu/drm/udl/udl_transfer.c b/drivers/gpu/drm/udl/udl_transfer.c
index 917dcb978c2c..0c87b1ac6b68 100644
--- a/drivers/gpu/drm/udl/udl_transfer.c
+++ b/drivers/gpu/drm/udl/udl_transfer.c
@@ -14,6 +14,7 @@
 #include <linux/slab.h>
 #include <linux/fb.h>
 #include <linux/prefetch.h>
+#include <asm/unaligned.h>
 
 #include <drm/drmP.h>
 #include "udl_drv.h"
@@ -163,7 +164,7 @@ static void udl_compress_hline16(
 			const u8 *const start = pixel;
 			const uint16_t repeating_pixel_val16 = pixel_val16;
 
-			*(uint16_t *)cmd = cpu_to_be16(pixel_val16);
+			put_unaligned_be16(pixel_val16, cmd);
 
 			cmd += 2;
 			pixel += bpp;
diff --git a/drivers/gpu/drm/vc4/vc4_crtc.c b/drivers/gpu/drm/vc4/vc4_crtc.c
index 865e9f494bcc..d86c8cce3182 100644
--- a/drivers/gpu/drm/vc4/vc4_crtc.c
+++ b/drivers/gpu/drm/vc4/vc4_crtc.c
@@ -314,7 +314,8 @@ vc4_crtc_lut_load(struct drm_crtc *crtc)
 
 static int
 vc4_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
-		   uint32_t size)
+		   uint32_t size,
+		   struct drm_modeset_acquire_ctx *ctx)
 {
 	struct vc4_crtc *vc4_crtc = to_vc4_crtc(crtc);
 	u32 i;
@@ -846,6 +847,17 @@ static void vc4_crtc_destroy_state(struct drm_crtc *crtc,
 	drm_atomic_helper_crtc_destroy_state(crtc, state);
 }
 
+static void
+vc4_crtc_reset(struct drm_crtc *crtc)
+{
+	if (crtc->state)
+		__drm_atomic_helper_crtc_destroy_state(crtc->state);
+
+	crtc->state = kzalloc(sizeof(struct vc4_crtc_state), GFP_KERNEL);
+	if (crtc->state)
+		crtc->state->crtc = crtc;
+}
+
 static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_config = drm_atomic_helper_set_config,
 	.destroy = vc4_crtc_destroy,
@@ -853,7 +865,7 @@ static const struct drm_crtc_funcs vc4_crtc_funcs = {
 	.set_property = NULL,
 	.cursor_set = NULL, /* handled by drm_mode_cursor_universal */
 	.cursor_move = NULL, /* handled by drm_mode_cursor_universal */
-	.reset = drm_atomic_helper_crtc_reset,
+	.reset = vc4_crtc_reset,
 	.atomic_duplicate_state = vc4_crtc_duplicate_state,
 	.atomic_destroy_state = vc4_crtc_destroy_state,
 	.gamma_set = vc4_crtc_gamma_set,
diff --git a/drivers/gpu/drm/vc4/vc4_dpi.c b/drivers/gpu/drm/vc4/vc4_dpi.c
index 71435796c710..c6d703903fd9 100644
--- a/drivers/gpu/drm/vc4/vc4_dpi.c
+++ b/drivers/gpu/drm/vc4/vc4_dpi.c
@@ -356,23 +356,14 @@ static const struct of_device_id vc4_dpi_dt_match[] = {
  */
 static struct drm_panel *vc4_dpi_get_panel(struct device *dev)
 {
-	struct device_node *endpoint, *panel_node;
+	struct device_node *panel_node;
 	struct device_node *np = dev->of_node;
 	struct drm_panel *panel;
 
-	endpoint = of_graph_get_next_endpoint(np, NULL);
-	if (!endpoint) {
-		dev_err(dev, "no endpoint to fetch DPI panel\n");
-		return NULL;
-	}
-
 	/* don't proceed if we have an endpoint but no panel_node tied to it */
-	panel_node = of_graph_get_remote_port_parent(endpoint);
-	of_node_put(endpoint);
-	if (!panel_node) {
-		dev_err(dev, "no valid panel node\n");
+	panel_node = of_graph_get_remote_node(np, 0, 0);
+	if (!panel_node)
 		return NULL;
-	}
 
 	panel = of_drm_find_panel(panel_node);
 	of_node_put(panel_node);
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 93900a83dced..1328185bfd59 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -334,6 +334,7 @@ int virtio_gpu_modeset_init(struct virtio_gpu_device *vgdev);
 void virtio_gpu_modeset_fini(struct virtio_gpu_device *vgdev);
 
 /* virtio_gpu_plane.c */
+uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc);
 struct drm_plane *virtio_gpu_plane_init(struct virtio_gpu_device *vgdev,
 					enum drm_plane_type type,
 					int index);
diff --git a/drivers/gpu/drm/virtio/virtgpu_fb.c b/drivers/gpu/drm/virtio/virtgpu_fb.c
index 9bfaef379469..33df067b11c1 100644
--- a/drivers/gpu/drm/virtio/virtgpu_fb.c
+++ b/drivers/gpu/drm/virtio/virtgpu_fb.c
@@ -231,63 +231,9 @@ static int virtio_gpufb_create(struct drm_fb_helper *helper,
 	mode_cmd.pitches[0] = mode_cmd.width * 4;
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(32, 24);
 
-	switch (mode_cmd.pixel_format) {
-#ifdef __BIG_ENDIAN
-	case DRM_FORMAT_XRGB8888:
-		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
-		break;
-	case DRM_FORMAT_ARGB8888:
-		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
-		break;
-	case DRM_FORMAT_BGRX8888:
-		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
-		break;
-	case DRM_FORMAT_BGRA8888:
-		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
-		break;
-	case DRM_FORMAT_RGBX8888:
-		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
-		break;
-	case DRM_FORMAT_RGBA8888:
-		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
-		break;
-	case DRM_FORMAT_XBGR8888:
-		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
-		break;
-	case DRM_FORMAT_ABGR8888:
-		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
-		break;
-#else
-	case DRM_FORMAT_XRGB8888:
-		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
-		break;
-	case DRM_FORMAT_ARGB8888:
-		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
-		break;
-	case DRM_FORMAT_BGRX8888:
-		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
-		break;
-	case DRM_FORMAT_BGRA8888:
-		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
-		break;
-	case DRM_FORMAT_RGBX8888:
-		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
-		break;
-	case DRM_FORMAT_RGBA8888:
-		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
-		break;
-	case DRM_FORMAT_XBGR8888:
-		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
-		break;
-	case DRM_FORMAT_ABGR8888:
-		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
-		break;
-#endif
-	default:
-		DRM_ERROR("failed to find virtio gpu format for %d\n",
-			  mode_cmd.pixel_format);
+	format = virtio_gpu_translate_format(mode_cmd.pixel_format);
+	if (format == 0)
 		return -EINVAL;
-	}
 
 	size = mode_cmd.pitches[0] * mode_cmd.height;
 	obj = virtio_gpu_alloc_object(dev, size, false, true);
diff --git a/drivers/gpu/drm/virtio/virtgpu_gem.c b/drivers/gpu/drm/virtio/virtgpu_gem.c
index 336a57fd6d5d..cc025d8fbe19 100644
--- a/drivers/gpu/drm/virtio/virtgpu_gem.c
+++ b/drivers/gpu/drm/virtio/virtgpu_gem.c
@@ -88,6 +88,7 @@ int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
 	int ret;
 	uint32_t pitch;
 	uint32_t resid;
+	uint32_t format;
 
 	pitch = args->width * ((args->bpp + 1) / 8);
 	args->size = pitch * args->height;
@@ -98,9 +99,10 @@ int virtio_gpu_mode_dumb_create(struct drm_file *file_priv,
 	if (ret)
 		goto fail;
 
+	format = virtio_gpu_translate_format(DRM_FORMAT_XRGB8888);
 	virtio_gpu_resource_id_get(vgdev, &resid);
-	virtio_gpu_cmd_create_resource(vgdev, resid,
-				       2, args->width, args->height);
+	virtio_gpu_cmd_create_resource(vgdev, resid, format,
+				       args->width, args->height);
 
 	/* attach the object to the resource */
 	obj = gem_to_virtio_gpu_obj(gobj);
diff --git a/drivers/gpu/drm/virtio/virtgpu_object.c b/drivers/gpu/drm/virtio/virtgpu_object.c
index 1483daebe057..6f66b7347cd0 100644
--- a/drivers/gpu/drm/virtio/virtgpu_object.c
+++ b/drivers/gpu/drm/virtio/virtgpu_object.c
@@ -81,8 +81,10 @@ int virtio_gpu_object_create(struct virtio_gpu_device *vgdev,
 		return -ENOMEM;
 	size = roundup(size, PAGE_SIZE);
 	ret = drm_gem_object_init(vgdev->ddev, &bo->gem_base, size);
-	if (ret != 0)
+	if (ret != 0) {
+		kfree(bo);
 		return ret;
+	}
 	bo->dumb = false;
 	virtio_gpu_init_ttm_placement(bo, pinned);
 
diff --git a/drivers/gpu/drm/virtio/virtgpu_plane.c b/drivers/gpu/drm/virtio/virtgpu_plane.c
index 1ff9c64c9ec0..adcdbd0abef6 100644
--- a/drivers/gpu/drm/virtio/virtgpu_plane.c
+++ b/drivers/gpu/drm/virtio/virtgpu_plane.c
@@ -39,9 +39,81 @@ static const uint32_t virtio_gpu_formats[] = {
 };
 
 static const uint32_t virtio_gpu_cursor_formats[] = {
+#ifdef __BIG_ENDIAN
+	DRM_FORMAT_BGRA8888,
+#else
 	DRM_FORMAT_ARGB8888,
+#endif
 };
 
+uint32_t virtio_gpu_translate_format(uint32_t drm_fourcc)
+{
+	uint32_t format;
+
+	switch (drm_fourcc) {
+#ifdef __BIG_ENDIAN
+	case DRM_FORMAT_XRGB8888:
+		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_ARGB8888:
+		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
+		break;
+	case DRM_FORMAT_BGRA8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
+		break;
+	case DRM_FORMAT_RGBX8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
+		break;
+	case DRM_FORMAT_RGBA8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
+		break;
+	case DRM_FORMAT_XBGR8888:
+		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
+		break;
+#else
+	case DRM_FORMAT_XRGB8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8X8_UNORM;
+		break;
+	case DRM_FORMAT_ARGB8888:
+		format = VIRTIO_GPU_FORMAT_B8G8R8A8_UNORM;
+		break;
+	case DRM_FORMAT_BGRX8888:
+		format = VIRTIO_GPU_FORMAT_X8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_BGRA8888:
+		format = VIRTIO_GPU_FORMAT_A8R8G8B8_UNORM;
+		break;
+	case DRM_FORMAT_RGBX8888:
+		format = VIRTIO_GPU_FORMAT_X8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_RGBA8888:
+		format = VIRTIO_GPU_FORMAT_A8B8G8R8_UNORM;
+		break;
+	case DRM_FORMAT_XBGR8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8X8_UNORM;
+		break;
+	case DRM_FORMAT_ABGR8888:
+		format = VIRTIO_GPU_FORMAT_R8G8B8A8_UNORM;
+		break;
+#endif
+	default:
+		/*
+		 * This should not happen, we handle everything listed
+		 * in virtio_gpu_formats[].
+		 */
+		format = 0;
+		break;
+	}
+	WARN_ON(format == 0);
+	return format;
+}
+
 static void virtio_gpu_plane_destroy(struct drm_plane *plane)
 {
 	drm_plane_cleanup(plane);
diff --git a/drivers/gpu/drm/virtio/virtgpu_ttm.c b/drivers/gpu/drm/virtio/virtgpu_ttm.c
index 70ec8ca8d9b1..4e8e27d50922 100644
--- a/drivers/gpu/drm/virtio/virtgpu_ttm.c
+++ b/drivers/gpu/drm/virtio/virtgpu_ttm.c
@@ -431,6 +431,7 @@ static struct ttm_bo_driver virtio_gpu_bo_driver = {
 	.verify_access = &virtio_gpu_verify_access,
 	.io_mem_reserve = &virtio_gpu_ttm_io_mem_reserve,
 	.io_mem_free = &virtio_gpu_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 	.move_notify = &virtio_gpu_bo_move_notify,
 	.swap_notify = &virtio_gpu_bo_swap_notify,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 4c7f24a67a2e..35bf781e418e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -859,4 +859,5 @@ struct ttm_bo_driver vmw_bo_driver = {
 	.fault_reserve_notify = &vmw_ttm_fault_reserve_notify,
 	.io_mem_reserve = &vmw_ttm_io_mem_reserve,
 	.io_mem_free = &vmw_ttm_io_mem_free,
+	.io_mem_pfn = ttm_bo_default_io_mem_pfn,
 };
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index f6e936d90517..4a641555b960 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -254,7 +254,7 @@ module_param_named(restrict_iommu, vmw_restrict_iommu, int, 0600);
 MODULE_PARM_DESC(force_coherent, "Force coherent TTM pages");
 module_param_named(force_coherent, vmw_force_coherent, int, 0600);
 MODULE_PARM_DESC(restrict_dma_mask, "Restrict DMA mask to 44 bits with IOMMU");
-module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, S_IRUSR | S_IWUSR);
+module_param_named(restrict_dma_mask, vmw_restrict_dma_mask, int, 0600);
 MODULE_PARM_DESC(assume_16bpp, "Assume 16-bpp when filtering modes");
 module_param_named(assume_16bpp, vmw_assume_16bpp, int, 0600);
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
index 09e120d50e65..6f4cb4678cbc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fb.c
@@ -418,6 +418,60 @@ static int vmw_fb_compute_depth(struct fb_var_screeninfo *var,
 	return 0;
 }
 
+static int vmwgfx_set_config_internal(struct drm_mode_set *set)
+{
+	struct drm_crtc *crtc = set->crtc;
+	struct drm_framebuffer *fb;
+	struct drm_crtc *tmp;
+	struct drm_modeset_acquire_ctx *ctx;
+	struct drm_device *dev = set->crtc->dev;
+	int ret;
+
+	ctx = dev->mode_config.acquire_ctx;
+
+restart:
+	/*
+	 * NOTE: ->set_config can also disable other crtcs (if we steal all
+	 * connectors from it), hence we need to refcount the fbs across all
+	 * crtcs. Atomic modeset will have saner semantics ...
+	 */
+	drm_for_each_crtc(tmp, dev)
+		tmp->primary->old_fb = tmp->primary->fb;
+
+	fb = set->fb;
+
+	ret = crtc->funcs->set_config(set, ctx);
+	if (ret == 0) {
+		crtc->primary->crtc = crtc;
+		crtc->primary->fb = fb;
+	}
+
+	drm_for_each_crtc(tmp, dev) {
+		if (tmp->primary->fb)
+			drm_framebuffer_get(tmp->primary->fb);
+		if (tmp->primary->old_fb)
+			drm_framebuffer_put(tmp->primary->old_fb);
+		tmp->primary->old_fb = NULL;
+	}
+
+	if (ret == -EDEADLK) {
+		dev->mode_config.acquire_ctx = NULL;
+
+retry_locking:
+		drm_modeset_backoff(ctx);
+
+		ret = drm_modeset_lock_all_ctx(dev, ctx);
+		if (ret)
+			goto retry_locking;
+
+		dev->mode_config.acquire_ctx = ctx;
+
+		goto restart;
+	}
+
+	return ret;
+}
+
 static int vmw_fb_kms_detach(struct vmw_fb_par *par,
 			     bool detach_bo,
 			     bool unref_bo)
@@ -436,7 +490,7 @@ static int vmw_fb_kms_detach(struct vmw_fb_par *par,
 		set.fb = NULL;
 		set.num_connectors = 0;
 		set.connectors = &par->con;
-		ret = drm_mode_set_config_internal(&set);
+		ret = vmwgfx_set_config_internal(&set);
 		if (ret) {
 			DRM_ERROR("Could not unset a mode.\n");
 			return ret;
@@ -578,7 +632,7 @@ static int vmw_fb_set_par(struct fb_info *info)
 	set.num_connectors = 1;
 	set.connectors = &par->con;
 
-	ret = drm_mode_set_config_internal(&set);
+	ret = vmwgfx_set_config_internal(&set);
 	if (ret)
 		goto out_unlock;
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
index b399f03a988d..6b2708b4eafe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_fence.c
@@ -538,7 +538,7 @@ int vmw_fence_create(struct vmw_fence_manager *fman,
 		     struct vmw_fence_obj **p_fence)
 {
 	struct vmw_fence_obj *fence;
-	int ret;
+ 	int ret;
 
 	fence = kzalloc(sizeof(*fence), GFP_KERNEL);
 	if (unlikely(fence == NULL))
@@ -701,6 +701,41 @@ void vmw_fence_fifo_up(struct vmw_fence_manager *fman)
 }
 
 
+/**
+ * vmw_fence_obj_lookup - Look up a user-space fence object
+ *
+ * @tfile: A struct ttm_object_file identifying the caller.
+ * @handle: A handle identifying the fence object.
+ * @return: A struct vmw_user_fence base ttm object on success or
+ * an error pointer on failure.
+ *
+ * The fence object is looked up and type-checked. The caller needs
+ * to have opened the fence object first, but since that happens on
+ * creation and fence objects aren't shareable, that's not an
+ * issue currently.
+ */
+static struct ttm_base_object *
+vmw_fence_obj_lookup(struct ttm_object_file *tfile, u32 handle)
+{
+	struct ttm_base_object *base = ttm_base_object_lookup(tfile, handle);
+
+	if (!base) {
+		pr_err("Invalid fence object handle 0x%08lx.\n",
+		       (unsigned long)handle);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (base->refcount_release != vmw_user_fence_base_release) {
+		pr_err("Invalid fence object handle 0x%08lx.\n",
+		       (unsigned long)handle);
+		ttm_base_object_unref(&base);
+		return ERR_PTR(-EINVAL);
+	}
+
+	return base;
+}
+
+
 int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
@@ -726,12 +761,9 @@ int vmw_fence_obj_wait_ioctl(struct drm_device *dev, void *data,
 		arg->kernel_cookie = jiffies + wait_timeout;
 	}
 
-	base = ttm_base_object_lookup(tfile, arg->handle);
-	if (unlikely(base == NULL)) {
-		pr_err("Wait invalid fence object handle 0x%08lx\n",
-		       (unsigned long)arg->handle);
-		return -EINVAL;
-	}
+	base = vmw_fence_obj_lookup(tfile, arg->handle);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
 	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
 
@@ -770,12 +802,9 @@ int vmw_fence_obj_signaled_ioctl(struct drm_device *dev, void *data,
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
 	struct vmw_private *dev_priv = vmw_priv(dev);
 
-	base = ttm_base_object_lookup(tfile, arg->handle);
-	if (unlikely(base == NULL)) {
-		pr_err("Fence signaled invalid fence object handle 0x%08lx\n",
-		       (unsigned long)arg->handle);
-		return -EINVAL;
-	}
+	base = vmw_fence_obj_lookup(tfile, arg->handle);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
 
 	fence = &(container_of(base, struct vmw_user_fence, base)->fence);
 	fman = fman_from_fence(fence);
@@ -1022,6 +1051,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 		(struct drm_vmw_fence_event_arg *) data;
 	struct vmw_fence_obj *fence = NULL;
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
+	struct ttm_object_file *tfile = vmw_fp->tfile;
 	struct drm_vmw_fence_rep __user *user_fence_rep =
 		(struct drm_vmw_fence_rep __user *)(unsigned long)
 		arg->fence_rep;
@@ -1035,24 +1065,18 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 	 */
 	if (arg->handle) {
 		struct ttm_base_object *base =
-			ttm_base_object_lookup_for_ref(dev_priv->tdev,
-						       arg->handle);
-
-		if (unlikely(base == NULL)) {
-			DRM_ERROR("Fence event invalid fence object handle "
-				  "0x%08lx.\n",
-				  (unsigned long)arg->handle);
-			return -EINVAL;
-		}
+			vmw_fence_obj_lookup(tfile, arg->handle);
+
+		if (IS_ERR(base))
+			return PTR_ERR(base);
+
 		fence = &(container_of(base, struct vmw_user_fence,
 				       base)->fence);
 		(void) vmw_fence_obj_reference(fence);
 
 		if (user_fence_rep != NULL) {
-			bool existed;
-
 			ret = ttm_ref_object_add(vmw_fp->tfile, base,
-						 TTM_REF_USAGE, &existed);
+						 TTM_REF_USAGE, NULL, false);
 			if (unlikely(ret != 0)) {
 				DRM_ERROR("Failed to reference a fence "
 					  "object.\n");
@@ -1095,8 +1119,7 @@ int vmw_fence_event_ioctl(struct drm_device *dev, void *data,
 	return 0;
 out_no_create:
 	if (user_fence_rep != NULL)
-		ttm_ref_object_base_unref(vmw_fpriv(file_priv)->tfile,
-					  handle, TTM_REF_USAGE);
+		ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE);
 out_no_ref_obj:
 	vmw_fence_obj_unreference(&fence);
 	return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index b8c6a03c8c54..5ec24fd801cd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -114,8 +114,6 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		param->value = dev_priv->has_dx;
 		break;
 	default:
-		DRM_ERROR("Illegal vmwgfx get param request: %d\n",
-			  param->param);
 		return -EINVAL;
 	}
 
@@ -186,7 +184,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 	bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS);
 	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 
-	if (unlikely(arg->pad64 != 0)) {
+	if (unlikely(arg->pad64 != 0 || arg->max_size == 0)) {
 		DRM_ERROR("Illegal GET_3D_CAP argument.\n");
 		return -EINVAL;
 	}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
index 6078654d033b..ef9f3a2a4030 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.c
@@ -2026,7 +2026,8 @@ static int vmw_du_update_layout(struct vmw_private *dev_priv, unsigned num,
 
 int vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			  u16 *r, u16 *g, u16 *b,
-			  uint32_t size)
+			  uint32_t size,
+			  struct drm_modeset_acquire_ctx *ctx)
 {
 	struct vmw_private *dev_priv = vmw_priv(crtc->dev);
 	int i;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
index 0c226b2adea5..13f2f1d2818a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_kms.h
@@ -254,7 +254,8 @@ void vmw_du_crtc_save(struct drm_crtc *crtc);
 void vmw_du_crtc_restore(struct drm_crtc *crtc);
 int vmw_du_crtc_gamma_set(struct drm_crtc *crtc,
 			   u16 *r, u16 *g, u16 *b,
-			   uint32_t size);
+			   uint32_t size,
+			   struct drm_modeset_acquire_ctx *ctx);
 int vmw_du_crtc_cursor_set2(struct drm_crtc *crtc, struct drm_file *file_priv,
 			    uint32_t handle, uint32_t width, uint32_t height,
 			    int32_t hot_x, int32_t hot_y);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
index 31fe32d8d65a..0d42a46521fc 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_prime.c
@@ -108,10 +108,10 @@ const struct dma_buf_ops vmw_prime_dmabuf_ops =  {
 	.map_dma_buf = vmw_prime_map_dma_buf,
 	.unmap_dma_buf = vmw_prime_unmap_dma_buf,
 	.release = NULL,
-	.kmap = vmw_prime_dmabuf_kmap,
-	.kmap_atomic = vmw_prime_dmabuf_kmap_atomic,
-	.kunmap = vmw_prime_dmabuf_kunmap,
-	.kunmap_atomic = vmw_prime_dmabuf_kunmap_atomic,
+	.map = vmw_prime_dmabuf_kmap,
+	.map_atomic = vmw_prime_dmabuf_kmap_atomic,
+	.unmap = vmw_prime_dmabuf_kunmap,
+	.unmap_atomic = vmw_prime_dmabuf_kunmap_atomic,
 	.mmap = vmw_prime_dmabuf_mmap,
 	.vmap = vmw_prime_dmabuf_vmap,
 	.vunmap = vmw_prime_dmabuf_vunmap,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index fa1037ec8e5f..7d591f653dfa 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -546,7 +546,7 @@ static int vmw_user_dmabuf_synccpu_grab(struct vmw_user_dma_buffer *user_bo,
 		return ret;
 
 	ret = ttm_ref_object_add(tfile, &user_bo->prime.base,
-				 TTM_REF_SYNCCPU_WRITE, &existed);
+				 TTM_REF_SYNCCPU_WRITE, &existed, false);
 	if (ret != 0 || existed)
 		ttm_bo_synccpu_write_release(&user_bo->dma.base);
 
@@ -730,7 +730,7 @@ int vmw_user_dmabuf_reference(struct ttm_object_file *tfile,
 
 	*handle = user_bo->prime.base.hash.key;
 	return ttm_ref_object_add(tfile, &user_bo->prime.base,
-				  TTM_REF_USAGE, NULL);
+				  TTM_REF_USAGE, NULL, false);
 }
 
 /**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 41b9d20d6ae7..7681341fe32b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -713,11 +713,14 @@ int vmw_surface_define_ioctl(struct drm_device *dev, void *data,
 			128;
 
 	num_sizes = 0;
-	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i)
+	for (i = 0; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+		if (req->mip_levels[i] > DRM_VMW_MAX_MIP_LEVELS)
+			return -EINVAL;
 		num_sizes += req->mip_levels[i];
+	}
 
-	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES *
-	    DRM_VMW_MAX_MIP_LEVELS)
+	if (num_sizes > DRM_VMW_MAX_SURFACE_FACES * DRM_VMW_MAX_MIP_LEVELS ||
+	    num_sizes == 0)
 		return -EINVAL;
 
 	size = vmw_user_surface_size + 128 +
@@ -890,17 +893,16 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 	uint32_t handle;
 	struct ttm_base_object *base;
 	int ret;
+	bool require_exist = false;
 
 	if (handle_type == DRM_VMW_HANDLE_PRIME) {
 		ret = ttm_prime_fd_to_handle(tfile, u_handle, &handle);
 		if (unlikely(ret != 0))
 			return ret;
 	} else {
-		if (unlikely(drm_is_render_client(file_priv))) {
-			DRM_ERROR("Render client refused legacy "
-				  "surface reference.\n");
-			return -EACCES;
-		}
+		if (unlikely(drm_is_render_client(file_priv)))
+			require_exist = true;
+
 		if (ACCESS_ONCE(vmw_fpriv(file_priv)->locked_master)) {
 			DRM_ERROR("Locked master refused legacy "
 				  "surface reference.\n");
@@ -928,17 +930,14 @@ vmw_surface_handle_reference(struct vmw_private *dev_priv,
 
 		/*
 		 * Make sure the surface creator has the same
-		 * authenticating master.
+		 * authenticating master, or is already registered with us.
 		 */
 		if (drm_is_primary_client(file_priv) &&
-		    user_srf->master != file_priv->master) {
-			DRM_ERROR("Trying to reference surface outside of"
-				  " master domain.\n");
-			ret = -EACCES;
-			goto out_bad_resource;
-		}
+		    user_srf->master != file_priv->master)
+			require_exist = true;
 
-		ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL);
+		ret = ttm_ref_object_add(tfile, base, TTM_REF_USAGE, NULL,
+					 require_exist);
 		if (unlikely(ret != 0)) {
 			DRM_ERROR("Could not add a reference to a surface.\n");
 			goto out_bad_resource;
diff --git a/drivers/gpu/ipu-v3/Makefile b/drivers/gpu/ipu-v3/Makefile
index 1ab9bceee755..8cdf9e4ae772 100644
--- a/drivers/gpu/ipu-v3/Makefile
+++ b/drivers/gpu/ipu-v3/Makefile
@@ -2,4 +2,8 @@ obj-$(CONFIG_IMX_IPUV3_CORE) += imx-ipu-v3.o
 
 imx-ipu-v3-objs := ipu-common.o ipu-cpmem.o ipu-csi.o ipu-dc.o ipu-di.o \
 		ipu-dp.o ipu-dmfc.o ipu-ic.o ipu-image-convert.o \
-		ipu-pre.o ipu-prg.o ipu-smfc.o ipu-vdi.o
+		ipu-smfc.o ipu-vdi.o
+
+ifdef CONFIG_DRM
+	imx-ipu-v3-objs += ipu-pre.o ipu-prg.o
+endif
diff --git a/drivers/gpu/ipu-v3/ipu-common.c b/drivers/gpu/ipu-v3/ipu-common.c
index 7aefccec31b1..16d556816b5f 100644
--- a/drivers/gpu/ipu-v3/ipu-common.c
+++ b/drivers/gpu/ipu-v3/ipu-common.c
@@ -1401,7 +1401,8 @@ static int ipu_probe(struct platform_device *pdev)
 
 	ipu->id = of_alias_get_id(np, "ipu");
 
-	if (of_device_is_compatible(np, "fsl,imx6qp-ipu")) {
+	if (of_device_is_compatible(np, "fsl,imx6qp-ipu") &&
+	    IS_ENABLED(CONFIG_DRM)) {
 		ipu->prg_priv = ipu_prg_lookup_by_phandle(&pdev->dev,
 							  "fsl,prg", ipu->id);
 		if (!ipu->prg_priv)
@@ -1538,8 +1539,10 @@ static struct platform_driver imx_ipu_driver = {
 };
 
 static struct platform_driver * const drivers[] = {
+#if IS_ENABLED(CONFIG_DRM)
 	&ipu_pre_drv,
 	&ipu_prg_drv,
+#endif
 	&imx_ipu_driver,
 };