378 files changed, 6506 insertions, 2748 deletions
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index c8ad6bf6618a..bcef6ea4bcf9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -82,6 +82,7 @@
 #include "amdgpu_bo_list.h"
 #include "amdgpu_gem.h"
 #include "amdgpu_doorbell.h"
+#include "amdgpu_amdkfd.h"
 
 #define MAX_GPU_INSTANCE		16
 
@@ -163,6 +164,7 @@ extern int amdgpu_si_support;
 extern int amdgpu_cik_support;
 #endif
 
+#define AMDGPU_VM_MAX_NUM_CTX			4096
 #define AMDGPU_SG_THRESHOLD			(256*1024*1024)
 #define AMDGPU_DEFAULT_GTT_SIZE_MB		3072ULL /* 3GB by default */
 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
@@ -235,7 +237,7 @@ enum amdgpu_kiq_irq {
 
 #define MAX_KIQ_REG_WAIT       5000 /* in usecs, 5ms */
 #define MAX_KIQ_REG_BAILOUT_INTERVAL   5 /* in msecs, 5ms */
-#define MAX_KIQ_REG_TRY 20
+#define MAX_KIQ_REG_TRY 80 /* 20 -> 80 */
 
 int amdgpu_device_ip_set_clockgating_state(void *dev,
 					   enum amd_ip_block_type block_type,
@@ -862,6 +864,9 @@ struct amdgpu_device {
 	/* GDS */
 	struct amdgpu_gds		gds;
 
+	/* KFD */
+	struct amdgpu_kfd_dev		kfd;
+
 	/* display related functionality */
 	struct amdgpu_display_manager dm;
 
@@ -875,9 +880,6 @@ struct amdgpu_device {
 	atomic64_t visible_pin_size;
 	atomic64_t gart_pin_size;
 
-	/* amdkfd interface */
-	struct kfd_dev          *kfd;
-
 	/* soc15 register offset based on ip, instance and  segment */
 	uint32_t 		*reg_offset[MAX_HWIP][HWIP_MAX_INSTANCE];
 
@@ -910,7 +912,9 @@ struct amdgpu_device {
 	bool                            in_gpu_reset;
 	struct mutex  lock_reset;
 	struct amdgpu_doorbell_index doorbell_index;
+
 	int asic_reset_res;
+	struct work_struct		xgmi_reset_work;
 };
 
 static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
index 47db65926d71..4376b17ca594 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
@@ -886,6 +886,5 @@ void amdgpu_acpi_get_backlight_caps(struct amdgpu_device *adev,
 void amdgpu_acpi_fini(struct amdgpu_device *adev)
 {
 	unregister_acpi_notifier(&adev->acpi_nb);
-	if (adev->atif)
-		kfree(adev->atif);
+	kfree(adev->atif);
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
index d693b8047653..2dfaf158ef07 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@@ -26,15 +26,26 @@
 #include "amdgpu.h"
 #include "amdgpu_gfx.h"
 #include <linux/module.h>
+#include <linux/dma-buf.h>
 
 const struct kgd2kfd_calls *kgd2kfd;
 
 static const unsigned int compute_vmid_bitmap = 0xFF00;
 
+/* Total memory size in system memory and all GPU VRAM. Used to
+ * estimate worst case amount of memory to reserve for page tables
+ */
+uint64_t amdgpu_amdkfd_total_mem_size;
+
 int amdgpu_amdkfd_init(void)
 {
+	struct sysinfo si;
 	int ret;
 
+	si_meminfo(&si);
+	amdgpu_amdkfd_total_mem_size = si.totalram - si.totalhigh;
+	amdgpu_amdkfd_total_mem_size *= si.mem_unit;
+
 #ifdef CONFIG_HSA_AMD
 	ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
 	if (ret)
@@ -87,8 +98,11 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
 		return;
 	}
 
-	adev->kfd = kgd2kfd->probe((struct kgd_dev *)adev,
-				   adev->pdev, kfd2kgd);
+	adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
+				       adev->pdev, kfd2kgd);
+
+	if (adev->kfd.dev)
+		amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
 }
 
 /**
@@ -128,7 +142,8 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 {
 	int i, n;
 	int last_valid_bit;
-	if (adev->kfd) {
+
+	if (adev->kfd.dev) {
 		struct kgd2kfd_shared_resources gpu_resources = {
 			.compute_vmid_bitmap = compute_vmid_bitmap,
 			.num_pipe_per_mec = adev->gfx.mec.num_pipe_per_mec,
@@ -167,7 +182,7 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 				&gpu_resources.doorbell_start_offset);
 
 		if (adev->asic_type < CHIP_VEGA10) {
-			kgd2kfd->device_init(adev->kfd, &gpu_resources);
+			kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
 			return;
 		}
 
@@ -196,37 +211,37 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
 		gpu_resources.reserved_doorbell_mask = 0x1e0;
 		gpu_resources.reserved_doorbell_val  = 0x0e0;
 
-		kgd2kfd->device_init(adev->kfd, &gpu_resources);
+		kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
 	}
 }
 
 void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
 {
-	if (adev->kfd) {
-		kgd2kfd->device_exit(adev->kfd);
-		adev->kfd = NULL;
+	if (adev->kfd.dev) {
+		kgd2kfd->device_exit(adev->kfd.dev);
+		adev->kfd.dev = NULL;
 	}
 }
 
 void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
 		const void *ih_ring_entry)
 {
-	if (adev->kfd)
-		kgd2kfd->interrupt(adev->kfd, ih_ring_entry);
+	if (adev->kfd.dev)
+		kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
 }
 
 void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
 {
-	if (adev->kfd)
-		kgd2kfd->suspend(adev->kfd);
+	if (adev->kfd.dev)
+		kgd2kfd->suspend(adev->kfd.dev);
 }
 
 int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->resume(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd->resume(adev->kfd.dev);
 
 	return r;
 }
@@ -235,8 +250,8 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->pre_reset(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd->pre_reset(adev->kfd.dev);
 
 	return r;
 }
@@ -245,8 +260,8 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
 {
 	int r = 0;
 
-	if (adev->kfd)
-		r = kgd2kfd->post_reset(adev->kfd);
+	if (adev->kfd.dev)
+		r = kgd2kfd->post_reset(adev->kfd.dev);
 
 	return r;
 }
@@ -419,6 +434,62 @@ void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info)
 	cu_info->lds_size = acu_info.lds_size;
 }
 
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dma_buf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct dma_buf *dma_buf;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	uint64_t metadata_flags;
+	int r = -EINVAL;
+
+	dma_buf = dma_buf_get(dma_buf_fd);
+	if (IS_ERR(dma_buf))
+		return PTR_ERR(dma_buf);
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		goto out_put;
+
+	obj = dma_buf->priv;
+	if (obj->dev->driver != adev->ddev->driver)
+		/* Can't handle buffers from different drivers */
+		goto out_put;
+
+	adev = obj->dev->dev_private;
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		goto out_put;
+
+	r = 0;
+	if (dma_buf_kgd)
+		*dma_buf_kgd = (struct kgd_dev *)adev;
+	if (bo_size)
+		*bo_size = amdgpu_bo_size(bo);
+	if (metadata_size)
+		*metadata_size = bo->metadata_size;
+	if (metadata_buffer)
+		r = amdgpu_bo_get_metadata(bo, metadata_buffer, buffer_size,
+					   metadata_size, &metadata_flags);
+	if (flags) {
+		*flags = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+			ALLOC_MEM_FLAGS_VRAM : ALLOC_MEM_FLAGS_GTT;
+
+		if (bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)
+			*flags |= ALLOC_MEM_FLAGS_PUBLIC;
+	}
+
+out_put:
+	dma_buf_put(dma_buf);
+	return r;
+}
+
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
@@ -501,7 +572,7 @@ void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
 
 bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
 {
-	if (adev->kfd) {
+	if (adev->kfd.dev) {
 		if ((1 << vmid) & compute_vmid_bitmap)
 			return true;
 	}
@@ -515,7 +586,7 @@ bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
 	return false;
 }
 
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 {
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
index bcf587b4ba98..70429f7aa9a8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
@@ -27,7 +27,6 @@
 
 #include <linux/types.h>
 #include <linux/mm.h>
-#include <linux/mmu_context.h>
 #include <linux/workqueue.h>
 #include <kgd_kfd_interface.h>
 #include <drm/ttm/ttm_execbuf_util.h>
@@ -35,6 +34,7 @@
 #include "amdgpu_vm.h"
 
 extern const struct kgd2kfd_calls *kgd2kfd;
+extern uint64_t amdgpu_amdkfd_total_mem_size;
 
 struct amdgpu_device;
 
@@ -77,6 +77,11 @@ struct amdgpu_amdkfd_fence {
 	char timeline_name[TASK_COMM_LEN];
 };
 
+struct amdgpu_kfd_dev {
+	struct kfd_dev *dev;
+	uint64_t vram_used;
+};
+
 struct amdgpu_amdkfd_fence *amdgpu_amdkfd_fence_create(u64 context,
 						       struct mm_struct *mm);
 bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm);
@@ -144,6 +149,11 @@ uint64_t amdgpu_amdkfd_get_gpu_clock_counter(struct kgd_dev *kgd);
 
 uint32_t amdgpu_amdkfd_get_max_engine_clock_in_mhz(struct kgd_dev *kgd);
 void amdgpu_amdkfd_get_cu_info(struct kgd_dev *kgd, struct kfd_cu_info *cu_info);
+int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
+				  struct kgd_dev **dmabuf_kgd,
+				  uint64_t *bo_size, void *metadata_buffer,
+				  size_t buffer_size, uint32_t *metadata_size,
+				  uint32_t *flags);
 uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
 uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
 
@@ -195,7 +205,13 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
 int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 					      struct kfd_vm_fault_info *info);
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+				      struct dma_buf *dmabuf,
+				      uint64_t va, void *vm,
+				      struct kgd_mem **mem, uint64_t *size,
+				      uint64_t *mmap_offset);
+
 void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
 
 #endif /* AMDGPU_AMDKFD_H_INCLUDED */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
index 72a357dae070..ff7fac7df34b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v7.c
@@ -23,6 +23,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
+#include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
index 0e2a56b6a9b6..56ea929f524b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
@@ -24,6 +24,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
+#include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
index 03b604c96d94..5c51d4910650 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
@@ -26,6 +26,7 @@
 #include <linux/fdtable.h>
 #include <linux/uaccess.h>
 #include <linux/firmware.h>
+#include <linux/mmu_context.h>
 #include <drm/drmP.h>
 #include "amdgpu.h"
 #include "amdgpu_amdkfd.h"
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index f3129b912714..be1ab43473c6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -25,6 +25,7 @@
 #include <linux/list.h>
 #include <linux/pagemap.h>
 #include <linux/sched/mm.h>
+#include <linux/dma-buf.h>
 #include <drm/drmP.h>
 #include "amdgpu_object.h"
 #include "amdgpu_vm.h"
@@ -110,17 +111,17 @@ void amdgpu_amdkfd_gpuvm_init_mem_limits(void)
 		(kfd_mem_limit.max_ttm_mem_limit >> 20));
 }
 
-static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
+static int amdgpu_amdkfd_reserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 domain, bool sg)
 {
-	size_t acc_size, system_mem_needed, ttm_mem_needed;
+	size_t acc_size, system_mem_needed, ttm_mem_needed, vram_needed;
+	uint64_t reserved_for_pt = amdgpu_amdkfd_total_mem_size >> 9;
 	int ret = 0;
 
 	acc_size = ttm_bo_dma_acc_size(&adev->mman.bdev, size,
 				       sizeof(struct amdgpu_bo));
 
-	spin_lock(&kfd_mem_limit.mem_limit_lock);
-
+	vram_needed = 0;
 	if (domain == AMDGPU_GEM_DOMAIN_GTT) {
 		/* TTM GTT memory */
 		system_mem_needed = acc_size + size;
@@ -133,23 +134,30 @@ static int amdgpu_amdkfd_reserve_system_mem_limit(struct amdgpu_device *adev,
 		/* VRAM and SG */
 		system_mem_needed = acc_size;
 		ttm_mem_needed = acc_size;
+		if (domain == AMDGPU_GEM_DOMAIN_VRAM)
+			vram_needed = size;
 	}
 
+	spin_lock(&kfd_mem_limit.mem_limit_lock);
+
 	if ((kfd_mem_limit.system_mem_used + system_mem_needed >
-		kfd_mem_limit.max_system_mem_limit) ||
-		(kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
-		kfd_mem_limit.max_ttm_mem_limit))
+	     kfd_mem_limit.max_system_mem_limit) ||
+	    (kfd_mem_limit.ttm_mem_used + ttm_mem_needed >
+	     kfd_mem_limit.max_ttm_mem_limit) ||
+	    (adev->kfd.vram_used + vram_needed >
+	     adev->gmc.real_vram_size - reserved_for_pt)) {
 		ret = -ENOMEM;
-	else {
+	} else {
 		kfd_mem_limit.system_mem_used += system_mem_needed;
 		kfd_mem_limit.ttm_mem_used += ttm_mem_needed;
+		adev->kfd.vram_used += vram_needed;
 	}
 
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 	return ret;
 }
 
-static void unreserve_system_mem_limit(struct amdgpu_device *adev,
+static void unreserve_mem_limit(struct amdgpu_device *adev,
 		uint64_t size, u32 domain, bool sg)
 {
 	size_t acc_size;
@@ -167,6 +175,11 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 	} else {
 		kfd_mem_limit.system_mem_used -= acc_size;
 		kfd_mem_limit.ttm_mem_used -= acc_size;
+		if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
+			adev->kfd.vram_used -= size;
+			WARN_ONCE(adev->kfd.vram_used < 0,
+				  "kfd VRAM memory accounting unbalanced");
+		}
 	}
 	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
 		  "kfd system memory accounting unbalanced");
@@ -176,29 +189,18 @@ static void unreserve_system_mem_limit(struct amdgpu_device *adev,
 	spin_unlock(&kfd_mem_limit.mem_limit_lock);
 }
 
-void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo)
+void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
 {
-	spin_lock(&kfd_mem_limit.mem_limit_lock);
+	struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
+	u32 domain = bo->preferred_domains;
+	bool sg = (bo->preferred_domains == AMDGPU_GEM_DOMAIN_CPU);
 
 	if (bo->flags & AMDGPU_AMDKFD_USERPTR_BO) {
-		kfd_mem_limit.system_mem_used -=
-			(bo->tbo.acc_size + amdgpu_bo_size(bo));
-		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
-	} else if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GTT) {
-		kfd_mem_limit.system_mem_used -=
-			(bo->tbo.acc_size + amdgpu_bo_size(bo));
-		kfd_mem_limit.ttm_mem_used -=
-			(bo->tbo.acc_size + amdgpu_bo_size(bo));
-	} else {
-		kfd_mem_limit.system_mem_used -= bo->tbo.acc_size;
-		kfd_mem_limit.ttm_mem_used -= bo->tbo.acc_size;
+		domain = AMDGPU_GEM_DOMAIN_CPU;
+		sg = false;
 	}
-	WARN_ONCE(kfd_mem_limit.system_mem_used < 0,
-		  "kfd system memory accounting unbalanced");
-	WARN_ONCE(kfd_mem_limit.ttm_mem_used < 0,
-		  "kfd TTM memory accounting unbalanced");
 
-	spin_unlock(&kfd_mem_limit.mem_limit_lock);
+	unreserve_mem_limit(adev, amdgpu_bo_size(bo), domain, sg);
 }
 
 
@@ -535,7 +537,7 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
 	struct amdgpu_bo *bo = mem->bo;
 
 	INIT_LIST_HEAD(&entry->head);
-	entry->shared = true;
+	entry->num_shared = 1;
 	entry->bo = &bo->tbo;
 	mutex_lock(&process_info->lock);
 	if (userptr)
@@ -676,7 +678,7 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
 
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
-	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.tv.num_shared = 1;
 	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
@@ -740,7 +742,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
 
 	ctx->kfd_bo.priority = 0;
 	ctx->kfd_bo.tv.bo = &bo->tbo;
-	ctx->kfd_bo.tv.shared = true;
+	ctx->kfd_bo.tv.num_shared = 1;
 	ctx->kfd_bo.user_pages = NULL;
 	list_add(&ctx->kfd_bo.tv.head, &ctx->list);
 
@@ -885,6 +887,24 @@ update_gpuvm_pte_failed:
 	return ret;
 }
 
+static struct sg_table *create_doorbell_sg(uint64_t addr, uint32_t size)
+{
+	struct sg_table *sg = kmalloc(sizeof(*sg), GFP_KERNEL);
+
+	if (!sg)
+		return NULL;
+	if (sg_alloc_table(sg, 1, GFP_KERNEL)) {
+		kfree(sg);
+		return NULL;
+	}
+	sg->sgl->dma_address = addr;
+	sg->sgl->length = size;
+#ifdef CONFIG_NEED_SG_DMA_LENGTH
+	sg->sgl->dma_length = size;
+#endif
+	return sg;
+}
+
 static int process_validate_vms(struct amdkfd_process_info *process_info)
 {
 	struct amdgpu_vm *peer_vm;
@@ -1168,6 +1188,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 {
 	struct amdgpu_device *adev = get_amdgpu_device(kgd);
 	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+	enum ttm_bo_type bo_type = ttm_bo_type_device;
+	struct sg_table *sg = NULL;
 	uint64_t user_addr = 0;
 	struct amdgpu_bo *bo;
 	struct amdgpu_bo_param bp;
@@ -1196,13 +1218,25 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 		if (!offset || !*offset)
 			return -EINVAL;
 		user_addr = *offset;
+	} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
+		domain = AMDGPU_GEM_DOMAIN_GTT;
+		alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
+		bo_type = ttm_bo_type_sg;
+		alloc_flags = 0;
+		if (size > UINT_MAX)
+			return -EINVAL;
+		sg = create_doorbell_sg(*offset, size);
+		if (!sg)
+			return -ENOMEM;
 	} else {
 		return -EINVAL;
 	}
 
 	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
-	if (!*mem)
-		return -ENOMEM;
+	if (!*mem) {
+		ret = -ENOMEM;
+		goto err;
+	}
 	INIT_LIST_HEAD(&(*mem)->bo_va_list);
 	mutex_init(&(*mem)->lock);
 	(*mem)->aql_queue = !!(flags & ALLOC_MEM_FLAGS_AQL_QUEUE_MEM);
@@ -1235,8 +1269,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 
 	amdgpu_sync_create(&(*mem)->sync);
 
-	ret = amdgpu_amdkfd_reserve_system_mem_limit(adev, size,
-						     alloc_domain, false);
+	ret = amdgpu_amdkfd_reserve_mem_limit(adev, size, alloc_domain, !!sg);
 	if (ret) {
 		pr_debug("Insufficient system memory\n");
 		goto err_reserve_limit;
@@ -1250,7 +1283,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 	bp.byte_align = byte_align;
 	bp.domain = alloc_domain;
 	bp.flags = alloc_flags;
-	bp.type = ttm_bo_type_device;
+	bp.type = bo_type;
 	bp.resv = NULL;
 	ret = amdgpu_bo_create(adev, &bp, &bo);
 	if (ret) {
@@ -1258,6 +1291,10 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
 				domain_string(alloc_domain), ret);
 		goto err_bo_create;
 	}
+	if (bo_type == ttm_bo_type_sg) {
+		bo->tbo.sg = sg;
+		bo->tbo.ttm->sg = sg;
+	}
 	bo->kfd_bo = *mem;
 	(*mem)->bo = bo;
 	if (user_addr)
@@ -1289,10 +1326,15 @@ allocate_init_user_pages_failed:
 	/* Don't unreserve system mem limit twice */
 	goto err_reserve_limit;
 err_bo_create:
-	unreserve_system_mem_limit(adev, size, alloc_domain, false);
+	unreserve_mem_limit(adev, size, alloc_domain, !!sg);
 err_reserve_limit:
 	mutex_destroy(&(*mem)->lock);
 	kfree(*mem);
+err:
+	if (sg) {
+		sg_free_table(sg);
+		kfree(sg);
+	}
 	return ret;
 }
 
@@ -1362,6 +1404,14 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
 	/* Free the sync object */
 	amdgpu_sync_free(&mem->sync);
 
+	/* If the SG is not NULL, it's one we created for a doorbell
+	 * BO. We need to free it.
+	 */
+	if (mem->bo->tbo.sg) {
+		sg_free_table(mem->bo->tbo.sg);
+		kfree(mem->bo->tbo.sg);
+	}
+
 	/* Free the BO*/
 	amdgpu_bo_unref(&mem->bo);
 	mutex_destroy(&mem->lock);
@@ -1664,6 +1714,60 @@ int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
 	return 0;
 }
 
+int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
+				      struct dma_buf *dma_buf,
+				      uint64_t va, void *vm,
+				      struct kgd_mem **mem, uint64_t *size,
+				      uint64_t *mmap_offset)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
+	struct drm_gem_object *obj;
+	struct amdgpu_bo *bo;
+	struct amdgpu_vm *avm = (struct amdgpu_vm *)vm;
+
+	if (dma_buf->ops != &amdgpu_dmabuf_ops)
+		/* Can't handle non-graphics buffers */
+		return -EINVAL;
+
+	obj = dma_buf->priv;
+	if (obj->dev->dev_private != adev)
+		/* Can't handle buffers from other devices */
+		return -EINVAL;
+
+	bo = gem_to_amdgpu_bo(obj);
+	if (!(bo->preferred_domains & (AMDGPU_GEM_DOMAIN_VRAM |
+				    AMDGPU_GEM_DOMAIN_GTT)))
+		/* Only VRAM and GTT BOs are supported */
+		return -EINVAL;
+
+	*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
+	if (!*mem)
+		return -ENOMEM;
+
+	if (size)
+		*size = amdgpu_bo_size(bo);
+
+	if (mmap_offset)
+		*mmap_offset = amdgpu_bo_mmap_offset(bo);
+
+	INIT_LIST_HEAD(&(*mem)->bo_va_list);
+	mutex_init(&(*mem)->lock);
+	(*mem)->mapping_flags =
+		AMDGPU_VM_PAGE_READABLE | AMDGPU_VM_PAGE_WRITEABLE |
+		AMDGPU_VM_PAGE_EXECUTABLE | AMDGPU_VM_MTYPE_NC;
+
+	(*mem)->bo = amdgpu_bo_ref(bo);
+	(*mem)->va = va;
+	(*mem)->domain = (bo->preferred_domains & AMDGPU_GEM_DOMAIN_VRAM) ?
+		AMDGPU_GEM_DOMAIN_VRAM : AMDGPU_GEM_DOMAIN_GTT;
+	(*mem)->mapped_to_gpu_memory = 0;
+	(*mem)->process_info = avm->process_info;
+	add_kgd_mem_to_kfd_bo_list(*mem, avm->process_info, false);
+	amdgpu_sync_create(&(*mem)->sync);
+
+	return 0;
+}
+
 /* Evict a userptr BO by stopping the queues if necessary
  *
  * Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@@ -1830,7 +1934,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
 			    validate_list.head) {
 		list_add_tail(&mem->resv_list.head, &resv_list);
 		mem->resv_list.bo = mem->validate_list.bo;
-		mem->resv_list.shared = mem->validate_list.shared;
+		mem->resv_list.num_shared = mem->validate_list.num_shared;
 	}
 
 	/* Reserve all BOs and page tables for validation */
@@ -2049,7 +2153,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 
 		list_add_tail(&mem->resv_list.head, &ctx.list);
 		mem->resv_list.bo = mem->validate_list.bo;
-		mem->resv_list.shared = mem->validate_list.shared;
+		mem->resv_list.num_shared = mem->validate_list.num_shared;
 	}
 
 	ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
index a028661d9e20..92b11de19581 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@@ -576,6 +576,7 @@ static const struct amdgpu_px_quirk amdgpu_px_quirk_list[] = {
 	{ 0x1002, 0x6900, 0x1028, 0x0812, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1028, 0x0813, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0x1002, 0x6900, 0x1025, 0x125A, AMDGPU_PX_QUIRK_FORCE_ATPX },
+	{ 0x1002, 0x6900, 0x17AA, 0x3806, AMDGPU_PX_QUIRK_FORCE_ATPX },
 	{ 0, 0, 0, 0, 0 },
 };
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index 14d2982a47cc..5c79da8e1150 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -118,7 +118,6 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
 		entry->priority = min(info[i].bo_priority,
 				      AMDGPU_BO_LIST_MAX_PRIORITY);
 		entry->tv.bo = &bo->tbo;
-		entry->tv.shared = !bo->prime_shared_count;
 
 		if (bo->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
 			list->gds_obj = bo;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
index ceadeeadfa56..387f1cf1dc20 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@@ -381,7 +381,8 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 					      (adev->pdev->revision == 0xe7) ||
 					      (adev->pdev->revision == 0xef))) ||
 					    ((adev->pdev->device == 0x6fdf) &&
-					     (adev->pdev->revision == 0xef))) {
+					     ((adev->pdev->revision == 0xef) ||
+					      (adev->pdev->revision == 0xff)))) {
 						info->is_kicker = true;
 						strcpy(fw_name, "amdgpu/polaris10_k_smc.bin");
 					} else if ((adev->pdev->device == 0x67df) &&
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
index dc54e9efd910..1c49b8266d69 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@@ -50,7 +50,8 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
 	bo = amdgpu_bo_ref(gem_to_amdgpu_bo(gobj));
 	p->uf_entry.priority = 0;
 	p->uf_entry.tv.bo = &bo->tbo;
-	p->uf_entry.tv.shared = true;
+	/* One for TTM and one for the CS job */
+	p->uf_entry.tv.num_shared = 2;
 	p->uf_entry.user_pages = NULL;
 
 	drm_gem_object_put_unlocked(gobj);
@@ -124,14 +125,14 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
 		goto free_chunk;
 	}
 
+	mutex_lock(&p->ctx->lock);
+
 	/* skip guilty context job */
 	if (atomic_read(&p->ctx->guilty) == 1) {
 		ret = -ECANCELED;
 		goto free_chunk;
 	}
 
-	mutex_lock(&p->ctx->lock);
-
 	/* get chunks */
 	chunk_array_user = u64_to_user_ptr(cs->in.chunks);
 	if (copy_from_user(chunk_array, chunk_array_user,
@@ -598,6 +599,10 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 			return r;
 	}
 
+	/* One for TTM and one for the CS job */
+	amdgpu_bo_list_for_each_entry(e, p->bo_list)
+		e->tv.num_shared = 2;
+
 	amdgpu_bo_list_get_list(p->bo_list, &p->validated);
 	if (p->bo_list->first_userptr != p->bo_list->num_entries)
 		p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
@@ -717,8 +722,14 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	gws = p->bo_list->gws_obj;
 	oa = p->bo_list->oa_obj;
 
-	amdgpu_bo_list_for_each_entry(e, p->bo_list)
-		e->bo_va = amdgpu_vm_bo_find(vm, ttm_to_amdgpu_bo(e->tv.bo));
+	amdgpu_bo_list_for_each_entry(e, p->bo_list) {
+		struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
+
+		/* Make sure we use the exclusive slot for shared BOs */
+		if (bo->prime_shared_count)
+			e->tv.num_shared = 0;
+		e->bo_va = amdgpu_vm_bo_find(vm, bo);
+	}
 
 	if (gds) {
 		p->job->gds_base = amdgpu_bo_gpu_offset(gds) >> PAGE_SHIFT;
@@ -955,10 +966,6 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
 	if (r)
 		return r;
 
-	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
-	if (r)
-		return r;
-
 	p->job->vm_pd_addr = amdgpu_gmc_pd_addr(vm->root.base.bo);
 
 	if (amdgpu_vm_debug) {
@@ -1421,6 +1428,9 @@ int amdgpu_cs_fence_to_handle_ioctl(struct drm_device *dev, void *data,
 	if (IS_ERR(fence))
 		return PTR_ERR(fence);
 
+	if (!fence)
+		fence = dma_fence_get_stub();
+
 	switch (info->in.what) {
 	case AMDGPU_FENCE_TO_HANDLE_GET_SYNCOBJ:
 		r = drm_syncobj_create(&syncobj, 0, fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index 5b550706ee76..7e22be7ca68a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -74,7 +74,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	INIT_LIST_HEAD(&list);
 	INIT_LIST_HEAD(&csa_tv.head);
 	csa_tv.bo = &bo->tbo;
-	csa_tv.shared = true;
+	csa_tv.num_shared = 1;
 
 	list_add(&csa_tv.head, &list);
 	amdgpu_vm_get_pd_bo(vm, &list, &pd);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
index 95f4c4139fc6..d85184b5b35c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ctx.c
@@ -248,7 +248,7 @@ static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
 		return -ENOMEM;
 
 	mutex_lock(&mgr->lock);
-	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
+	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
 	if (r < 0) {
 		mutex_unlock(&mgr->lock);
 		kfree(ctx);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
index c75badfa5c4c..7ff3a28fc903 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@@ -515,7 +515,6 @@ void amdgpu_device_pci_config_reset(struct amdgpu_device *adev)
  */
 static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 {
-	amdgpu_asic_init_doorbell_index(adev);
 
 	/* No doorbell on SI hardware generation */
 	if (adev->asic_type < CHIP_BONAIRE) {
@@ -529,6 +528,8 @@ static int amdgpu_device_doorbell_init(struct amdgpu_device *adev)
 	if (pci_resource_flags(adev->pdev, 2) & IORESOURCE_UNSET)
 		return -EINVAL;
 
+	amdgpu_asic_init_doorbell_index(adev);
+
 	/* doorbell bar mapping */
 	adev->doorbell.base = pci_resource_start(adev->pdev, 2);
 	adev->doorbell.size = pci_resource_len(adev->pdev, 2);
@@ -1700,8 +1701,10 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
 		amdgpu_xgmi_add_device(adev);
 	amdgpu_amdkfd_device_init(adev);
 
-	if (amdgpu_sriov_vf(adev))
+	if (amdgpu_sriov_vf(adev)) {
+		amdgpu_virt_init_data_exchange(adev);
 		amdgpu_virt_release_full_gpu(adev, true);
+	}
 
 	return 0;
 }
@@ -1864,6 +1867,9 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
 {
 	int i, r;
 
+	if (adev->gmc.xgmi.num_physical_nodes > 1)
+		amdgpu_xgmi_remove_device(adev);
+
 	amdgpu_amdkfd_device_fini(adev);
 
 	amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
@@ -2353,6 +2359,19 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
 	return amdgpu_device_asic_has_dc_support(adev->asic_type);
 }
 
+
+static void amdgpu_device_xgmi_reset_func(struct work_struct *__work)
+{
+	struct amdgpu_device *adev =
+		container_of(__work, struct amdgpu_device, xgmi_reset_work);
+
+	adev->asic_reset_res =  amdgpu_asic_reset(adev);
+	if (adev->asic_reset_res)
+		DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
+			 adev->asic_reset_res, adev->ddev->unique);
+}
+
+
 /**
  * amdgpu_device_init - initialize the driver
  *
@@ -2451,6 +2470,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
 			  amdgpu_device_delay_enable_gfx_off);
 
+	INIT_WORK(&adev->xgmi_reset_work, amdgpu_device_xgmi_reset_func);
+
 	adev->gfx.gfx_off_req_count = 1;
 	adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
 
@@ -2613,9 +2634,6 @@ fence_driver_init:
 		goto failed;
 	}
 
-	if (amdgpu_sriov_vf(adev))
-		amdgpu_virt_init_data_exchange(adev);
-
 	amdgpu_fbdev_init(adev);
 
 	r = amdgpu_pm_sysfs_init(adev);
@@ -2779,7 +2797,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
 			struct drm_framebuffer *fb = crtc->primary->fb;
 			struct amdgpu_bo *robj;
 
-			if (amdgpu_crtc->cursor_bo) {
+			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 				r = amdgpu_bo_reserve(aobj, true);
 				if (r == 0) {
@@ -2887,7 +2905,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
 		list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 			struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
-			if (amdgpu_crtc->cursor_bo) {
+			if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
 				struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
 				r = amdgpu_bo_reserve(aobj, true);
 				if (r == 0) {
@@ -3207,6 +3225,7 @@ static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
 	r = amdgpu_ib_ring_tests(adev);
 
 error:
+	amdgpu_virt_init_data_exchange(adev);
 	amdgpu_virt_release_full_gpu(adev, true);
 	if (!r && adev->virt.gim_feature & AMDGIM_FEATURE_GIM_FLR_VRAMLOST) {
 		atomic_inc(&adev->vram_lost_counter);
@@ -3239,6 +3258,8 @@ bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev)
 
 	if (amdgpu_gpu_recovery == -1) {
 		switch (adev->asic_type) {
+		case CHIP_BONAIRE:
+		case CHIP_HAWAII:
 		case CHIP_TOPAZ:
 		case CHIP_TONGA:
 		case CHIP_FIJI:
@@ -3328,10 +3349,31 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
 	 */
 	if (need_full_reset) {
 		list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
-			r = amdgpu_asic_reset(tmp_adev);
-			if (r)
-				DRM_WARN("ASIC reset failed with err r, %d for drm dev, %s",
+			/* For XGMI run all resets in parallel to speed up the process */
+			if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+				if (!queue_work(system_highpri_wq, &tmp_adev->xgmi_reset_work))
+					r = -EALREADY;
+			} else
+				r = amdgpu_asic_reset(tmp_adev);
+
+			if (r) {
+				DRM_ERROR("ASIC reset failed with err r, %d for drm dev, %s",
 					 r, tmp_adev->ddev->unique);
+				break;
+			}
+		}
+
+		/* For XGMI wait for all PSP resets to complete before proceed */
+		if (!r) {
+			list_for_each_entry(tmp_adev, device_list_handle,
+					    gmc.xgmi.head) {
+				if (tmp_adev->gmc.xgmi.num_physical_nodes > 1) {
+					flush_work(&tmp_adev->xgmi_reset_work);
+					r = tmp_adev->asic_reset_res;
+					if (r)
+						break;
+				}
+			}
 		}
 	}
 
@@ -3434,14 +3476,16 @@ static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
 	mutex_lock(&adev->lock_reset);
 	atomic_inc(&adev->gpu_reset_counter);
 	adev->in_gpu_reset = 1;
-	/* Block kfd */
-	amdgpu_amdkfd_pre_reset(adev);
+	/* Block kfd: SRIOV would do it separately */
+	if (!amdgpu_sriov_vf(adev))
+                amdgpu_amdkfd_pre_reset(adev);
 }
 
 static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
 {
-	/*unlock kfd */
-	amdgpu_amdkfd_post_reset(adev);
+	/*unlock kfd: SRIOV would do it separately */
+	if (!amdgpu_sriov_vf(adev))
+                amdgpu_amdkfd_post_reset(adev);
 	amdgpu_vf_error_trans_all(adev);
 	adev->in_gpu_reset = 0;
 	mutex_unlock(&adev->lock_reset);
@@ -3518,8 +3562,6 @@ retry:	/* Rest of adevs pre asic reset from XGMI hive. */
 		if (tmp_adev == adev)
 			continue;
 
-		dev_info(tmp_adev->dev, "GPU reset begin for drm dev %s!\n", adev->ddev->unique);
-
 		amdgpu_device_lock_adev(tmp_adev);
 		r = amdgpu_device_pre_asic_reset(tmp_adev,
 						 NULL,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
index 15ce7e681d67..b083b219b1a9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@@ -188,10 +188,12 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 		goto cleanup;
 	}
 
-	r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
-	if (unlikely(r != 0)) {
-		DRM_ERROR("failed to pin new abo buffer before flip\n");
-		goto unreserve;
+	if (!adev->enable_virtual_display) {
+		r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
+		if (unlikely(r != 0)) {
+			DRM_ERROR("failed to pin new abo buffer before flip\n");
+			goto unreserve;
+		}
 	}
 
 	r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
@@ -211,7 +213,8 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
 	amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
 	amdgpu_bo_unreserve(new_abo);
 
-	work->base = amdgpu_bo_gpu_offset(new_abo);
+	if (!adev->enable_virtual_display)
+		work->base = amdgpu_bo_gpu_offset(new_abo);
 	work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
 		amdgpu_get_vblank_counter_kms(dev, work->crtc_id);
 
@@ -242,9 +245,10 @@ pflip_cleanup:
 		goto cleanup;
 	}
 unpin:
-	if (unlikely(amdgpu_bo_unpin(new_abo) != 0)) {
-		DRM_ERROR("failed to unpin new abo in error path\n");
-	}
+	if (!adev->enable_virtual_display)
+		if (unlikely(amdgpu_bo_unpin(new_abo) != 0))
+			DRM_ERROR("failed to unpin new abo in error path\n");
+
 unreserve:
 	amdgpu_bo_unreserve(new_abo);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 90f474f98b6e..c806f984bcc5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -865,6 +865,7 @@ static const struct pci_device_id pciidlist[] = {
 	/* VEGAM */
 	{0x1002, 0x694C, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
 	{0x1002, 0x694E, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
+	{0x1002, 0x694F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGAM},
 	/* Vega 10 */
 	{0x1002, 0x6860, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6861, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
@@ -873,7 +874,13 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x6864, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6867, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x6868, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x6869, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686a, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686b, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x686c, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686d, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686e, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
+	{0x1002, 0x686f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	{0x1002, 0x687f, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA10},
 	/* Vega 12 */
 	{0x1002, 0x69A0, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA12},
@@ -886,6 +893,7 @@ static const struct pci_device_id pciidlist[] = {
 	{0x1002, 0x66A1, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A2, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
+	{0x1002, 0x66A4, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66A7, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	{0x1002, 0x66AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_VEGA20},
 	/* Raven */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
index 7b3d1ebda9df..f4f00217546e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@@ -169,7 +169,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
 	INIT_LIST_HEAD(&duplicates);
 
 	tv.bo = &bo->tbo;
-	tv.shared = true;
+	tv.num_shared = 1;
 	list_add(&tv.head, &list);
 
 	amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
@@ -604,7 +604,10 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
 			return -ENOENT;
 		abo = gem_to_amdgpu_bo(gobj);
 		tv.bo = &abo->tbo;
-		tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
+		if (abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
+			tv.num_shared = 1;
+		else
+			tv.num_shared = 0;
 		list_add(&tv.head, &list);
 	} else {
 		gobj = NULL;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
index d63daba9b17c..f1ddfc50bcc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.h
@@ -54,6 +54,8 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
 
+extern const struct dma_buf_ops amdgpu_dmabuf_ops;
+
 /*
  * GEM objects.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
index 8c57924c075f..81e6070d255b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.h
@@ -99,6 +99,7 @@ struct amdgpu_xgmi {
 	unsigned num_physical_nodes;
 	/* gpu list in the same hive */
 	struct list_head head;
+	bool supported;
 };
 
 struct amdgpu_gmc {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
index 9ce8c93ec19b..f877bb78d10a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ih.h
@@ -51,14 +51,12 @@ struct amdgpu_ih_ring {
 struct amdgpu_ih_funcs {
 	/* ring read/write ptr handling, called from interrupt context */
 	u32 (*get_wptr)(struct amdgpu_device *adev);
-	bool (*prescreen_iv)(struct amdgpu_device *adev);
 	void (*decode_iv)(struct amdgpu_device *adev,
 			  struct amdgpu_iv_entry *entry);
 	void (*set_rptr)(struct amdgpu_device *adev);
 };
 
 #define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
-#define amdgpu_ih_prescreen_iv(adev) (adev)->irq.ih_funcs->prescreen_iv((adev))
 #define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
 #define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
index 6b6524f04ce0..b7968f426862 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@@ -145,13 +145,6 @@ static void amdgpu_irq_callback(struct amdgpu_device *adev,
 	u32 ring_index = ih->rptr >> 2;
 	struct amdgpu_iv_entry entry;
 
-	/* Prescreening of high-frequency interrupts */
-	if (!amdgpu_ih_prescreen_iv(adev))
-		return;
-
-	/* Before dispatching irq to IP blocks, send it to amdkfd */
-	amdgpu_amdkfd_interrupt(adev, (const void *) &ih->ring[ring_index]);
-
 	entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
 	amdgpu_ih_decode_iv(adev, &entry);
 
@@ -371,39 +364,38 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
 	unsigned client_id = entry->client_id;
 	unsigned src_id = entry->src_id;
 	struct amdgpu_irq_src *src;
+	bool handled = false;
 	int r;
 
 	trace_amdgpu_iv(entry);
 
 	if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
 		DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
-		return;
-	}
 
-	if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
+	} else	if (src_id >= AMDGPU_MAX_IRQ_SRC_ID) {
 		DRM_DEBUG("Invalid src_id in IV: %d\n", src_id);
-		return;
-	}
 
-	if (adev->irq.virq[src_id]) {
+	} else if (adev->irq.virq[src_id]) {
 		generic_handle_irq(irq_find_mapping(adev->irq.domain, src_id));
-	} else {
-		if (!adev->irq.client[client_id].sources) {
-			DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
-				  client_id, src_id);
-			return;
-		}
 
-		src = adev->irq.client[client_id].sources[src_id];
-		if (!src) {
-			DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
-			return;
-		}
+	} else if (!adev->irq.client[client_id].sources) {
+		DRM_DEBUG("Unregistered interrupt client_id: %d src_id: %d\n",
+			  client_id, src_id);
 
+	} else if ((src = adev->irq.client[client_id].sources[src_id])) {
 		r = src->funcs->process(adev, src, entry);
-		if (r)
+		if (r < 0)
 			DRM_ERROR("error processing interrupt (%d)\n", r);
+		else if (r)
+			handled = true;
+
+	} else {
+		DRM_DEBUG("Unhandled interrupt src_id: %d\n", src_id);
 	}
+
+	/* Send it to amdkfd as well if it isn't already handled */
+	if (!handled)
+		amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
index e0af44fd6a0c..0a17fb1af204 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@@ -32,6 +32,9 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 {
 	struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
 	struct amdgpu_job *job = to_amdgpu_job(s_job);
+	struct amdgpu_task_info ti;
+
+	memset(&ti, 0, sizeof(struct amdgpu_task_info));
 
 	if (amdgpu_ring_soft_recovery(ring, job->vmid, s_job->s_fence->parent)) {
 		DRM_ERROR("ring %s timeout, but soft recovered\n",
@@ -39,9 +42,12 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
 		return;
 	}
 
+	amdgpu_vm_get_task_info(ring->adev, job->pasid, &ti);
 	DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
 		  job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
 		  ring->fence_drv.sync_seq);
+	DRM_ERROR("Process information: process %s pid %d thread %s pid %d\n",
+		  ti.process_name, ti.tgid, ti.task_name, ti.pid);
 
 	if (amdgpu_device_should_recover_gpu(ring->adev))
 		amdgpu_device_gpu_recover(ring->adev, job);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
index e55508b39496..3e6823fdd939 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
@@ -238,44 +238,40 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
  * amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
  *
  * @mn: our notifier
- * @mm: the mm this callback is about
- * @start: start of updated range
- * @end: end of updated range
+ * @range: mmu notifier context
  *
  * Block for operations on BOs to finish and mark pages as accessed and
  * potentially dirty.
  */
 static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+			const struct mmu_notifier_range *range)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
+	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end -= 1;
+	end = range->end - 1;
 
 	/* TODO we should be able to split locking for interval tree and
 	 * amdgpu_mn_invalidate_node
 	 */
-	if (amdgpu_mn_read_lock(amn, blockable))
+	if (amdgpu_mn_read_lock(amn, range->blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, start, end);
+	it = interval_tree_iter_first(&amn->objects, range->start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 
-		if (!blockable) {
+		if (!range->blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, start, end);
+		it = interval_tree_iter_next(it, range->start, end);
 
-		amdgpu_mn_invalidate_node(node, start, end);
+		amdgpu_mn_invalidate_node(node, range->start, end);
 	}
 
 	return 0;
@@ -294,39 +290,38 @@ static int amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
  * are restorted in amdgpu_mn_invalidate_range_end_hsa.
  */
 static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
-						 struct mm_struct *mm,
-						 unsigned long start,
-						 unsigned long end,
-						 bool blockable)
+			const struct mmu_notifier_range *range)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 	struct interval_tree_node *it;
+	unsigned long end;
 
 	/* notification is exclusive, but interval is inclusive */
-	end -= 1;
+	end = range->end - 1;
 
-	if (amdgpu_mn_read_lock(amn, blockable))
+	if (amdgpu_mn_read_lock(amn, range->blockable))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&amn->objects, start, end);
+	it = interval_tree_iter_first(&amn->objects, range->start, end);
 	while (it) {
 		struct amdgpu_mn_node *node;
 		struct amdgpu_bo *bo;
 
-		if (!blockable) {
+		if (!range->blockable) {
 			amdgpu_mn_read_unlock(amn);
 			return -EAGAIN;
 		}
 
 		node = container_of(it, struct amdgpu_mn_node, it);
-		it = interval_tree_iter_next(it, start, end);
+		it = interval_tree_iter_next(it, range->start, end);
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
 			struct kgd_mem *mem = bo->kfd_bo;
 
 			if (amdgpu_ttm_tt_affect_userptr(bo->tbo.ttm,
-							 start, end))
-				amdgpu_amdkfd_evict_userptr(mem, mm);
+							 range->start,
+							 end))
+				amdgpu_amdkfd_evict_userptr(mem, range->mm);
 		}
 	}
 
@@ -344,9 +339,7 @@ static int amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
  * Release the lock again to allow new command submissions.
  */
 static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
-					   struct mm_struct *mm,
-					   unsigned long start,
-					   unsigned long end)
+			const struct mmu_notifier_range *range)
 {
 	struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
index cf768acb51dc..728e15e5d68a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@@ -81,7 +81,7 @@ static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
 		amdgpu_bo_subtract_pin_size(bo);
 
 	if (bo->kfd_bo)
-		amdgpu_amdkfd_unreserve_system_memory_limit(bo);
+		amdgpu_amdkfd_unreserve_memory_limit(bo);
 
 	amdgpu_bo_kunmap(bo);
 
@@ -608,53 +608,6 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
 }
 
 /**
- * amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
- * @adev: amdgpu device object
- * @ring: amdgpu_ring for the engine handling the buffer operations
- * @bo: &amdgpu_bo buffer to be backed up
- * @resv: reservation object with embedded fence
- * @fence: dma_fence associated with the operation
- * @direct: whether to submit the job directly
- *
- * Copies an &amdgpu_bo buffer object to its shadow object.
- * Not used for now.
- *
- * Returns:
- * 0 for success or a negative error code on failure.
- */
-int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
-			       struct amdgpu_ring *ring,
-			       struct amdgpu_bo *bo,
-			       struct reservation_object *resv,
-			       struct dma_fence **fence,
-			       bool direct)
-
-{
-	struct amdgpu_bo *shadow = bo->shadow;
-	uint64_t bo_addr, shadow_addr;
-	int r;
-
-	if (!shadow)
-		return -EINVAL;
-
-	bo_addr = amdgpu_bo_gpu_offset(bo);
-	shadow_addr = amdgpu_bo_gpu_offset(bo->shadow);
-
-	r = reservation_object_reserve_shared(bo->tbo.resv, 1);
-	if (r)
-		goto err;
-
-	r = amdgpu_copy_buffer(ring, bo_addr, shadow_addr,
-			       amdgpu_bo_size(bo), resv, fence,
-			       direct, false);
-	if (!r)
-		amdgpu_bo_fence(bo, *fence, true);
-
-err:
-	return r;
-}
-
-/**
  * amdgpu_bo_validate - validate an &amdgpu_bo buffer object
  * @bo: pointer to the buffer object
  *
@@ -959,7 +912,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
 	struct ttm_operation_ctx ctx = { false, false };
 	int r, i;
 
-	if (!bo->pin_count) {
+	if (WARN_ON_ONCE(!bo->pin_count)) {
 		dev_warn(adev->dev, "%p unpin not necessary\n", bo);
 		return 0;
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
index 7d3312d0da11..9291c2f837e9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.h
@@ -267,11 +267,6 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
 		     bool shared);
 u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
-int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
-			       struct amdgpu_ring *ring,
-			       struct amdgpu_bo *bo,
-			       struct reservation_object *resv,
-			       struct dma_fence **fence, bool direct);
 int amdgpu_bo_validate(struct amdgpu_bo *bo);
 int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,
 			     struct dma_fence **fence);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
index 1f61ed95727c..6896dec97fc7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@@ -2008,6 +2008,7 @@ void amdgpu_pm_print_power_states(struct amdgpu_device *adev)
 
 int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 {
+	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
 	int ret;
 
 	if (adev->pm.sysfs_initialized)
@@ -2091,12 +2092,14 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 				"pp_power_profile_mode\n");
 		return ret;
 	}
-	ret = device_create_file(adev->dev,
-			&dev_attr_pp_od_clk_voltage);
-	if (ret) {
-		DRM_ERROR("failed to create device file	"
-				"pp_od_clk_voltage\n");
-		return ret;
+	if (hwmgr->od_enabled) {
+		ret = device_create_file(adev->dev,
+				&dev_attr_pp_od_clk_voltage);
+		if (ret) {
+			DRM_ERROR("failed to create device file	"
+					"pp_od_clk_voltage\n");
+			return ret;
+		}
 	}
 	ret = device_create_file(adev->dev,
 			&dev_attr_gpu_busy_percent);
@@ -2118,6 +2121,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 
 void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 {
+	struct pp_hwmgr *hwmgr = adev->powerplay.pp_handle;
+
 	if (adev->pm.dpm_enabled == 0)
 		return;
 
@@ -2138,8 +2143,9 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 	device_remove_file(adev->dev,
 			&dev_attr_pp_power_profile_mode);
-	device_remove_file(adev->dev,
-			&dev_attr_pp_od_clk_voltage);
+	if (hwmgr->od_enabled)
+		device_remove_file(adev->dev,
+				&dev_attr_pp_od_clk_voltage);
 	device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
 }
 
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
index 3e44d889f7af..71913a18d142 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
@@ -39,8 +39,6 @@
 #include <drm/amdgpu_drm.h>
 #include <linux/dma-buf.h>
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops;
-
 /**
  * amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
  * implementation
@@ -332,7 +330,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
 	return ret;
 }
 
-static const struct dma_buf_ops amdgpu_dmabuf_ops = {
+const struct dma_buf_ops amdgpu_dmabuf_ops = {
 	.attach = amdgpu_gem_map_attach,
 	.detach = amdgpu_gem_map_detach,
 	.map_dma_buf = drm_gem_map_dma_buf,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index e05dc66b1090..8fab0d637ee5 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -155,10 +155,14 @@ psp_cmd_submit_buf(struct psp_context *psp,
 	return ret;
 }
 
-static void psp_prep_tmr_cmd_buf(struct psp_gfx_cmd_resp *cmd,
+static void psp_prep_tmr_cmd_buf(struct psp_context *psp,
+				 struct psp_gfx_cmd_resp *cmd,
 				 uint64_t tmr_mc, uint32_t size)
 {
-	cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
+	if (psp_support_vmr_ring(psp))
+		cmd->cmd_id = GFX_CMD_ID_SETUP_VMR;
+	else
+		cmd->cmd_id = GFX_CMD_ID_SETUP_TMR;
 	cmd->cmd.cmd_setup_tmr.buf_phy_addr_lo = lower_32_bits(tmr_mc);
 	cmd->cmd.cmd_setup_tmr.buf_phy_addr_hi = upper_32_bits(tmr_mc);
 	cmd->cmd.cmd_setup_tmr.buf_size = size;
@@ -192,7 +196,7 @@ static int psp_tmr_load(struct psp_context *psp)
 	if (!cmd)
 		return -ENOMEM;
 
-	psp_prep_tmr_cmd_buf(cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
+	psp_prep_tmr_cmd_buf(psp, cmd, psp->tmr_mc_addr, PSP_TMR_SIZE);
 	DRM_INFO("reserve 0x%x from 0x%llx for PSP TMR SIZE\n",
 			PSP_TMR_SIZE, psp->tmr_mc_addr);
 
@@ -536,8 +540,10 @@ static int psp_load_fw(struct amdgpu_device *adev)
 	int ret;
 	struct psp_context *psp = &adev->psp;
 
-	if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset != 0)
+	if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
+		psp_ring_destroy(psp, PSP_RING_TYPE__KM);
 		goto skip_memalloc;
+	}
 
 	psp->cmd = kzalloc(sizeof(struct psp_gfx_cmd_resp), GFP_KERNEL);
 	if (!psp->cmd)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 9ec5d1a666a6..3ee573b4016e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -83,12 +83,13 @@ struct psp_funcs
 				  enum AMDGPU_UCODE_ID ucode_type);
 	bool (*smu_reload_quirk)(struct psp_context *psp);
 	int (*mode1_reset)(struct psp_context *psp);
-	uint64_t (*xgmi_get_node_id)(struct psp_context *psp);
-	uint64_t (*xgmi_get_hive_id)(struct psp_context *psp);
+	int (*xgmi_get_node_id)(struct psp_context *psp, uint64_t *node_id);
+	int (*xgmi_get_hive_id)(struct psp_context *psp, uint64_t *hive_id);
 	int (*xgmi_get_topology_info)(struct psp_context *psp, int number_devices,
 				      struct psp_xgmi_topology_info *topology);
 	int (*xgmi_set_topology_info)(struct psp_context *psp, int number_devices,
 				      struct psp_xgmi_topology_info *topology);
+	bool (*support_vmr_ring)(struct psp_context *psp);
 };
 
 struct psp_xgmi_context {
@@ -192,12 +193,14 @@ struct psp_xgmi_topology_info {
 		((psp)->funcs->bootloader_load_sos ? (psp)->funcs->bootloader_load_sos((psp)) : 0)
 #define psp_smu_reload_quirk(psp) \
 		((psp)->funcs->smu_reload_quirk ? (psp)->funcs->smu_reload_quirk((psp)) : false)
+#define psp_support_vmr_ring(psp) \
+		((psp)->funcs->support_vmr_ring ? (psp)->funcs->support_vmr_ring((psp)) : false)
 #define psp_mode1_reset(psp) \
 		((psp)->funcs->mode1_reset ? (psp)->funcs->mode1_reset((psp)) : false)
-#define psp_xgmi_get_node_id(psp) \
-		((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp)) : 0)
-#define psp_xgmi_get_hive_id(psp) \
-		((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp)) : 0)
+#define psp_xgmi_get_node_id(psp, node_id) \
+		((psp)->funcs->xgmi_get_node_id ? (psp)->funcs->xgmi_get_node_id((psp), (node_id)) : -EINVAL)
+#define psp_xgmi_get_hive_id(psp, hive_id) \
+		((psp)->funcs->xgmi_get_hive_id ? (psp)->funcs->xgmi_get_hive_id((psp), (hive_id)) : -EINVAL)
 #define psp_xgmi_get_topology_info(psp, num_device, topology) \
 		((psp)->funcs->xgmi_get_topology_info ? \
 		(psp)->funcs->xgmi_get_topology_info((psp), (num_device), (topology)) : -EINVAL)
@@ -217,7 +220,6 @@ extern const struct amdgpu_ip_block_version psp_v10_0_ip_block;
 
 int psp_gpu_reset(struct amdgpu_device *adev);
 int psp_xgmi_invoke(struct psp_context *psp, uint32_t ta_cmd_id);
-
 extern const struct amdgpu_ip_block_version psp_v11_0_ip_block;
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 5b75bdc8dc28..335a0edf114b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -397,7 +397,7 @@ bool amdgpu_ring_soft_recovery(struct amdgpu_ring *ring, unsigned int vmid,
 {
 	ktime_t deadline = ktime_add_us(ktime_get(), 10000);
 
-	if (!ring->funcs->soft_recovery)
+	if (!ring->funcs->soft_recovery || !fence)
 		return false;
 
 	atomic_inc(&ring->adev->gpu_reset_counter);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 0beb01fef83f..d87e828a084b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -29,7 +29,7 @@
 #include <drm/drm_print.h>
 
 /* max number of rings */
-#define AMDGPU_MAX_RINGS		21
+#define AMDGPU_MAX_RINGS		23
 #define AMDGPU_MAX_GFX_RINGS		1
 #define AMDGPU_MAX_COMPUTE_RINGS	8
 #define AMDGPU_MAX_VCE_RINGS		3
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
index e2e42e3fbcf3..ecf6f96df2ad 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vcn.c
@@ -262,7 +262,7 @@ static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
 
 				ring = &adev->vcn.ring_dec;
 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
@@ -322,7 +322,7 @@ static int amdgpu_vcn_pause_dpg_mode(struct amdgpu_device *adev,
 
 				ring = &adev->vcn.ring_dec;
 				WREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR,
-						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2));
+						   RREG32_SOC15(UVD, 0, mmUVD_SCRATCH2) & 0x7FFFFFFF);
 				SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
 						   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON,
 						   UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
@@ -396,16 +396,26 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
 
 	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)	{
 		struct dpg_pause_state new_state;
+		unsigned int fences = 0;
+		unsigned int i;
 
-		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
+			fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
+		}
+		if (fences)
 			new_state.fw_based = VCN_DPG_STATE__PAUSE;
 		else
-			new_state.fw_based = adev->vcn.pause_state.fw_based;
+			new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
 
-		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+		if (amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg))
 			new_state.jpeg = VCN_DPG_STATE__PAUSE;
 		else
-			new_state.jpeg = adev->vcn.pause_state.jpeg;
+			new_state.jpeg = VCN_DPG_STATE__UNPAUSE;
+
+		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
+			new_state.fw_based = VCN_DPG_STATE__PAUSE;
+		else if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
+			new_state.jpeg = VCN_DPG_STATE__PAUSE;
 
 		amdgpu_vcn_pause_dpg_mode(adev, &new_state);
 	}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 58a2363040dd..d2ea5ce2cefb 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -181,7 +181,7 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
 
 	if (level == adev->vm_manager.root_level)
 		/* For the root directory */
-		return round_up(adev->vm_manager.max_pfn, 1 << shift) >> shift;
+		return round_up(adev->vm_manager.max_pfn, 1ULL << shift) >> shift;
 	else if (level != AMDGPU_VM_PTB)
 		/* Everything in between */
 		return 512;
@@ -617,7 +617,8 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 {
 	entry->priority = 0;
 	entry->tv.bo = &vm->root.base.bo->tbo;
-	entry->tv.shared = true;
+	/* One for the VM updates, one for TTM and one for the CS job */
+	entry->tv.num_shared = 3;
 	entry->user_pages = NULL;
 	list_add(&entry->tv.head, validated);
 }
@@ -773,10 +774,6 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
 
 	ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
 
-	r = reservation_object_reserve_shared(bo->tbo.resv, 1);
-	if (r)
-		return r;
-
 	r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
 	if (r)
 		goto error;
@@ -850,9 +847,6 @@ static void amdgpu_vm_bo_param(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	bp->size = amdgpu_vm_bo_size(adev, level);
 	bp->byte_align = AMDGPU_GPU_PAGE_SIZE;
 	bp->domain = AMDGPU_GEM_DOMAIN_VRAM;
-	if (bp->size <= PAGE_SIZE && adev->asic_type >= CHIP_VEGA10 &&
-	    adev->flags & AMD_IS_APU)
-		bp->domain |= AMDGPU_GEM_DOMAIN_GTT;
 	bp->domain = amdgpu_bo_get_preferred_pin_domain(adev, bp->domain);
 	bp->flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
 		AMDGPU_GEM_CREATE_CPU_GTT_USWC;
@@ -1656,9 +1650,11 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 			if (!amdgpu_vm_pt_descendant(adev, &cursor))
 				return -ENOENT;
 			continue;
-		} else if (frag >= parent_shift) {
+		} else if (frag >= parent_shift &&
+			   cursor.level - 1 != adev->vm_manager.root_level) {
 			/* If the fragment size is even larger than the parent
-			 * shift we should go up one level and check it again.
+			 * shift we should go up one level and check it again
+			 * unless one level up is the root level.
 			 */
 			if (!amdgpu_vm_pt_ancestor(&cursor))
 				return -ENOENT;
@@ -1666,10 +1662,10 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 		}
 
 		/* Looks good so far, calculate parameters for the update */
-		incr = AMDGPU_GPU_PAGE_SIZE << shift;
+		incr = (uint64_t)AMDGPU_GPU_PAGE_SIZE << shift;
 		mask = amdgpu_vm_entries_mask(adev, cursor.level);
 		pe_start = ((cursor.pfn >> shift) & mask) * 8;
-		entry_end = (mask + 1) << shift;
+		entry_end = (uint64_t)(mask + 1) << shift;
 		entry_end += cursor.pfn & ~(entry_end - 1);
 		entry_end = min(entry_end, end);
 
@@ -1682,7 +1678,7 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
 					      flags | AMDGPU_PTE_FRAG(frag));
 
 			pe_start += nptes * 8;
-			dst += nptes * AMDGPU_GPU_PAGE_SIZE << shift;
+			dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
 
 			frag_start = upd_end;
 			if (frag_start >= frag_end) {
@@ -1842,10 +1838,6 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
 	if (r)
 		goto error_free;
 
-	r = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
-	if (r)
-		goto error_free;
-
 	r = amdgpu_vm_update_ptes(&params, start, last + 1, addr, flags);
 	if (r)
 		goto error_free;
@@ -3026,6 +3018,10 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	if (r)
 		goto error_free_root;
 
+	r = reservation_object_reserve_shared(root->tbo.resv, 1);
+	if (r)
+		goto error_unreserve;
+
 	r = amdgpu_vm_clear_bo(adev, vm, root,
 			       adev->vm_manager.root_level,
 			       vm->pte_support_ats);
@@ -3055,7 +3051,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	}
 
 	INIT_KFIFO(vm->faults);
-	vm->fault_credit = 16;
 
 	return 0;
 
@@ -3268,42 +3263,6 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 }
 
 /**
- * amdgpu_vm_pasid_fault_credit - Check fault credit for given PASID
- *
- * @adev: amdgpu_device pointer
- * @pasid: PASID do identify the VM
- *
- * This function is expected to be called in interrupt context.
- *
- * Returns:
- * True if there was fault credit, false otherwise
- */
-bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
-				  unsigned int pasid)
-{
-	struct amdgpu_vm *vm;
-
-	spin_lock(&adev->vm_manager.pasid_lock);
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-	if (!vm) {
-		/* VM not found, can't track fault credit */
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		return true;
-	}
-
-	/* No lock needed. only accessed by IRQ handler */
-	if (!vm->fault_credit) {
-		/* Too many faults in this VM */
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		return false;
-	}
-
-	vm->fault_credit--;
-	spin_unlock(&adev->vm_manager.pasid_lock);
-	return true;
-}
-
-/**
  * amdgpu_vm_manager_init - init the VM manager
  *
  * @adev: amdgpu_device pointer
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index 2a8898d19c8b..e8dcfd59fc93 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -229,9 +229,6 @@ struct amdgpu_vm {
 	/* Up to 128 pending retry page faults */
 	DECLARE_KFIFO(faults, u64, 128);
 
-	/* Limit non-retry fault storms */
-	unsigned int		fault_credit;
-
 	/* Points to the KFD process VM info */
 	struct amdkfd_process_info *process_info;
 
@@ -299,8 +296,6 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, unsigned int pasid);
 void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
-bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
-				  unsigned int pasid);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index fb37e69f1bba..8a8bc60cb6b4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -78,7 +78,7 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
 			adev->gmc.xgmi.node_id,
 			adev->gmc.xgmi.hive_id, ret);
 	else
-		dev_info(adev->dev, "XGMI: Add node %d to hive 0x%llx.\n",
+		dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
 			 adev->gmc.xgmi.physical_node_id,
 				 adev->gmc.xgmi.hive_id);
 
@@ -94,11 +94,22 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
 
 	int count = 0, ret = -EINVAL;
 
-	if ((adev->asic_type < CHIP_VEGA20) ||
-		(adev->flags & AMD_IS_APU) )
+	if (!adev->gmc.xgmi.supported)
 		return 0;
-	adev->gmc.xgmi.node_id = psp_xgmi_get_node_id(&adev->psp);
-	adev->gmc.xgmi.hive_id = psp_xgmi_get_hive_id(&adev->psp);
+
+	ret = psp_xgmi_get_node_id(&adev->psp, &adev->gmc.xgmi.node_id);
+	if (ret) {
+		dev_err(adev->dev,
+			"XGMI: Failed to get node id\n");
+		return ret;
+	}
+
+	ret = psp_xgmi_get_hive_id(&adev->psp, &adev->gmc.xgmi.hive_id);
+	if (ret) {
+		dev_err(adev->dev,
+			"XGMI: Failed to get hive id\n");
+		return ret;
+	}
 
 	mutex_lock(&xgmi_mutex);
 	hive = amdgpu_get_xgmi_hive(adev);
@@ -135,3 +146,23 @@ exit:
 	mutex_unlock(&xgmi_mutex);
 	return ret;
 }
+
+void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
+{
+	struct amdgpu_hive_info *hive;
+
+	if (!adev->gmc.xgmi.supported)
+		return;
+
+	mutex_lock(&xgmi_mutex);
+
+	hive = amdgpu_get_xgmi_hive(adev);
+	if (!hive)
+		goto exit;
+
+	if (!(hive->number_devices--))
+		mutex_destroy(&hive->hive_lock);
+
+exit:
+	mutex_unlock(&xgmi_mutex);
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 6335bfdcc51d..6151eb9c8ad3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -35,5 +35,6 @@ struct amdgpu_hive_info {
 struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
 int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
 int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
+void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);
 
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/cik_ih.c b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
index b5775c6a857b..8a8b4967a101 100644
--- a/drivers/gpu/drm/amd/amdgpu/cik_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_ih.c
@@ -228,34 +228,6 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
  * [127:96] - reserved
  */
 
-/**
- * cik_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool cik_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u16 pasid;
-
-	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
-	case 146:
-	case 147:
-		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
-		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
-			return true;
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	adev->irq.ih.rptr += 16;
-	return false;
-}
-
  /**
  * cik_ih_decode_iv - decode an interrupt vector
  *
@@ -461,7 +433,6 @@ static const struct amd_ip_funcs cik_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs cik_ih_funcs = {
 	.get_wptr = cik_ih_get_wptr,
-	.prescreen_iv = cik_ih_prescreen_iv,
 	.decode_iv = cik_ih_decode_iv,
 	.set_rptr = cik_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/cz_ih.c b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
index df5ac4d85a00..9d3ea298e116 100644
--- a/drivers/gpu/drm/amd/amdgpu/cz_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_ih.c
@@ -208,34 +208,6 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
 }
 
 /**
- * cz_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool cz_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u16 pasid;
-
-	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
-	case 146:
-	case 147:
-		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
-		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
-			return true;
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	adev->irq.ih.rptr += 16;
-	return false;
-}
-
-/**
  * cz_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -442,7 +414,6 @@ static const struct amd_ip_funcs cz_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs cz_ih_funcs = {
 	.get_wptr = cz_ih_get_wptr,
-	.prescreen_iv = cz_ih_prescreen_iv,
 	.decode_iv = cz_ih_decode_iv,
 	.set_rptr = cz_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
index fdace004544d..e4cc1d48eaab 100644
--- a/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_virtual.c
@@ -167,19 +167,6 @@ static void dce_virtual_crtc_disable(struct drm_crtc *crtc)
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
 
 	dce_virtual_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
-	if (crtc->primary->fb) {
-		int r;
-		struct amdgpu_bo *abo;
-
-		abo = gem_to_amdgpu_bo(crtc->primary->fb->obj[0]);
-		r = amdgpu_bo_reserve(abo, true);
-		if (unlikely(r))
-			DRM_ERROR("failed to reserve abo before unpin\n");
-		else {
-			amdgpu_bo_unpin(abo);
-			amdgpu_bo_unreserve(abo);
-		}
-	}
 
 	amdgpu_crtc->pll_id = ATOM_PPLL_INVALID;
 	amdgpu_crtc->encoder = NULL;
@@ -692,7 +679,9 @@ static int dce_virtual_pageflip(struct amdgpu_device *adev,
 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);
 
 	drm_crtc_vblank_put(&amdgpu_crtc->base);
-	schedule_work(&works->unpin_work);
+	amdgpu_bo_unref(&works->old_abo);
+	kfree(works->shared);
+	kfree(works);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
index 1454fc306783..57cb3a51bda7 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v8_0.c
@@ -4068,6 +4068,11 @@ static void gfx_v8_0_rlc_start(struct amdgpu_device *adev)
 
 static int gfx_v8_0_rlc_resume(struct amdgpu_device *adev)
 {
+	if (amdgpu_sriov_vf(adev)) {
+		gfx_v8_0_init_csb(adev);
+		return 0;
+	}
+
 	adev->gfx.rlc.funcs->stop(adev);
 	adev->gfx.rlc.funcs->reset(adev);
 	gfx_v8_0_init_pg(adev);
@@ -4228,7 +4233,6 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	u32 tmp;
 	u32 rb_bufsz;
 	u64 rb_addr, rptr_addr, wptr_gpu_addr;
-	int r;
 
 	/* Set the write pointer delay */
 	WREG32(mmCP_RB_WPTR_DELAY, 0);
@@ -4273,9 +4277,8 @@ static int gfx_v8_0_cp_gfx_resume(struct amdgpu_device *adev)
 	amdgpu_ring_clear_ring(ring);
 	gfx_v8_0_cp_gfx_start(adev);
 	ring->sched.ready = true;
-	r = amdgpu_ring_test_helper(ring);
 
-	return r;
+	return 0;
 }
 
 static void gfx_v8_0_cp_compute_enable(struct amdgpu_device *adev, bool enable)
@@ -4364,10 +4367,9 @@ static int gfx_v8_0_kiq_kcq_enable(struct amdgpu_device *adev)
 		amdgpu_ring_write(kiq_ring, upper_32_bits(wptr_addr));
 	}
 
-	r = amdgpu_ring_test_helper(kiq_ring);
-	if (r)
-		DRM_ERROR("KCQ enable failed\n");
-	return r;
+	amdgpu_ring_commit(kiq_ring);
+
+	return 0;
 }
 
 static int gfx_v8_0_deactivate_hqd(struct amdgpu_device *adev, u32 req)
@@ -4704,16 +4706,32 @@ static int gfx_v8_0_kcq_resume(struct amdgpu_device *adev)
 	if (r)
 		goto done;
 
-	/* Test KCQs - reversing the order of rings seems to fix ring test failure
-	 * after GPU reset
-	 */
-	for (i = adev->gfx.num_compute_rings - 1; i >= 0; i--) {
+done:
+	return r;
+}
+
+static int gfx_v8_0_cp_test_all_rings(struct amdgpu_device *adev)
+{
+	int r, i;
+	struct amdgpu_ring *ring;
+
+	/* collect all the ring_tests here, gfx, kiq, compute */
+	ring = &adev->gfx.gfx_ring[0];
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
+		return r;
+
+	ring = &adev->gfx.kiq.ring;
+	r = amdgpu_ring_test_helper(ring);
+	if (r)
+		return r;
+
+	for (i = 0; i < adev->gfx.num_compute_rings; i++) {
 		ring = &adev->gfx.compute_ring[i];
-		r = amdgpu_ring_test_helper(ring);
+		amdgpu_ring_test_helper(ring);
 	}
 
-done:
-	return r;
+	return 0;
 }
 
 static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
@@ -4734,6 +4752,11 @@ static int gfx_v8_0_cp_resume(struct amdgpu_device *adev)
 	r = gfx_v8_0_kcq_resume(adev);
 	if (r)
 		return r;
+
+	r = gfx_v8_0_cp_test_all_rings(adev);
+	if (r)
+		return r;
+
 	gfx_v8_0_enable_gui_idle_interrupt(adev, true);
 
 	return 0;
@@ -4947,14 +4970,13 @@ static bool gfx_v8_0_check_soft_reset(void *handle)
 static int gfx_v8_0_pre_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+	u32 grbm_soft_reset = 0;
 
 	if ((!adev->gfx.grbm_soft_reset) &&
 	    (!adev->gfx.srbm_soft_reset))
 		return 0;
 
 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
-	srbm_soft_reset = adev->gfx.srbm_soft_reset;
 
 	/* stop the rlc */
 	adev->gfx.rlc.funcs->stop(adev);
@@ -5051,14 +5073,13 @@ static int gfx_v8_0_soft_reset(void *handle)
 static int gfx_v8_0_post_soft_reset(void *handle)
 {
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	u32 grbm_soft_reset = 0, srbm_soft_reset = 0;
+	u32 grbm_soft_reset = 0;
 
 	if ((!adev->gfx.grbm_soft_reset) &&
 	    (!adev->gfx.srbm_soft_reset))
 		return 0;
 
 	grbm_soft_reset = adev->gfx.grbm_soft_reset;
-	srbm_soft_reset = adev->gfx.srbm_soft_reset;
 
 	if (REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CP) ||
 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_CPF) ||
@@ -5083,6 +5104,8 @@ static int gfx_v8_0_post_soft_reset(void *handle)
 	    REG_GET_FIELD(grbm_soft_reset, GRBM_SOFT_RESET, SOFT_RESET_GFX))
 		gfx_v8_0_cp_gfx_resume(adev);
 
+	gfx_v8_0_cp_test_all_rings(adev);
+
 	adev->gfx.rlc.funcs->start(adev);
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
index af8ccb014be3..fbca0494f871 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c
@@ -86,6 +86,7 @@ MODULE_FIRMWARE("amdgpu/picasso_me.bin");
 MODULE_FIRMWARE("amdgpu/picasso_mec.bin");
 MODULE_FIRMWARE("amdgpu/picasso_mec2.bin");
 MODULE_FIRMWARE("amdgpu/picasso_rlc.bin");
+MODULE_FIRMWARE("amdgpu/picasso_rlc_am4.bin");
 
 MODULE_FIRMWARE("amdgpu/raven2_ce.bin");
 MODULE_FIRMWARE("amdgpu/raven2_pfp.bin");
@@ -112,7 +113,10 @@ static const struct soc15_reg_golden golden_settings_gc_9_0[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_HI, 0xffffffff, 0x4a2c0e68),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTCP_CHAN_STEER_LO, 0xffffffff, 0xb5d3f197),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_CACHE_INVALIDATION, 0x3fff3af3, 0x19200000),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmVGT_GS_MAX_WAVE_ID, 0x00000fff, 0x000003ff),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
@@ -134,10 +138,7 @@ static const struct soc15_reg_golden golden_settings_gc_9_0_vg10[] =
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmRMI_UTCL1_CNTL2, 0x00030000, 0x00020000),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmSPI_CONFIG_CNTL_1, 0x0000000f, 0x01000107),
 	SOC15_REG_GOLDEN_VALUE(GC, 0, mmTD_CNTL, 0x00001800, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC1_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_MEC2_F32_INT_DIS, 0x00000000, 0x00000800),
-	SOC15_REG_GOLDEN_VALUE(GC, 0, mmCP_DEBUG, 0x00000000, 0x00008000)
+	SOC15_REG_GOLDEN_VALUE(GC, 0, mmWD_UTCL1_CNTL, 0x08000000, 0x08000080)
 };
 
 static const struct soc15_reg_golden golden_settings_gc_9_0_vg20[] =
@@ -645,7 +646,20 @@ static int gfx_v9_0_init_microcode(struct amdgpu_device *adev)
 	adev->gfx.ce_fw_version = le32_to_cpu(cp_hdr->header.ucode_version);
 	adev->gfx.ce_feature_version = le32_to_cpu(cp_hdr->ucode_feature_version);
 
-	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
+	/*
+	 * For Picasso && AM4 SOCKET board, we use picasso_rlc_am4.bin
+	 * instead of picasso_rlc.bin.
+	 * Judgment method:
+	 * PCO AM4: revision >= 0xC8 && revision <= 0xCF
+	 *          or revision >= 0xD8 && revision <= 0xDF
+	 * otherwise is PCO FP5
+	 */
+	if (!strcmp(chip_name, "picasso") &&
+		(((adev->pdev->revision >= 0xC8) && (adev->pdev->revision <= 0xCF)) ||
+		((adev->pdev->revision >= 0xD8) && (adev->pdev->revision <= 0xDF))))
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc_am4.bin", chip_name);
+	else
+		snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_rlc.bin", chip_name);
 	err = request_firmware(&adev->gfx.rlc_fw, fw_name, adev->dev);
 	if (err)
 		goto out;
@@ -2326,12 +2340,13 @@ static void gfx_v9_0_rlc_start(struct amdgpu_device *adev)
 #endif
 
 	WREG32_FIELD15(GC, 0, RLC_CNTL, RLC_ENABLE_F32, 1);
+	udelay(50);
 
 	/* carrizo do enable cp interrupt after cp inited */
-	if (!(adev->flags & AMD_IS_APU))
+	if (!(adev->flags & AMD_IS_APU)) {
 		gfx_v9_0_enable_gui_idle_interrupt(adev, true);
-
-	udelay(50);
+		udelay(50);
+	}
 
 #ifdef AMDGPU_RLC_DEBUG_RETRY
 	/* RLC_GPM_GENERAL_6 : RLC Ucode version */
@@ -3572,6 +3587,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 {
 	uint32_t data, def;
 
+	amdgpu_gfx_rlc_enter_safe_mode(adev);
+
 	/* It is disabled by HW by default */
 	if (enable && (adev->cg_flags & AMD_CG_SUPPORT_GFX_MGCG)) {
 		/* 1 - RLC_CGTT_MGCG_OVERRIDE */
@@ -3636,6 +3653,8 @@ static void gfx_v9_0_update_medium_grain_clock_gating(struct amdgpu_device *adev
 			WREG32_SOC15(GC, 0, mmCP_MEM_SLP_CNTL, data);
 		}
 	}
+
+	amdgpu_gfx_rlc_exit_safe_mode(adev);
 }
 
 static void gfx_v9_0_update_3d_clock_gating(struct amdgpu_device *adev,
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
index 531aaf377592..1ad7e6b8ed1d 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v8_0.c
@@ -56,6 +56,9 @@ MODULE_FIRMWARE("amdgpu/tonga_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris11_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris10_mc.bin");
 MODULE_FIRMWARE("amdgpu/polaris12_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris11_k_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris10_k_mc.bin");
+MODULE_FIRMWARE("amdgpu/polaris12_k_mc.bin");
 
 static const u32 golden_settings_tonga_a11[] =
 {
@@ -224,13 +227,39 @@ static int gmc_v8_0_init_microcode(struct amdgpu_device *adev)
 		chip_name = "tonga";
 		break;
 	case CHIP_POLARIS11:
-		chip_name = "polaris11";
+		if (((adev->pdev->device == 0x67ef) &&
+		     ((adev->pdev->revision == 0xe0) ||
+		      (adev->pdev->revision == 0xe5))) ||
+		    ((adev->pdev->device == 0x67ff) &&
+		     ((adev->pdev->revision == 0xcf) ||
+		      (adev->pdev->revision == 0xef) ||
+		      (adev->pdev->revision == 0xff))))
+			chip_name = "polaris11_k";
+		else if ((adev->pdev->device == 0x67ef) &&
+			 (adev->pdev->revision == 0xe2))
+			chip_name = "polaris11_k";
+		else
+			chip_name = "polaris11";
 		break;
 	case CHIP_POLARIS10:
-		chip_name = "polaris10";
+		if ((adev->pdev->device == 0x67df) &&
+		    ((adev->pdev->revision == 0xe1) ||
+		     (adev->pdev->revision == 0xf7)))
+			chip_name = "polaris10_k";
+		else
+			chip_name = "polaris10";
 		break;
 	case CHIP_POLARIS12:
-		chip_name = "polaris12";
+		if (((adev->pdev->device == 0x6987) &&
+		     ((adev->pdev->revision == 0xc0) ||
+		      (adev->pdev->revision == 0xc3))) ||
+		    ((adev->pdev->device == 0x6981) &&
+		     ((adev->pdev->revision == 0x00) ||
+		      (adev->pdev->revision == 0x01) ||
+		      (adev->pdev->revision == 0x10))))
+			chip_name = "polaris12_k";
+		else
+			chip_name = "polaris12";
 		break;
 	case CHIP_FIJI:
 	case CHIP_CARRIZO:
@@ -337,7 +366,7 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 	const struct mc_firmware_header_v1_0 *hdr;
 	const __le32 *fw_data = NULL;
 	const __le32 *io_mc_regs = NULL;
-	u32 data, vbios_version;
+	u32 data;
 	int i, ucode_size, regs_size;
 
 	/* Skip MC ucode loading on SR-IOV capable boards.
@@ -348,13 +377,6 @@ static int gmc_v8_0_polaris_mc_load_microcode(struct amdgpu_device *adev)
 	if (amdgpu_sriov_bios(adev))
 		return 0;
 
-	WREG32(mmMC_SEQ_IO_DEBUG_INDEX, 0x9F);
-	data = RREG32(mmMC_SEQ_IO_DEBUG_DATA);
-	vbios_version = data & 0xf;
-
-	if (vbios_version == 0)
-		return 0;
-
 	if (!adev->gmc.fw)
 		return -EINVAL;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 3a4e5d8d5162..bacdaef77b6c 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -244,6 +244,62 @@ static int gmc_v9_0_vm_fault_interrupt_state(struct amdgpu_device *adev,
 	return 0;
 }
 
+/**
+ * vega10_ih_prescreen_iv - prescreen an interrupt vector
+ *
+ * @adev: amdgpu_device pointer
+ *
+ * Returns true if the interrupt vector should be further processed.
+ */
+static bool gmc_v9_0_prescreen_iv(struct amdgpu_device *adev,
+				  struct amdgpu_iv_entry *entry,
+				  uint64_t addr)
+{
+	struct amdgpu_vm *vm;
+	u64 key;
+	int r;
+
+	/* No PASID, can't identify faulting process */
+	if (!entry->pasid)
+		return true;
+
+	/* Not a retry fault */
+	if (!(entry->src_data[1] & 0x80))
+		return true;
+
+	/* Track retry faults in per-VM fault FIFO. */
+	spin_lock(&adev->vm_manager.pasid_lock);
+	vm = idr_find(&adev->vm_manager.pasid_idr, entry->pasid);
+	if (!vm) {
+		/* VM not found, process it normally */
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return true;
+	}
+
+	key = AMDGPU_VM_FAULT(entry->pasid, addr);
+	r = amdgpu_vm_add_fault(vm->fault_hash, key);
+
+	/* Hash table is full or the fault is already being processed,
+	 * ignore further page faults
+	 */
+	if (r != 0) {
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return false;
+	}
+	/* No locking required with single writer and single reader */
+	r = kfifo_put(&vm->faults, key);
+	if (!r) {
+		/* FIFO is full. Ignore it until there is space */
+		amdgpu_vm_clear_fault(vm->fault_hash, key);
+		spin_unlock(&adev->vm_manager.pasid_lock);
+		return false;
+	}
+
+	spin_unlock(&adev->vm_manager.pasid_lock);
+	/* It's the first fault for this address, process it normally */
+	return true;
+}
+
 static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 				struct amdgpu_irq_src *source,
 				struct amdgpu_iv_entry *entry)
@@ -255,6 +311,9 @@ static int gmc_v9_0_process_interrupt(struct amdgpu_device *adev,
 	addr = (u64)entry->src_data[0] << 12;
 	addr |= ((u64)entry->src_data[1] & 0xf) << 44;
 
+	if (!gmc_v9_0_prescreen_iv(adev, entry, addr))
+		return 1; /* This also prevents sending it to KFD */
+
 	if (!amdgpu_sriov_vf(adev)) {
 		status = RREG32(hub->vm_l2_pro_fault_status);
 		WREG32_P(hub->vm_l2_pro_fault_cntl, 1, ~1);
@@ -659,37 +718,46 @@ static bool gmc_v9_0_keep_stolen_memory(struct amdgpu_device *adev)
 	}
 }
 
-static int gmc_v9_0_late_init(void *handle)
+static int gmc_v9_0_allocate_vm_inv_eng(struct amdgpu_device *adev)
 {
-	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
-	/*
-	 * The latest engine allocation on gfx9 is:
-	 * Engine 0, 1: idle
-	 * Engine 2, 3: firmware
-	 * Engine 4~13: amdgpu ring, subject to change when ring number changes
-	 * Engine 14~15: idle
-	 * Engine 16: kfd tlb invalidation
-	 * Engine 17: Gart flushes
-	 */
-	unsigned vm_inv_eng[AMDGPU_MAX_VMHUBS] = { 4, 4 };
+	struct amdgpu_ring *ring;
+	unsigned vm_inv_engs[AMDGPU_MAX_VMHUBS] =
+		{GFXHUB_FREE_VM_INV_ENGS_BITMAP, MMHUB_FREE_VM_INV_ENGS_BITMAP};
 	unsigned i;
-	int r;
+	unsigned vmhub, inv_eng;
 
-	if (!gmc_v9_0_keep_stolen_memory(adev))
-		amdgpu_bo_late_init(adev);
+	for (i = 0; i < adev->num_rings; ++i) {
+		ring = adev->rings[i];
+		vmhub = ring->funcs->vmhub;
 
-	for(i = 0; i < adev->num_rings; ++i) {
-		struct amdgpu_ring *ring = adev->rings[i];
-		unsigned vmhub = ring->funcs->vmhub;
+		inv_eng = ffs(vm_inv_engs[vmhub]);
+		if (!inv_eng) {
+			dev_err(adev->dev, "no VM inv eng for ring %s\n",
+				ring->name);
+			return -EINVAL;
+		}
+
+		ring->vm_inv_eng = inv_eng - 1;
+		change_bit(inv_eng - 1, (unsigned long *)(&vm_inv_engs[vmhub]));
 
-		ring->vm_inv_eng = vm_inv_eng[vmhub]++;
 		dev_info(adev->dev, "ring %s uses VM inv eng %u on hub %u\n",
 			 ring->name, ring->vm_inv_eng, ring->funcs->vmhub);
 	}
 
-	/* Engine 16 is used for KFD and 17 for GART flushes */
-	for(i = 0; i < AMDGPU_MAX_VMHUBS; ++i)
-		BUG_ON(vm_inv_eng[i] > 16);
+	return 0;
+}
+
+static int gmc_v9_0_late_init(void *handle)
+{
+	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+	int r;
+
+	if (!gmc_v9_0_keep_stolen_memory(adev))
+		amdgpu_bo_late_init(adev);
+
+	r = gmc_v9_0_allocate_vm_inv_eng(adev);
+	if (r)
+		return r;
 
 	if (adev->asic_type == CHIP_VEGA10 && !amdgpu_sriov_vf(adev)) {
 		r = gmc_v9_0_ecc_available(adev);
@@ -902,6 +970,9 @@ static int gmc_v9_0_sw_init(void *handle)
 	/* This interrupt is VMC page fault.*/
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_VMC, VMC_1_0__SRCID__VM_FAULT,
 				&adev->gmc.vm_fault);
+	if (r)
+		return r;
+
 	r = amdgpu_irq_add_id(adev, SOC15_IH_CLIENTID_UTCL2, UTCL2_1_0__SRCID__FAULT,
 				&adev->gmc.vm_fault);
 
@@ -934,7 +1005,7 @@ static int gmc_v9_0_sw_init(void *handle)
 	}
 	adev->need_swiotlb = drm_get_max_iomem() > ((u64)1 << dma_bits);
 
-	if (adev->asic_type == CHIP_VEGA20) {
+	if (adev->gmc.xgmi.supported) {
 		r = gfxhub_v1_1_get_xgmi_info(adev);
 		if (r)
 			return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
index b030ca5ea107..5c8deac65580 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.h
@@ -24,6 +24,16 @@
 #ifndef __GMC_V9_0_H__
 #define __GMC_V9_0_H__
 
+	/*
+	 * The latest engine allocation on gfx9 is:
+	 * Engine 2, 3: firmware
+	 * Engine 0, 1, 4~16: amdgpu ring,
+	 *                    subject to change when ring number changes
+	 * Engine 17: Gart flushes
+	 */
+#define GFXHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
+#define MMHUB_FREE_VM_INV_ENGS_BITMAP		0x1FFF3
+
 extern const struct amd_ip_funcs gmc_v9_0_ip_funcs;
 extern const struct amdgpu_ip_block_version gmc_v9_0_ip_block;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
index cf0fc61aebe6..a3984d10b604 100644
--- a/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/iceland_ih.c
@@ -208,34 +208,6 @@ static u32 iceland_ih_get_wptr(struct amdgpu_device *adev)
 }
 
 /**
- * iceland_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool iceland_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u16 pasid;
-
-	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
-	case 146:
-	case 147:
-		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
-		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
-			return true;
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	adev->irq.ih.rptr += 16;
-	return false;
-}
-
-/**
  * iceland_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -440,7 +412,6 @@ static const struct amd_ip_funcs iceland_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs iceland_ih_funcs = {
 	.get_wptr = iceland_ih_get_wptr,
-	.prescreen_iv = iceland_ih_prescreen_iv,
 	.decode_iv = iceland_ih_decode_iv,
 	.set_rptr = iceland_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
index 8cbb4655896a..b11a1c17a7f2 100644
--- a/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
+++ b/drivers/gpu/drm/amd/amdgpu/mxgpu_ai.c
@@ -174,7 +174,7 @@ static int xgpu_ai_send_access_requests(struct amdgpu_device *adev,
 			return r;
 		}
 		/* Retrieve checksum from mailbox2 */
-		if (req == IDH_REQ_GPU_INIT_ACCESS) {
+		if (req == IDH_REQ_GPU_INIT_ACCESS || req == IDH_REQ_GPU_RESET_ACCESS) {
 			adev->virt.fw_reserve.checksum_key =
 				RREG32_NO_KIQ(SOC15_REG_OFFSET(NBIO, 0,
 					mmBIF_BX_PF0_MAILBOX_MSGBUF_RCV_DW2));
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
index 6f9c54978cc1..accdedd63c98 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v6_1.c
@@ -32,6 +32,7 @@
 #define smnCPM_CONTROL                                                                                  0x11180460
 #define smnPCIE_CNTL2                                                                                   0x11180070
 #define smnPCIE_CONFIG_CNTL                                                                             0x11180044
+#define smnPCIE_CI_CNTL                                                                                 0x11180080
 
 static u32 nbio_v6_1_get_rev_id(struct amdgpu_device *adev)
 {
@@ -270,6 +271,12 @@ static void nbio_v6_1_init_registers(struct amdgpu_device *adev)
 
 	if (def != data)
 		WREG32_PCIE(smnPCIE_CONFIG_CNTL, data);
+
+	def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
+	data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
+
+	if (def != data)
+		WREG32_PCIE(smnPCIE_CI_CNTL, data);
 }
 
 const struct amdgpu_nbio_funcs nbio_v6_1_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
index f8cee95d61cc..4cd31a276dcd 100644
--- a/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/nbio_v7_4.c
@@ -31,6 +31,7 @@
 
 #define smnCPM_CONTROL                                                                                  0x11180460
 #define smnPCIE_CNTL2                                                                                   0x11180070
+#define smnPCIE_CI_CNTL                                                                                 0x11180080
 
 static u32 nbio_v7_4_get_rev_id(struct amdgpu_device *adev)
 {
@@ -222,7 +223,13 @@ static void nbio_v7_4_detect_hw_virt(struct amdgpu_device *adev)
 
 static void nbio_v7_4_init_registers(struct amdgpu_device *adev)
 {
+	uint32_t def, data;
+
+	def = data = RREG32_PCIE(smnPCIE_CI_CNTL);
+	data = REG_SET_FIELD(data, PCIE_CI_CNTL, CI_SLV_ORDERING_DIS, 1);
 
+	if (def != data)
+		WREG32_PCIE(smnPCIE_CI_CNTL, data);
 }
 
 const struct amdgpu_nbio_funcs nbio_v7_4_funcs = {
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
index 882bd83a28c4..0de00fbe9233 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
+++ b/drivers/gpu/drm/amd/amdgpu/psp_gfx_if.h
@@ -43,6 +43,8 @@ enum psp_gfx_crtl_cmd_id
     GFX_CTRL_CMD_ID_ENABLE_INT      = 0x00050000,   /* enable PSP-to-Gfx interrupt */
     GFX_CTRL_CMD_ID_DISABLE_INT     = 0x00060000,   /* disable PSP-to-Gfx interrupt */
     GFX_CTRL_CMD_ID_MODE1_RST       = 0x00070000,   /* trigger the Mode 1 reset */
+    GFX_CTRL_CMD_ID_CONSUME_CMD     = 0x000A0000,   /* send interrupt to psp for updating write pointer of vf */
+    GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING = 0x000C0000, /* destroy GPCOM ring */
 
     GFX_CTRL_CMD_ID_MAX             = 0x000F0000,   /* max command ID */
 };
@@ -89,7 +91,8 @@ enum psp_gfx_cmd_id
     GFX_CMD_ID_LOAD_IP_FW   = 0x00000006,   /* load HW IP FW */
     GFX_CMD_ID_DESTROY_TMR  = 0x00000007,   /* destroy TMR region */
     GFX_CMD_ID_SAVE_RESTORE = 0x00000008,   /* save/restore HW IP FW */
-
+    GFX_CMD_ID_SETUP_VMR    = 0x00000009,   /* setup VMR region */
+    GFX_CMD_ID_DESTROY_VMR  = 0x0000000A,   /* destroy VMR region */
 };
 
 
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
index e5dd052d9e06..0c6e7f9b143f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v11_0.c
@@ -34,6 +34,7 @@
 #include "nbio/nbio_7_4_offset.h"
 
 MODULE_FIRMWARE("amdgpu/vega20_sos.bin");
+MODULE_FIRMWARE("amdgpu/vega20_asd.bin");
 MODULE_FIRMWARE("amdgpu/vega20_ta.bin");
 
 /* address block */
@@ -100,6 +101,7 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	char fw_name[30];
 	int err = 0;
 	const struct psp_firmware_header_v1_0 *sos_hdr;
+	const struct psp_firmware_header_v1_0 *asd_hdr;
 	const struct ta_firmware_header_v1_0 *ta_hdr;
 
 	DRM_DEBUG("\n");
@@ -132,14 +134,30 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 	adev->psp.sos_start_addr = (uint8_t *)adev->psp.sys_start_addr +
 				le32_to_cpu(sos_hdr->sos_offset_bytes);
 
+	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_asd.bin", chip_name);
+	err = request_firmware(&adev->psp.asd_fw, fw_name, adev->dev);
+	if (err)
+		goto out1;
+
+	err = amdgpu_ucode_validate(adev->psp.asd_fw);
+	if (err)
+		goto out1;
+
+	asd_hdr = (const struct psp_firmware_header_v1_0 *)adev->psp.asd_fw->data;
+	adev->psp.asd_fw_version = le32_to_cpu(asd_hdr->header.ucode_version);
+	adev->psp.asd_feature_version = le32_to_cpu(asd_hdr->ucode_feature_version);
+	adev->psp.asd_ucode_size = le32_to_cpu(asd_hdr->header.ucode_size_bytes);
+	adev->psp.asd_start_addr = (uint8_t *)asd_hdr +
+				le32_to_cpu(asd_hdr->header.ucode_array_offset_bytes);
+
 	snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_ta.bin", chip_name);
 	err = request_firmware(&adev->psp.ta_fw, fw_name, adev->dev);
 	if (err)
-		goto out;
+		goto out2;
 
 	err = amdgpu_ucode_validate(adev->psp.ta_fw);
 	if (err)
-		goto out;
+		goto out2;
 
 	ta_hdr = (const struct ta_firmware_header_v1_0 *)adev->psp.ta_fw->data;
 	adev->psp.ta_xgmi_ucode_version = le32_to_cpu(ta_hdr->ta_xgmi_ucode_version);
@@ -148,14 +166,18 @@ static int psp_v11_0_init_microcode(struct psp_context *psp)
 		le32_to_cpu(ta_hdr->header.ucode_array_offset_bytes);
 
 	return 0;
+
+out2:
+	release_firmware(adev->psp.ta_fw);
+	adev->psp.ta_fw = NULL;
+out1:
+	release_firmware(adev->psp.asd_fw);
+	adev->psp.asd_fw = NULL;
 out:
-	if (err) {
-		dev_err(adev->dev,
-			"psp v11.0: Failed to load firmware \"%s\"\n",
-			fw_name);
-		release_firmware(adev->psp.sos_fw);
-		adev->psp.sos_fw = NULL;
-	}
+	dev_err(adev->dev,
+		"psp v11.0: Failed to load firmware \"%s\"\n", fw_name);
+	release_firmware(adev->psp.sos_fw);
+	adev->psp.sos_fw = NULL;
 
 	return err;
 }
@@ -171,8 +193,11 @@ static int psp_v11_0_bootloader_load_sysdrv(struct psp_context *psp)
 	 * are already been loaded.
 	 */
 	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-	if (sol_reg)
+	if (sol_reg) {
+		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+		printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
 		return 0;
+	}
 
 	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
 	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
@@ -288,6 +313,13 @@ static int psp_v11_0_ring_init(struct psp_context *psp,
 	return 0;
 }
 
+static bool psp_v11_0_support_vmr_ring(struct psp_context *psp)
+{
+	if (amdgpu_sriov_vf(psp->adev) && psp->sos_fw_version > 0x80045)
+		return true;
+	return false;
+}
+
 static int psp_v11_0_ring_create(struct psp_context *psp,
 				enum psp_ring_type ring_type)
 {
@@ -296,26 +328,47 @@ static int psp_v11_0_ring_create(struct psp_context *psp,
 	struct psp_ring *ring = &psp->km_ring;
 	struct amdgpu_device *adev = psp->adev;
 
-	/* Write low address of the ring to C2PMSG_69 */
-	psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
-	/* Write high address of the ring to C2PMSG_70 */
-	psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
-	/* Write size of ring to C2PMSG_71 */
-	psp_ring_reg = ring->ring_size;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
-	/* Write the ring initialization command to C2PMSG_64 */
-	psp_ring_reg = ring_type;
-	psp_ring_reg = psp_ring_reg << 16;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
-
-	/* there might be handshake issue with hardware which needs delay */
-	mdelay(20);
-
-	/* Wait for response flag (bit 31) in C2PMSG_64 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-			   0x80000000, 0x8000FFFF, false);
+	if (psp_v11_0_support_vmr_ring(psp)) {
+		/* Write low address of the ring to C2PMSG_102 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_103 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_103, psp_ring_reg);
+
+		/* Write the ring initialization command to C2PMSG_101 */
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+					     GFX_CTRL_CMD_ID_INIT_GPCOM_RING);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_101 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+				   0x80000000, 0x8000FFFF, false);
+
+	} else {
+		/* Write low address of the ring to C2PMSG_69 */
+		psp_ring_reg = lower_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_69, psp_ring_reg);
+		/* Write high address of the ring to C2PMSG_70 */
+		psp_ring_reg = upper_32_bits(ring->ring_mem_mc_addr);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_70, psp_ring_reg);
+		/* Write size of ring to C2PMSG_71 */
+		psp_ring_reg = ring->ring_size;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_71, psp_ring_reg);
+		/* Write the ring initialization command to C2PMSG_64 */
+		psp_ring_reg = ring_type;
+		psp_ring_reg = psp_ring_reg << 16;
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, psp_ring_reg);
+
+		/* there might be handshake issue with hardware which needs delay */
+		mdelay(20);
+
+		/* Wait for response flag (bit 31) in C2PMSG_64 */
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+				   0x80000000, 0x8000FFFF, false);
+	}
 
 	return ret;
 }
@@ -326,15 +379,24 @@ static int psp_v11_0_ring_stop(struct psp_context *psp,
 	int ret = 0;
 	struct amdgpu_device *adev = psp->adev;
 
-	/* Write the ring destroy command to C2PMSG_64 */
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64, GFX_CTRL_CMD_ID_DESTROY_RINGS);
+	/* Write the ring destroy command*/
+	if (psp_v11_0_support_vmr_ring(psp))
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101,
+				     GFX_CTRL_CMD_ID_DESTROY_GPCOM_RING);
+	else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_64,
+				     GFX_CTRL_CMD_ID_DESTROY_RINGS);
 
 	/* there might be handshake issue with hardware which needs delay */
 	mdelay(20);
 
-	/* Wait for response flag (bit 31) in C2PMSG_64 */
-	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
-			   0x80000000, 0x80000000, false);
+	/* Wait for response flag (bit 31) */
+	if (psp_v11_0_support_vmr_ring(psp))
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_101),
+				   0x80000000, 0x80000000, false);
+	else
+		ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_64),
+				   0x80000000, 0x80000000, false);
 
 	return ret;
 }
@@ -373,7 +435,10 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
 	uint32_t rb_frame_size_dw = sizeof(struct psp_gfx_rb_frame) / 4;
 
 	/* KM (GPCOM) prepare write pointer */
-	psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
+	if (psp_v11_0_support_vmr_ring(psp))
+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102);
+	else
+		psp_write_ptr_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67);
 
 	/* Update KM RB frame pointer to new frame */
 	/* write_frame ptr increments by size of rb_frame in bytes */
@@ -402,7 +467,11 @@ static int psp_v11_0_cmd_submit(struct psp_context *psp,
 
 	/* Update the write Pointer in DWORDs */
 	psp_write_ptr_reg = (psp_write_ptr_reg + rb_frame_size_dw) % ring_size_dw;
-	WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
+	if (psp_v11_0_support_vmr_ring(psp)) {
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_102, psp_write_ptr_reg);
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_101, GFX_CTRL_CMD_ID_CONSUME_CMD);
+	} else
+		WREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_67, psp_write_ptr_reg);
 
 	return 0;
 }
@@ -547,7 +616,7 @@ static int psp_v11_0_mode1_reset(struct psp_context *psp)
 	/*send the mode 1 reset command*/
 	WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
 
-	mdelay(1000);
+	msleep(500);
 
 	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
 
@@ -640,7 +709,7 @@ static int psp_v11_0_xgmi_set_topology_info(struct psp_context *psp,
 	return psp_xgmi_invoke(psp, TA_COMMAND_XGMI__SET_TOPOLOGY_INFO);
 }
 
-static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
+static int psp_v11_0_xgmi_get_hive_id(struct psp_context *psp, uint64_t *hive_id)
 {
 	struct ta_xgmi_shared_memory *xgmi_cmd;
 	int ret;
@@ -653,12 +722,14 @@ static u64 psp_v11_0_xgmi_get_hive_id(struct psp_context *psp)
 	/* Invoke xgmi ta to get hive id */
 	ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
 	if (ret)
-		return 0;
-	else
-		return xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
+		return ret;
+
+	*hive_id = xgmi_cmd->xgmi_out_message.get_hive_id.hive_id;
+
+	return 0;
 }
 
-static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
+static int psp_v11_0_xgmi_get_node_id(struct psp_context *psp, uint64_t *node_id)
 {
 	struct ta_xgmi_shared_memory *xgmi_cmd;
 	int ret;
@@ -671,9 +742,11 @@ static u64 psp_v11_0_xgmi_get_node_id(struct psp_context *psp)
 	/* Invoke xgmi ta to get the node id */
 	ret = psp_xgmi_invoke(psp, xgmi_cmd->cmd_id);
 	if (ret)
-		return 0;
-	else
-		return xgmi_cmd->xgmi_out_message.get_node_id.node_id;
+		return ret;
+
+	*node_id = xgmi_cmd->xgmi_out_message.get_node_id.node_id;
+
+	return 0;
 }
 
 static const struct psp_funcs psp_v11_0_funcs = {
@@ -692,6 +765,7 @@ static const struct psp_funcs psp_v11_0_funcs = {
 	.xgmi_set_topology_info = psp_v11_0_xgmi_set_topology_info,
 	.xgmi_get_hive_id = psp_v11_0_xgmi_get_hive_id,
 	.xgmi_get_node_id = psp_v11_0_xgmi_get_node_id,
+	.support_vmr_ring = psp_v11_0_support_vmr_ring,
 };
 
 void psp_v11_0_set_psp_funcs(struct psp_context *psp)
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
index 7efb823dd3b1..79694ff16969 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v3_1.c
@@ -240,8 +240,11 @@ static int psp_v3_1_bootloader_load_sos(struct psp_context *psp)
 	 * are already been loaded.
 	 */
 	sol_reg = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_81);
-	if (sol_reg)
+	if (sol_reg) {
+		psp->sos_fw_version = RREG32_SOC15(MP0, 0, mmMP0_SMN_C2PMSG_58);
+		printk("sos fw version = 0x%x.\n", psp->sos_fw_version);
 		return 0;
+	}
 
 	/* Wait for bootloader to signify that is ready having bit 31 of C2PMSG_35 set to 1 */
 	ret = psp_wait_for(psp, SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_35),
@@ -592,7 +595,7 @@ static int psp_v3_1_mode1_reset(struct psp_context *psp)
 	/*send the mode 1 reset command*/
 	WREG32(offset, GFX_CTRL_CMD_ID_MODE1_RST);
 
-	mdelay(1000);
+	msleep(500);
 
 	offset = SOC15_REG_OFFSET(MP0, 0, mmMP0_SMN_C2PMSG_33);
 
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
index 4b6d3e5c821f..6811a5d05b27 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
@@ -78,7 +78,6 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_RLC1_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_PAGE, 0x000003ff, 0x000003c0),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_UTCL1_WATERMK, 0xfc000000, 0x00000000),
-	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831f07),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CLK_CTRL, 0xffffffff, 0x3f000100),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_IB_CNTL, 0x800f0100, 0x00000100),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GFX_RB_WPTR_POLL_CNTL, 0x0000fff0, 0x00403000),
@@ -96,6 +95,7 @@ static const struct soc15_reg_golden golden_settings_sdma_4[] = {
 static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104002),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104002)
 };
@@ -103,6 +103,7 @@ static const struct soc15_reg_golden golden_settings_sdma_vg10[] = {
 static const struct soc15_reg_golden golden_settings_sdma_vg12[] = {
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
 	SOC15_REG_GOLDEN_VALUE(SDMA0, 0, mmSDMA0_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001),
+	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_CHICKEN_BITS, 0xfe931f07, 0x02831d07),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG, 0x0018773f, 0x00104001),
 	SOC15_REG_GOLDEN_VALUE(SDMA1, 0, mmSDMA1_GB_ADDR_CONFIG_READ, 0x0018773f, 0x00104001)
 };
@@ -1458,8 +1459,7 @@ static bool sdma_v4_0_fw_support_paging_queue(struct amdgpu_device *adev)
 		/*return fw_version >= 31;*/
 		return false;
 	case CHIP_VEGA20:
-		/*return fw_version >= 115;*/
-		return false;
+		return fw_version >= 123;
 	default:
 		return false;
 	}
@@ -1706,13 +1706,15 @@ static int sdma_v4_0_process_trap_irq(struct amdgpu_device *adev,
 		amdgpu_fence_process(&adev->sdma.instance[instance].ring);
 		break;
 	case 1:
-		/* XXX compute */
+		if (adev->asic_type == CHIP_VEGA20)
+			amdgpu_fence_process(&adev->sdma.instance[instance].page);
 		break;
 	case 2:
 		/* XXX compute */
 		break;
 	case 3:
-		amdgpu_fence_process(&adev->sdma.instance[instance].page);
+		if (adev->asic_type != CHIP_VEGA20)
+			amdgpu_fence_process(&adev->sdma.instance[instance].page);
 		break;
 	}
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/si_ih.c b/drivers/gpu/drm/amd/amdgpu/si_ih.c
index b3d7d9f83202..2938fb9f17cc 100644
--- a/drivers/gpu/drm/amd/amdgpu/si_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/si_ih.c
@@ -118,19 +118,6 @@ static u32 si_ih_get_wptr(struct amdgpu_device *adev)
 	return (wptr & adev->irq.ih.ptr_mask);
 }
 
-/**
- * si_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool si_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	/* Process all interrupts */
-	return true;
-}
-
 static void si_ih_decode_iv(struct amdgpu_device *adev,
 			     struct amdgpu_iv_entry *entry)
 {
@@ -301,7 +288,6 @@ static const struct amd_ip_funcs si_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs si_ih_funcs = {
 	.get_wptr = si_ih_get_wptr,
-	.prescreen_iv = si_ih_prescreen_iv,
 	.decode_iv = si_ih_decode_iv,
 	.set_rptr = si_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15.c b/drivers/gpu/drm/amd/amdgpu/soc15.c
index 83624e150ca7..8849b74078d6 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15.c
+++ b/drivers/gpu/drm/amd/amdgpu/soc15.c
@@ -507,6 +507,9 @@ int soc15_set_ip_blocks(struct amdgpu_device *adev)
 		return -EINVAL;
 	}
 
+	if (adev->asic_type == CHIP_VEGA20)
+		adev->gmc.xgmi.supported = true;
+
 	if (adev->flags & AMD_IS_APU)
 		adev->nbio_funcs = &nbio_v7_0_funcs;
 	else if (adev->asic_type == CHIP_VEGA20)
diff --git a/drivers/gpu/drm/amd/amdgpu/soc15_common.h b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
index 958b10a57073..49c262540940 100644
--- a/drivers/gpu/drm/amd/amdgpu/soc15_common.h
+++ b/drivers/gpu/drm/amd/amdgpu/soc15_common.h
@@ -49,14 +49,19 @@
 
 #define SOC15_WAIT_ON_RREG(ip, inst, reg, expected_value, mask, ret) \
 	do {							\
+		uint32_t old_ = 0;	\
 		uint32_t tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
 		uint32_t loop = adev->usec_timeout;		\
 		while ((tmp_ & (mask)) != (expected_value)) {	\
-			udelay(2);				\
+			if (old_ != tmp_) {			\
+				loop = adev->usec_timeout;	\
+				old_ = tmp_;				\
+			} else						\
+				udelay(1);				\
 			tmp_ = RREG32(adev->reg_offset[ip##_HWIP][inst][reg##_BASE_IDX] + reg); \
 			loop--;					\
 			if (!loop) {				\
-				DRM_ERROR("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
+				DRM_WARN("Register(%d) [%s] failed to reach value 0x%08x != 0x%08x\n", \
 					  inst, #reg, (unsigned)expected_value, (unsigned)(tmp_ & (mask))); \
 				ret = -ETIMEDOUT;		\
 				break;				\
diff --git a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
index dcdbb4d72472..15da06ddeb75 100644
--- a/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/tonga_ih.c
@@ -219,34 +219,6 @@ static u32 tonga_ih_get_wptr(struct amdgpu_device *adev)
 }
 
 /**
- * tonga_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool tonga_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u16 pasid;
-
-	switch (le32_to_cpu(adev->irq.ih.ring[ring_index]) & 0xff) {
-	case 146:
-	case 147:
-		pasid = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]) >> 16;
-		if (!pasid || amdgpu_vm_pasid_fault_credit(adev, pasid))
-			return true;
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	adev->irq.ih.rptr += 16;
-	return false;
-}
-
-/**
  * tonga_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -506,7 +478,6 @@ static const struct amd_ip_funcs tonga_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs tonga_ih_funcs = {
 	.get_wptr = tonga_ih_get_wptr,
-	.prescreen_iv = tonga_ih_prescreen_iv,
 	.decode_iv = tonga_ih_decode_iv,
 	.set_rptr = tonga_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
index 089645e78f98..aef924026a28 100644
--- a/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/uvd_v7_0.c
@@ -435,7 +435,7 @@ static int uvd_v7_0_sw_init(void *handle)
 			continue;
 		if (!amdgpu_sriov_vf(adev)) {
 			ring = &adev->uvd.inst[j].ring;
-			sprintf(ring->name, "uvd<%d>", j);
+			sprintf(ring->name, "uvd_%d", ring->me);
 			r = amdgpu_ring_init(adev, ring, 512, &adev->uvd.inst[j].irq, 0);
 			if (r)
 				return r;
@@ -443,7 +443,7 @@ static int uvd_v7_0_sw_init(void *handle)
 
 		for (i = 0; i < adev->uvd.num_enc_rings; ++i) {
 			ring = &adev->uvd.inst[j].ring_enc[i];
-			sprintf(ring->name, "uvd_enc%d<%d>", i, j);
+			sprintf(ring->name, "uvd_enc_%d.%d", ring->me, i);
 			if (amdgpu_sriov_vf(adev)) {
 				ring->use_doorbell = true;
 
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
index c1a03505f956..89bb2fef90eb 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v1_0.c
@@ -48,6 +48,7 @@ static void vcn_v1_0_set_enc_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_jpeg_ring_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_set_irq_funcs(struct amdgpu_device *adev);
 static void vcn_v1_0_jpeg_ring_set_patch_ring(struct amdgpu_ring *ring, uint32_t ptr);
+static int vcn_v1_0_set_powergating_state(void *handle, enum amd_powergating_state state);
 
 /**
  * vcn_v1_0_early_init - set function pointers
@@ -213,8 +214,9 @@ static int vcn_v1_0_hw_fini(void *handle)
 	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
 	struct amdgpu_ring *ring = &adev->vcn.ring_dec;
 
-	if (RREG32_SOC15(VCN, 0, mmUVD_STATUS))
-		vcn_v1_0_stop(adev);
+	if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
+		RREG32_SOC15(VCN, 0, mmUVD_STATUS))
+		vcn_v1_0_set_powergating_state(adev, AMD_PG_STATE_GATE);
 
 	ring->sched.ready = false;
 
@@ -1086,7 +1088,8 @@ static int vcn_v1_0_start_dpg_mode(struct amdgpu_device *adev)
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_RBC_RB_CNTL), 0,
 			~UVD_RBC_RB_CNTL__RB_NO_FETCH_MASK);
 
-	/* initialize wptr */
+	/* initialize JPEG wptr */
+	ring = &adev->vcn.ring_jpeg;
 	ring->wptr = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
 
 	/* copy patch commands to the jpeg ring */
@@ -1158,21 +1161,29 @@ static int vcn_v1_0_stop_spg_mode(struct amdgpu_device *adev)
 static int vcn_v1_0_stop_dpg_mode(struct amdgpu_device *adev)
 {
 	int ret_code = 0;
+	uint32_t tmp;
 
 	/* Wait for power status to be UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF */
 	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
 			UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
 			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
 
-	if (!ret_code) {
-		int tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
-		/* wait for read ptr to be equal to write ptr */
-		SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+	/* wait for read ptr to be equal to write ptr */
+	tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR);
+	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
 
-		SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
-			UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
-			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
-	}
+	tmp = RREG32_SOC15(UVD, 0, mmUVD_RB_WPTR2);
+	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, 0, mmUVD_JRBC_RB_WPTR);
+	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_JRBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	tmp = RREG32_SOC15(UVD, 0, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
+	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);
+
+	SOC15_WAIT_ON_RREG(UVD, 0, mmUVD_POWER_STATUS,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_TILES_OFF,
+		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
 
 	/* disable dynamic power gating mode */
 	WREG32_P(SOC15_REG_OFFSET(UVD, 0, mmUVD_POWER_STATUS), 0,
diff --git a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
index d84b687240d1..2c250b01a903 100644
--- a/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
+++ b/drivers/gpu/drm/amd/amdgpu/vega10_ih.c
@@ -220,90 +220,6 @@ static u32 vega10_ih_get_wptr(struct amdgpu_device *adev)
 }
 
 /**
- * vega10_ih_prescreen_iv - prescreen an interrupt vector
- *
- * @adev: amdgpu_device pointer
- *
- * Returns true if the interrupt vector should be further processed.
- */
-static bool vega10_ih_prescreen_iv(struct amdgpu_device *adev)
-{
-	u32 ring_index = adev->irq.ih.rptr >> 2;
-	u32 dw0, dw3, dw4, dw5;
-	u16 pasid;
-	u64 addr, key;
-	struct amdgpu_vm *vm;
-	int r;
-
-	dw0 = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
-	dw3 = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
-	dw4 = le32_to_cpu(adev->irq.ih.ring[ring_index + 4]);
-	dw5 = le32_to_cpu(adev->irq.ih.ring[ring_index + 5]);
-
-	/* Filter retry page faults, let only the first one pass. If
-	 * there are too many outstanding faults, ignore them until
-	 * some faults get cleared.
-	 */
-	switch (dw0 & 0xff) {
-	case SOC15_IH_CLIENTID_VMC:
-	case SOC15_IH_CLIENTID_UTCL2:
-		break;
-	default:
-		/* Not a VM fault */
-		return true;
-	}
-
-	pasid = dw3 & 0xffff;
-	/* No PASID, can't identify faulting process */
-	if (!pasid)
-		return true;
-
-	/* Not a retry fault, check fault credit */
-	if (!(dw5 & 0x80)) {
-		if (!amdgpu_vm_pasid_fault_credit(adev, pasid))
-			goto ignore_iv;
-		return true;
-	}
-
-	/* Track retry faults in per-VM fault FIFO. */
-	spin_lock(&adev->vm_manager.pasid_lock);
-	vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
-	addr = ((u64)(dw5 & 0xf) << 44) | ((u64)dw4 << 12);
-	key = AMDGPU_VM_FAULT(pasid, addr);
-	if (!vm) {
-		/* VM not found, process it normally */
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		return true;
-	} else {
-		r = amdgpu_vm_add_fault(vm->fault_hash, key);
-
-		/* Hash table is full or the fault is already being processed,
-		 * ignore further page faults
-		 */
-		if (r != 0) {
-			spin_unlock(&adev->vm_manager.pasid_lock);
-			goto ignore_iv;
-		}
-	}
-	/* No locking required with single writer and single reader */
-	r = kfifo_put(&vm->faults, key);
-	if (!r) {
-		/* FIFO is full. Ignore it until there is space */
-		amdgpu_vm_clear_fault(vm->fault_hash, key);
-		spin_unlock(&adev->vm_manager.pasid_lock);
-		goto ignore_iv;
-	}
-
-	spin_unlock(&adev->vm_manager.pasid_lock);
-	/* It's the first fault for this address, process it normally */
-	return true;
-
-ignore_iv:
-	adev->irq.ih.rptr += 32;
-	return false;
-}
-
-/**
  * vega10_ih_decode_iv - decode an interrupt vector
  *
  * @adev: amdgpu_device pointer
@@ -487,7 +403,6 @@ const struct amd_ip_funcs vega10_ih_ip_funcs = {
 
 static const struct amdgpu_ih_funcs vega10_ih_funcs = {
 	.get_wptr = vega10_ih_get_wptr,
-	.prescreen_iv = vega10_ih_prescreen_iv,
 	.decode_iv = vega10_ih_decode_iv,
 	.set_rptr = vega10_ih_set_rptr
 };
diff --git a/drivers/gpu/drm/amd/amdgpu/vi.c b/drivers/gpu/drm/amd/amdgpu/vi.c
index ff2906c215fa..77e367459101 100644
--- a/drivers/gpu/drm/amd/amdgpu/vi.c
+++ b/drivers/gpu/drm/amd/amdgpu/vi.c
@@ -87,9 +87,9 @@ static u32 vi_pcie_rreg(struct amdgpu_device *adev, u32 reg)
 	u32 r;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
-	WREG32(mmPCIE_INDEX, reg);
-	(void)RREG32(mmPCIE_INDEX);
-	r = RREG32(mmPCIE_DATA);
+	WREG32_NO_KIQ(mmPCIE_INDEX, reg);
+	(void)RREG32_NO_KIQ(mmPCIE_INDEX);
+	r = RREG32_NO_KIQ(mmPCIE_DATA);
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 	return r;
 }
@@ -99,10 +99,10 @@ static void vi_pcie_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	unsigned long flags;
 
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
-	WREG32(mmPCIE_INDEX, reg);
-	(void)RREG32(mmPCIE_INDEX);
-	WREG32(mmPCIE_DATA, v);
-	(void)RREG32(mmPCIE_DATA);
+	WREG32_NO_KIQ(mmPCIE_INDEX, reg);
+	(void)RREG32_NO_KIQ(mmPCIE_INDEX);
+	WREG32_NO_KIQ(mmPCIE_DATA, v);
+	(void)RREG32_NO_KIQ(mmPCIE_DATA);
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 }
 
@@ -123,8 +123,8 @@ static void vi_smc_wreg(struct amdgpu_device *adev, u32 reg, u32 v)
 	unsigned long flags;
 
 	spin_lock_irqsave(&adev->smc_idx_lock, flags);
-	WREG32(mmSMC_IND_INDEX_11, (reg));
-	WREG32(mmSMC_IND_DATA_11, (v));
+	WREG32_NO_KIQ(mmSMC_IND_INDEX_11, (reg));
+	WREG32_NO_KIQ(mmSMC_IND_DATA_11, (v));
 	spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
 }
 
diff --git a/drivers/gpu/drm/amd/amdkfd/Kconfig b/drivers/gpu/drm/amd/amdkfd/Kconfig
index fbf0ee5201c3..c3613604a4f8 100644
--- a/drivers/gpu/drm/amd/amdkfd/Kconfig
+++ b/drivers/gpu/drm/amd/amdkfd/Kconfig
@@ -4,8 +4,8 @@
 
 config HSA_AMD
 	bool "HSA kernel driver for AMD GPU devices"
-	depends on DRM_AMDGPU && X86_64
-	imply AMD_IOMMU_V2
+	depends on DRM_AMDGPU && (X86_64 || ARM64)
+	imply AMD_IOMMU_V2 if X86_64
 	select MMU_NOTIFIER
 	help
 	  Enable this if you want to use HSA features on AMD GPU devices.
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 5f4062b41add..083bd8114db1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -33,6 +33,7 @@
 #include <linux/time.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/dma-buf.h>
 #include <asm/processor.h>
 #include "kfd_priv.h"
 #include "kfd_device_queue_manager.h"
@@ -157,8 +158,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	}
 
 	if ((args->ring_base_address) &&
-		(!access_ok(VERIFY_WRITE,
-			(const void __user *) args->ring_base_address,
+		(!access_ok((const void __user *) args->ring_base_address,
 			sizeof(uint64_t)))) {
 		pr_err("Can't access ring base address\n");
 		return -EFAULT;
@@ -169,31 +169,27 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 		return -EINVAL;
 	}
 
-	if (!access_ok(VERIFY_WRITE,
-			(const void __user *) args->read_pointer_address,
+	if (!access_ok((const void __user *) args->read_pointer_address,
 			sizeof(uint32_t))) {
 		pr_err("Can't access read pointer\n");
 		return -EFAULT;
 	}
 
-	if (!access_ok(VERIFY_WRITE,
-			(const void __user *) args->write_pointer_address,
+	if (!access_ok((const void __user *) args->write_pointer_address,
 			sizeof(uint32_t))) {
 		pr_err("Can't access write pointer\n");
 		return -EFAULT;
 	}
 
 	if (args->eop_buffer_address &&
-		!access_ok(VERIFY_WRITE,
-			(const void __user *) args->eop_buffer_address,
+		!access_ok((const void __user *) args->eop_buffer_address,
 			sizeof(uint32_t))) {
 		pr_debug("Can't access eop buffer");
 		return -EFAULT;
 	}
 
 	if (args->ctx_save_restore_address &&
-		!access_ok(VERIFY_WRITE,
-			(const void __user *) args->ctx_save_restore_address,
+		!access_ok((const void __user *) args->ctx_save_restore_address,
 			sizeof(uint32_t))) {
 		pr_debug("Can't access ctx save restore buffer");
 		return -EFAULT;
@@ -364,8 +360,7 @@ static int kfd_ioctl_update_queue(struct file *filp, struct kfd_process *p,
 	}
 
 	if ((args->ring_base_address) &&
-		(!access_ok(VERIFY_WRITE,
-			(const void __user *) args->ring_base_address,
+		(!access_ok((const void __user *) args->ring_base_address,
 			sizeof(uint64_t)))) {
 		pr_err("Can't access ring base address\n");
 		return -EFAULT;
@@ -1273,6 +1268,12 @@ static int kfd_ioctl_alloc_memory_of_gpu(struct file *filep,
 		return -EINVAL;
 	}
 
+	if (flags & KFD_IOC_ALLOC_MEM_FLAGS_DOORBELL) {
+		if (args->size != kfd_doorbell_process_slice(dev))
+			return -EINVAL;
+		offset = kfd_get_process_doorbells(dev, p);
+	}
+
 	mutex_lock(&p->mutex);
 
 	pdd = kfd_bind_process_to_device(dev, p);
@@ -1550,6 +1551,115 @@ copy_from_user_failed:
 	return err;
 }
 
+static int kfd_ioctl_get_dmabuf_info(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_get_dmabuf_info_args *args = data;
+	struct kfd_dev *dev = NULL;
+	struct kgd_dev *dma_buf_kgd;
+	void *metadata_buffer = NULL;
+	uint32_t flags;
+	unsigned int i;
+	int r;
+
+	/* Find a KFD GPU device that supports the get_dmabuf_info query */
+	for (i = 0; kfd_topology_enum_kfd_devices(i, &dev) == 0; i++)
+		if (dev)
+			break;
+	if (!dev)
+		return -EINVAL;
+
+	if (args->metadata_ptr) {
+		metadata_buffer = kzalloc(args->metadata_size, GFP_KERNEL);
+		if (!metadata_buffer)
+			return -ENOMEM;
+	}
+
+	/* Get dmabuf info from KGD */
+	r = amdgpu_amdkfd_get_dmabuf_info(dev->kgd, args->dmabuf_fd,
+					  &dma_buf_kgd, &args->size,
+					  metadata_buffer, args->metadata_size,
+					  &args->metadata_size, &flags);
+	if (r)
+		goto exit;
+
+	/* Reverse-lookup gpu_id from kgd pointer */
+	dev = kfd_device_by_kgd(dma_buf_kgd);
+	if (!dev) {
+		r = -EINVAL;
+		goto exit;
+	}
+	args->gpu_id = dev->id;
+	args->flags = flags;
+
+	/* Copy metadata buffer to user mode */
+	if (metadata_buffer) {
+		r = copy_to_user((void __user *)args->metadata_ptr,
+				 metadata_buffer, args->metadata_size);
+		if (r != 0)
+			r = -EFAULT;
+	}
+
+exit:
+	kfree(metadata_buffer);
+
+	return r;
+}
+
+static int kfd_ioctl_import_dmabuf(struct file *filep,
+				   struct kfd_process *p, void *data)
+{
+	struct kfd_ioctl_import_dmabuf_args *args = data;
+	struct kfd_process_device *pdd;
+	struct dma_buf *dmabuf;
+	struct kfd_dev *dev;
+	int idr_handle;
+	uint64_t size;
+	void *mem;
+	int r;
+
+	dev = kfd_device_by_id(args->gpu_id);
+	if (!dev)
+		return -EINVAL;
+
+	dmabuf = dma_buf_get(args->dmabuf_fd);
+	if (IS_ERR(dmabuf))
+		return PTR_ERR(dmabuf);
+
+	mutex_lock(&p->mutex);
+
+	pdd = kfd_bind_process_to_device(dev, p);
+	if (IS_ERR(pdd)) {
+		r = PTR_ERR(pdd);
+		goto err_unlock;
+	}
+
+	r = amdgpu_amdkfd_gpuvm_import_dmabuf(dev->kgd, dmabuf,
+					      args->va_addr, pdd->vm,
+					      (struct kgd_mem **)&mem, &size,
+					      NULL);
+	if (r)
+		goto err_unlock;
+
+	idr_handle = kfd_process_device_create_obj_handle(pdd, mem);
+	if (idr_handle < 0) {
+		r = -EFAULT;
+		goto err_free;
+	}
+
+	mutex_unlock(&p->mutex);
+
+	args->handle = MAKE_HANDLE(args->gpu_id, idr_handle);
+
+	return 0;
+
+err_free:
+	amdgpu_amdkfd_gpuvm_free_memory_of_gpu(dev->kgd, (struct kgd_mem *)mem);
+err_unlock:
+	mutex_unlock(&p->mutex);
+	return r;
+}
+
 #define AMDKFD_IOCTL_DEF(ioctl, _func, _flags) \
 	[_IOC_NR(ioctl)] = {.cmd = ioctl, .func = _func, .flags = _flags, \
 			    .cmd_drv = 0, .name = #ioctl}
@@ -1635,7 +1745,13 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 			kfd_ioctl_set_cu_mask, 0),
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_QUEUE_WAVE_STATE,
-			kfd_ioctl_get_queue_wave_state, 0)
+			kfd_ioctl_get_queue_wave_state, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_GET_DMABUF_INFO,
+				kfd_ioctl_get_dmabuf_info, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
+				kfd_ioctl_import_dmabuf, 0),
 
 };
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
index c02adbbeef2a..5d85ff341385 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_crat.c
@@ -853,7 +853,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
 	 */
 	pgdat = NODE_DATA(numa_node_id);
 	for (zone_type = 0; zone_type < MAX_NR_ZONES; zone_type++)
-		mem_in_bytes += pgdat->node_zones[zone_type].managed_pages;
+		mem_in_bytes += zone_managed_pages(&pgdat->node_zones[zone_type]);
 	mem_in_bytes <<= PAGE_SHIFT;
 
 	sub_type_hdr->length_low = lower_32_bits(mem_in_bytes);
@@ -863,6 +863,7 @@ static int kfd_fill_mem_info_for_cpu(int numa_node_id, int *avail_size,
 	return 0;
 }
 
+#if CONFIG_X86_64
 static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
 				uint32_t *num_entries,
 				struct crat_subtype_iolink *sub_type_hdr)
@@ -905,6 +906,7 @@ static int kfd_fill_iolink_info_for_cpu(int numa_node_id, int *avail_size,
 
 	return 0;
 }
+#endif
 
 /* kfd_create_vcrat_image_cpu - Create Virtual CRAT for CPU
  *
@@ -920,7 +922,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 	struct crat_subtype_generic *sub_type_hdr;
 	int avail_size = *size;
 	int numa_node_id;
+#ifdef CONFIG_X86_64
 	uint32_t entries = 0;
+#endif
 	int ret = 0;
 
 	if (!pcrat_image || avail_size < VCRAT_SIZE_FOR_CPU)
@@ -982,6 +986,7 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 			sub_type_hdr->length);
 
 		/* Fill in Subtype: IO Link */
+#ifdef CONFIG_X86_64
 		ret = kfd_fill_iolink_info_for_cpu(numa_node_id, &avail_size,
 				&entries,
 				(struct crat_subtype_iolink *)sub_type_hdr);
@@ -992,6 +997,9 @@ static int kfd_create_vcrat_image_cpu(void *pcrat_image, size_t *size)
 
 		sub_type_hdr = (typeof(sub_type_hdr))((char *)sub_type_hdr +
 				sub_type_hdr->length * entries);
+#else
+		pr_info("IO link not available for non x86 platforms\n");
+#endif
 
 		crat_table->num_domains++;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 9ed14a11afa2..8be9677c0c07 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -378,7 +378,13 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x6864, &vega10_device_info },	/* Vega10 */
 	{ 0x6867, &vega10_device_info },	/* Vega10 */
 	{ 0x6868, &vega10_device_info },	/* Vega10 */
+	{ 0x6869, &vega10_device_info },	/* Vega10 */
+	{ 0x686A, &vega10_device_info },	/* Vega10 */
+	{ 0x686B, &vega10_device_info },	/* Vega10 */
 	{ 0x686C, &vega10_vf_device_info },	/* Vega10  vf*/
+	{ 0x686D, &vega10_device_info },	/* Vega10 */
+	{ 0x686E, &vega10_device_info },	/* Vega10 */
+	{ 0x686F, &vega10_device_info },	/* Vega10 */
 	{ 0x687F, &vega10_device_info },	/* Vega10 */
 	{ 0x69A0, &vega12_device_info },	/* Vega12 */
 	{ 0x69A1, &vega12_device_info },	/* Vega12 */
@@ -389,6 +395,7 @@ static const struct kfd_deviceid supported_devices[] = {
 	{ 0x66a1, &vega20_device_info },	/* Vega20 */
 	{ 0x66a2, &vega20_device_info },	/* Vega20 */
 	{ 0x66a3, &vega20_device_info },	/* Vega20 */
+	{ 0x66a4, &vega20_device_info },	/* Vega20 */
 	{ 0x66a7, &vega20_device_info },	/* Vega20 */
 	{ 0x66af, &vega20_device_info }		/* Vega20 */
 };
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index dec8e64f36bd..0689d4ccbbc0 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -793,6 +793,7 @@ struct kfd_topology_device *kfd_topology_device_by_proximity_domain(
 struct kfd_topology_device *kfd_topology_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_id(uint32_t gpu_id);
 struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev);
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd);
 int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev);
 int kfd_numa_node_to_apic_id(int numa_node_id);
 
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index aa793fcbbdcc..09da91644f9f 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -101,7 +101,25 @@ struct kfd_dev *kfd_device_by_pci_dev(const struct pci_dev *pdev)
 	down_read(&topology_lock);
 
 	list_for_each_entry(top_dev, &topology_device_list, list)
-		if (top_dev->gpu->pdev == pdev) {
+		if (top_dev->gpu && top_dev->gpu->pdev == pdev) {
+			device = top_dev->gpu;
+			break;
+		}
+
+	up_read(&topology_lock);
+
+	return device;
+}
+
+struct kfd_dev *kfd_device_by_kgd(const struct kgd_dev *kgd)
+{
+	struct kfd_topology_device *top_dev;
+	struct kfd_dev *device = NULL;
+
+	down_read(&topology_lock);
+
+	list_for_each_entry(top_dev, &topology_device_list, list)
+		if (top_dev->gpu && top_dev->gpu->kgd == kgd) {
 			device = top_dev->gpu;
 			break;
 		}
@@ -1075,8 +1093,6 @@ static uint32_t kfd_generate_gpu_id(struct kfd_dev *gpu)
  *		the GPU device is not already present in the topology device
  *		list then return NULL. This means a new topology device has to
  *		be created for this GPU.
- * TODO: Rather than assiging @gpu to first topology device withtout
- *		gpu attached, it will better to have more stringent check.
  */
 static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 {
@@ -1084,12 +1100,20 @@ static struct kfd_topology_device *kfd_assign_gpu(struct kfd_dev *gpu)
 	struct kfd_topology_device *out_dev = NULL;
 
 	down_write(&topology_lock);
-	list_for_each_entry(dev, &topology_device_list, list)
+	list_for_each_entry(dev, &topology_device_list, list) {
+		/* Discrete GPUs need their own topology device list
+		 * entries. Don't assign them to CPU/APU nodes.
+		 */
+		if (!gpu->device_info->needs_iommu_device &&
+		    dev->node_props.cpu_cores_count)
+			continue;
+
 		if (!dev->gpu && (dev->node_props.simd_count > 0)) {
 			dev->gpu = gpu;
 			out_dev = dev;
 			break;
 		}
+	}
 	up_write(&topology_lock);
 	return out_dev;
 }
@@ -1374,7 +1398,6 @@ int kfd_topology_enum_kfd_devices(uint8_t idx, struct kfd_dev **kdev)
 
 static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
 {
-	const struct cpuinfo_x86 *cpuinfo;
 	int first_cpu_of_numa_node;
 
 	if (!cpumask || cpumask == cpu_none_mask)
@@ -1382,9 +1405,11 @@ static int kfd_cpumask_to_apic_id(const struct cpumask *cpumask)
 	first_cpu_of_numa_node = cpumask_first(cpumask);
 	if (first_cpu_of_numa_node >= nr_cpu_ids)
 		return -1;
-	cpuinfo = &cpu_data(first_cpu_of_numa_node);
-
-	return cpuinfo->apicid;
+#ifdef CONFIG_X86_64
+	return cpu_data(first_cpu_of_numa_node).apicid;
+#else
+	return first_cpu_of_numa_node;
+#endif
 }
 
 /* kfd_numa_node_to_apic_id - Returns the APIC ID of the first logical processor
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 32e791d9b9a8..f4fa40c387d3 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -23,6 +23,9 @@
  *
  */
 
+/* The caprices of the preprocessor require that this be declared right here */
+#define CREATE_TRACE_POINTS
+
 #include "dm_services_types.h"
 #include "dc.h"
 #include "dc/inc/core_types.h"
@@ -54,6 +57,7 @@
 
 #include <drm/drmP.h>
 #include <drm/drm_atomic.h>
+#include <drm/drm_atomic_uapi.h>
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_mst_helper.h>
 #include <drm/drm_fb_helper.h>
@@ -72,6 +76,7 @@
 
 #include "modules/inc/mod_freesync.h"
 #include "modules/power/power_helpers.h"
+#include "modules/inc/mod_info_packet.h"
 
 #define FIRMWARE_RAVEN_DMCU		"amdgpu/raven_dmcu.bin"
 MODULE_FIRMWARE(FIRMWARE_RAVEN_DMCU);
@@ -129,6 +134,8 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state);
 static int amdgpu_dm_atomic_check(struct drm_device *dev,
 				  struct drm_atomic_state *state);
 
+static void handle_cursor_update(struct drm_plane *plane,
+				 struct drm_plane_state *old_plane_state);
 
 
 
@@ -324,12 +331,29 @@ static void dm_crtc_high_irq(void *interrupt_params)
 	struct common_irq_params *irq_params = interrupt_params;
 	struct amdgpu_device *adev = irq_params->adev;
 	struct amdgpu_crtc *acrtc;
+	struct dm_crtc_state *acrtc_state;
 
 	acrtc = get_crtc_by_otg_inst(adev, irq_params->irq_src - IRQ_TYPE_VBLANK);
 
 	if (acrtc) {
 		drm_crtc_handle_vblank(&acrtc->base);
 		amdgpu_dm_crtc_handle_crc_irq(&acrtc->base);
+
+		acrtc_state = to_dm_crtc_state(acrtc->base.state);
+
+		if (acrtc_state->stream &&
+		    acrtc_state->vrr_params.supported &&
+		    acrtc_state->freesync_config.state == VRR_STATE_ACTIVE_VARIABLE) {
+			mod_freesync_handle_v_update(
+				adev->dm.freesync_module,
+				acrtc_state->stream,
+				&acrtc_state->vrr_params);
+
+			dc_stream_adjust_vmin_vmax(
+				adev->dm.dc,
+				acrtc_state->stream,
+				&acrtc_state->vrr_params.adjust);
+		}
 	}
 }
 
@@ -398,6 +422,8 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 	/* Zero all the fields */
 	memset(&init_data, 0, sizeof(init_data));
 
+	mutex_init(&adev->dm.dc_lock);
+
 	if(amdgpu_dm_irq_init(adev)) {
 		DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
 		goto error;
@@ -512,6 +538,9 @@ static void amdgpu_dm_fini(struct amdgpu_device *adev)
 	/* DC Destroy TODO: Replace destroy DAL */
 	if (adev->dm.dc)
 		dc_destroy(&adev->dm.dc);
+
+	mutex_destroy(&adev->dm.dc_lock);
+
 	return;
 }
 
@@ -670,22 +699,36 @@ static void s3_handle_mst(struct drm_device *dev, bool suspend)
 {
 	struct amdgpu_dm_connector *aconnector;
 	struct drm_connector *connector;
+	struct drm_dp_mst_topology_mgr *mgr;
+	int ret;
+	bool need_hotplug = false;
 
 	drm_modeset_lock(&dev->mode_config.connection_mutex, NULL);
 
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		   aconnector = to_amdgpu_dm_connector(connector);
-		   if (aconnector->dc_link->type == dc_connection_mst_branch &&
-				   !aconnector->mst_port) {
+	list_for_each_entry(connector, &dev->mode_config.connector_list,
+			    head) {
+		aconnector = to_amdgpu_dm_connector(connector);
+		if (aconnector->dc_link->type != dc_connection_mst_branch ||
+		    aconnector->mst_port)
+			continue;
 
-			   if (suspend)
-				   drm_dp_mst_topology_mgr_suspend(&aconnector->mst_mgr);
-			   else
-				   drm_dp_mst_topology_mgr_resume(&aconnector->mst_mgr);
-		   }
+		mgr = &aconnector->mst_mgr;
+
+		if (suspend) {
+			drm_dp_mst_topology_mgr_suspend(mgr);
+		} else {
+			ret = drm_dp_mst_topology_mgr_resume(mgr);
+			if (ret < 0) {
+				drm_dp_mst_topology_mgr_set_mst(mgr, false);
+				need_hotplug = true;
+			}
+		}
 	}
 
 	drm_modeset_unlock(&dev->mode_config.connection_mutex);
+
+	if (need_hotplug)
+		drm_kms_helper_hotplug_event(dev);
 }
 
 /**
@@ -869,7 +912,6 @@ static int dm_resume(void *handle)
 	struct drm_plane_state *new_plane_state;
 	struct dm_plane_state *dm_new_plane_state;
 	enum dc_connection_type new_connection_type = dc_connection_none;
-	int ret;
 	int i;
 
 	/* power on hardware */
@@ -942,13 +984,13 @@ static int dm_resume(void *handle)
 		}
 	}
 
-	ret = drm_atomic_helper_resume(ddev, dm->cached_state);
+	drm_atomic_helper_resume(ddev, dm->cached_state);
 
 	dm->cached_state = NULL;
 
 	amdgpu_dm_irq_resume_late(adev);
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -1730,7 +1772,7 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 		+ caps.min_input_signal * 0x101;
 
 	if (dc_link_set_backlight_level(dm->backlight_link,
-			brightness, 0, 0))
+			brightness, 0))
 		return 0;
 	else
 		return 1;
@@ -2698,9 +2740,9 @@ static void fill_audio_info(struct audio_info *audio_info,
 
 	cea_revision = drm_connector->display_info.cea_rev;
 
-	strncpy(audio_info->display_name,
+	strscpy(audio_info->display_name,
 		edid_caps->display_name,
-		AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS - 1);
+		AUDIO_INFO_DISPLAY_NAME_SIZE_IN_CHARS);
 
 	if (cea_revision >= 3) {
 		audio_info->mode_count = edid_caps->audio_mode_count;
@@ -2930,6 +2972,7 @@ create_stream_for_sink(struct amdgpu_dm_connector *aconnector,
 
 	if (dm_state && dm_state->freesync_capable)
 		stream->ignore_msa_timing_param = true;
+
 finish:
 	if (sink && sink->sink_signal == SIGNAL_TYPE_VIRTUAL && aconnector->base.force != DRM_FORCE_ON)
 		dc_sink_release(sink);
@@ -2996,11 +3039,12 @@ dm_crtc_duplicate_state(struct drm_crtc *crtc)
 		dc_stream_retain(state->stream);
 	}
 
-	state->adjust = cur->adjust;
+	state->vrr_params = cur->vrr_params;
 	state->vrr_infopacket = cur->vrr_infopacket;
 	state->abm_level = cur->abm_level;
 	state->vrr_supported = cur->vrr_supported;
 	state->freesync_config = cur->freesync_config;
+	state->crc_enabled = cur->crc_enabled;
 
 	/* TODO Duplicate dc_stream after objects are stream object is flattened */
 
@@ -3094,10 +3138,8 @@ int amdgpu_dm_connector_atomic_set_property(struct drm_connector *connector,
 			rmx_type = RMX_FULL;
 			break;
 		case DRM_MODE_SCALE_NONE:
-			rmx_type = RMX_OFF;
-			break;
 		default:
-			rmx_type = RMX_ASPECT;
+			rmx_type = RMX_OFF;
 			break;
 		}
 
@@ -3210,10 +3252,11 @@ void amdgpu_dm_connector_funcs_reset(struct drm_connector *connector)
 	state = kzalloc(sizeof(*state), GFP_KERNEL);
 
 	if (state) {
-		state->scaling = RMX_ASPECT;
+		state->scaling = RMX_OFF;
 		state->underscan_enable = false;
 		state->underscan_hborder = 0;
 		state->underscan_vborder = 0;
+		state->max_bpc = 8;
 
 		__drm_atomic_helper_connector_reset(connector, &state->base);
 	}
@@ -3235,6 +3278,11 @@ amdgpu_dm_connector_atomic_duplicate_state(struct drm_connector *connector)
 
 	new_state->freesync_capable = state->freesync_capable;
 	new_state->abm_level = state->abm_level;
+	new_state->scaling = state->scaling;
+	new_state->underscan_enable = state->underscan_enable;
+	new_state->underscan_hborder = state->underscan_hborder;
+	new_state->underscan_vborder = state->underscan_vborder;
+	new_state->max_bpc = state->max_bpc;
 
 	return &new_state->base;
 }
@@ -3607,10 +3655,53 @@ static int dm_plane_atomic_check(struct drm_plane *plane,
 	return -EINVAL;
 }
 
+static int dm_plane_atomic_async_check(struct drm_plane *plane,
+				       struct drm_plane_state *new_plane_state)
+{
+	struct drm_plane_state *old_plane_state =
+		drm_atomic_get_old_plane_state(new_plane_state->state, plane);
+
+	/* Only support async updates on cursor planes. */
+	if (plane->type != DRM_PLANE_TYPE_CURSOR)
+		return -EINVAL;
+
+	/*
+	 * DRM calls prepare_fb and cleanup_fb on new_plane_state for
+	 * async commits so don't allow fb changes.
+	 */
+	if (old_plane_state->fb != new_plane_state->fb)
+		return -EINVAL;
+
+	return 0;
+}
+
+static void dm_plane_atomic_async_update(struct drm_plane *plane,
+					 struct drm_plane_state *new_state)
+{
+	struct drm_plane_state *old_state =
+		drm_atomic_get_old_plane_state(new_state->state, plane);
+
+	if (plane->state->fb != new_state->fb)
+		drm_atomic_set_fb_for_plane(plane->state, new_state->fb);
+
+	plane->state->src_x = new_state->src_x;
+	plane->state->src_y = new_state->src_y;
+	plane->state->src_w = new_state->src_w;
+	plane->state->src_h = new_state->src_h;
+	plane->state->crtc_x = new_state->crtc_x;
+	plane->state->crtc_y = new_state->crtc_y;
+	plane->state->crtc_w = new_state->crtc_w;
+	plane->state->crtc_h = new_state->crtc_h;
+
+	handle_cursor_update(plane, old_state);
+}
+
 static const struct drm_plane_helper_funcs dm_plane_helper_funcs = {
 	.prepare_fb = dm_plane_helper_prepare_fb,
 	.cleanup_fb = dm_plane_helper_cleanup_fb,
 	.atomic_check = dm_plane_atomic_check,
+	.atomic_async_check = dm_plane_atomic_async_check,
+	.atomic_async_update = dm_plane_atomic_async_update
 };
 
 /*
@@ -3824,7 +3915,7 @@ amdgpu_dm_create_common_mode(struct drm_encoder *encoder,
 	mode->hdisplay = hdisplay;
 	mode->vdisplay = vdisplay;
 	mode->type &= ~DRM_MODE_TYPE_PREFERRED;
-	strncpy(mode->name, name, DRM_DISPLAY_MODE_LEN);
+	strscpy(mode->name, name, DRM_DISPLAY_MODE_LEN);
 
 	return mode;
 
@@ -4299,6 +4390,7 @@ static int get_cursor_position(struct drm_plane *plane, struct drm_crtc *crtc,
 static void handle_cursor_update(struct drm_plane *plane,
 				 struct drm_plane_state *old_plane_state)
 {
+	struct amdgpu_device *adev = plane->dev->dev_private;
 	struct amdgpu_framebuffer *afb = to_amdgpu_framebuffer(plane->state->fb);
 	struct drm_crtc *crtc = afb ? plane->state->crtc : old_plane_state->crtc;
 	struct dm_crtc_state *crtc_state = crtc ? to_dm_crtc_state(crtc->state) : NULL;
@@ -4323,9 +4415,12 @@ static void handle_cursor_update(struct drm_plane *plane,
 
 	if (!position.enable) {
 		/* turn off cursor */
-		if (crtc_state && crtc_state->stream)
+		if (crtc_state && crtc_state->stream) {
+			mutex_lock(&adev->dm.dc_lock);
 			dc_stream_set_cursor_position(crtc_state->stream,
 						      &position);
+			mutex_unlock(&adev->dm.dc_lock);
+		}
 		return;
 	}
 
@@ -4343,6 +4438,7 @@ static void handle_cursor_update(struct drm_plane *plane,
 	attributes.pitch = attributes.width;
 
 	if (crtc_state->stream) {
+		mutex_lock(&adev->dm.dc_lock);
 		if (!dc_stream_set_cursor_attributes(crtc_state->stream,
 							 &attributes))
 			DRM_ERROR("DC failed to set cursor attributes\n");
@@ -4350,6 +4446,7 @@ static void handle_cursor_update(struct drm_plane *plane,
 		if (!dc_stream_set_cursor_position(crtc_state->stream,
 						   &position))
 			DRM_ERROR("DC failed to set cursor position\n");
+		mutex_unlock(&adev->dm.dc_lock);
 	}
 }
 
@@ -4388,9 +4485,11 @@ struct dc_stream_status *dc_state_get_stream_status(
 static void update_freesync_state_on_stream(
 	struct amdgpu_display_manager *dm,
 	struct dm_crtc_state *new_crtc_state,
-	struct dc_stream_state *new_stream)
+	struct dc_stream_state *new_stream,
+	struct dc_plane_state *surface,
+	u32 flip_timestamp_in_us)
 {
-	struct mod_vrr_params vrr = {0};
+	struct mod_vrr_params vrr_params = new_crtc_state->vrr_params;
 	struct dc_info_packet vrr_infopacket = {0};
 	struct mod_freesync_config config = new_crtc_state->freesync_config;
 
@@ -4417,43 +4516,52 @@ static void update_freesync_state_on_stream(
 
 	mod_freesync_build_vrr_params(dm->freesync_module,
 				      new_stream,
-				      &config, &vrr);
+				      &config, &vrr_params);
+
+	if (surface) {
+		mod_freesync_handle_preflip(
+			dm->freesync_module,
+			surface,
+			new_stream,
+			flip_timestamp_in_us,
+			&vrr_params);
+	}
 
 	mod_freesync_build_vrr_infopacket(
 		dm->freesync_module,
 		new_stream,
-		&vrr,
-		packet_type_vrr,
-		transfer_func_unknown,
+		&vrr_params,
+		PACKET_TYPE_VRR,
+		TRANSFER_FUNC_UNKNOWN,
 		&vrr_infopacket);
 
 	new_crtc_state->freesync_timing_changed =
-		(memcmp(&new_crtc_state->adjust,
-			&vrr.adjust,
-			sizeof(vrr.adjust)) != 0);
+		(memcmp(&new_crtc_state->vrr_params.adjust,
+			&vrr_params.adjust,
+			sizeof(vrr_params.adjust)) != 0);
 
 	new_crtc_state->freesync_vrr_info_changed =
 		(memcmp(&new_crtc_state->vrr_infopacket,
 			&vrr_infopacket,
 			sizeof(vrr_infopacket)) != 0);
 
-	new_crtc_state->adjust = vrr.adjust;
+	new_crtc_state->vrr_params = vrr_params;
 	new_crtc_state->vrr_infopacket = vrr_infopacket;
 
-	new_stream->adjust = new_crtc_state->adjust;
+	new_stream->adjust = new_crtc_state->vrr_params.adjust;
 	new_stream->vrr_infopacket = vrr_infopacket;
 
 	if (new_crtc_state->freesync_vrr_info_changed)
 		DRM_DEBUG_KMS("VRR packet update: crtc=%u enabled=%d state=%d",
 			      new_crtc_state->base.crtc->base.id,
 			      (int)new_crtc_state->base.vrr_enabled,
-			      (int)vrr.state);
+			      (int)vrr_params.state);
 
 	if (new_crtc_state->freesync_timing_changed)
 		DRM_DEBUG_KMS("VRR timing update: crtc=%u min=%u max=%u\n",
 			      new_crtc_state->base.crtc->base.id,
-			      vrr.adjust.v_total_min,
-			      vrr.adjust.v_total_max);
+				  vrr_params.adjust.v_total_min,
+				  vrr_params.adjust.v_total_max);
 }
 
 /*
@@ -4467,6 +4575,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 			      struct dc_state *state)
 {
 	unsigned long flags;
+	uint64_t timestamp_ns;
 	uint32_t target_vblank;
 	int r, vpos, hpos;
 	struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
@@ -4480,6 +4589,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 	struct dc_stream_update stream_update = {0};
 	struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
 	struct dc_stream_status *stream_status;
+	struct dc_plane_state *surface;
 
 
 	/* Prepare wait for target vblank early - before the fence-waits */
@@ -4529,6 +4639,9 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 	addr.address.grph.addr.high_part = upper_32_bits(afb->address);
 	addr.flip_immediate = async_flip;
 
+	timestamp_ns = ktime_get_ns();
+	addr.flip_timestamp_in_us = div_u64(timestamp_ns, 1000);
+
 
 	if (acrtc->base.state->event)
 		prepare_flip_isr(acrtc);
@@ -4542,8 +4655,10 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 		return;
 	}
 
-	surface_updates->surface = stream_status->plane_states[0];
-	if (!surface_updates->surface) {
+	surface = stream_status->plane_states[0];
+	surface_updates->surface = surface;
+
+	if (!surface) {
 		DRM_ERROR("No surface for CRTC: id=%d\n",
 			acrtc->crtc_id);
 		return;
@@ -4554,7 +4669,9 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 		update_freesync_state_on_stream(
 			&adev->dm,
 			acrtc_state,
-			acrtc_state->stream);
+			acrtc_state->stream,
+			surface,
+			addr.flip_timestamp_in_us);
 
 		if (acrtc_state->freesync_timing_changed)
 			stream_update.adjust =
@@ -4565,6 +4682,16 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 				&acrtc_state->stream->vrr_infopacket;
 	}
 
+	/* Update surface timing information. */
+	surface->time.time_elapsed_in_us[surface->time.index] =
+		addr.flip_timestamp_in_us - surface->time.prev_update_time_in_us;
+	surface->time.prev_update_time_in_us = addr.flip_timestamp_in_us;
+	surface->time.index++;
+	if (surface->time.index >= DC_PLANE_UPDATE_TIMES_MAX)
+		surface->time.index = 0;
+
+	mutex_lock(&adev->dm.dc_lock);
+
 	dc_commit_updates_for_stream(adev->dm.dc,
 					     surface_updates,
 					     1,
@@ -4572,6 +4699,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
 					     &stream_update,
 					     &surface_updates->surface,
 					     state);
+	mutex_unlock(&adev->dm.dc_lock);
 
 	DRM_DEBUG_DRIVER("%s Flipping to hi: 0x%x, low: 0x%x \n",
 			 __func__,
@@ -4586,6 +4714,7 @@ static void amdgpu_dm_do_flip(struct drm_crtc *crtc,
  * with a dc_plane_state and follow the atomic model a bit more closely here.
  */
 static bool commit_planes_to_stream(
+		struct amdgpu_display_manager *dm,
 		struct dc *dc,
 		struct dc_plane_state **plane_states,
 		uint8_t new_plane_count,
@@ -4662,11 +4791,13 @@ static bool commit_planes_to_stream(
 		updates[i].scaling_info = &scaling_info[i];
 	}
 
+	mutex_lock(&dm->dc_lock);
 	dc_commit_updates_for_stream(
 			dc,
 			updates,
 			new_plane_count,
 			dc_stream, stream_update, plane_states, state);
+	mutex_unlock(&dm->dc_lock);
 
 	kfree(flip_addr);
 	kfree(plane_info);
@@ -4772,7 +4903,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
 
 		dc_stream_attach->abm_level = acrtc_state->abm_level;
 
-		if (false == commit_planes_to_stream(dm->dc,
+		if (false == commit_planes_to_stream(dm,
+							dm->dc,
 							plane_states_constructed,
 							planes_count,
 							acrtc_state,
@@ -4942,7 +5074,9 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 	if (dc_state) {
 		dm_enable_per_frame_crtc_master_sync(dc_state);
+		mutex_lock(&dm->dc_lock);
 		WARN_ON(!dc_commit_state(dm->dc, dc_state));
+		mutex_unlock(&dm->dc_lock);
 	}
 
 	for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) {
@@ -5004,6 +5138,7 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
 
 		/*TODO How it works with MPO ?*/
 		if (!commit_planes_to_stream(
+				dm,
 				dm->dc,
 				status->plane_states,
 				status->plane_count,
@@ -5248,6 +5383,7 @@ static void get_freesync_config_for_crtc(
 		config.max_refresh_in_uhz =
 				aconnector->max_vfreq * 1000000;
 		config.vsif_supported = true;
+		config.btr = true;
 	}
 
 	new_crtc_state->freesync_config = config;
@@ -5258,8 +5394,8 @@ static void reset_freesync_config_for_crtc(
 {
 	new_crtc_state->vrr_supported = false;
 
-	memset(&new_crtc_state->adjust, 0,
-	       sizeof(new_crtc_state->adjust));
+	memset(&new_crtc_state->vrr_params, 0,
+	       sizeof(new_crtc_state->vrr_params));
 	memset(&new_crtc_state->vrr_infopacket, 0,
 	       sizeof(new_crtc_state->vrr_infopacket));
 }
@@ -5797,7 +5933,7 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 	for_each_oldnew_crtc_in_state(state, crtc, old_crtc_state, new_crtc_state, i) {
 		if (!drm_atomic_crtc_needs_modeset(new_crtc_state) &&
 		    !new_crtc_state->color_mgmt_changed &&
-		    !new_crtc_state->vrr_enabled)
+		    old_crtc_state->vrr_enabled == new_crtc_state->vrr_enabled)
 			continue;
 
 		if (!new_crtc_state->enable)
@@ -5896,6 +6032,13 @@ static int amdgpu_dm_atomic_check(struct drm_device *dev,
 			ret = -EINVAL;
 			goto fail;
 		}
+	} else if (state->legacy_cursor_update) {
+		/*
+		 * This is a fast cursor update coming from the plane update
+		 * helper, check if it can be done asynchronously for better
+		 * performance.
+		 */
+		state->async_update = !drm_atomic_helper_async_check(dev, state);
 	}
 
 	/* Must be success */
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
index 4326dc256491..fbd161ddc3f4 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
@@ -135,6 +135,14 @@ struct amdgpu_display_manager {
 	struct drm_modeset_lock atomic_obj_lock;
 
 	/**
+	 * @dc_lock:
+	 *
+	 * Guards access to DC functions that can issue register write
+	 * sequences.
+	 */
+	struct mutex dc_lock;
+
+	/**
 	 * @irq_handler_list_low_tab:
 	 *
 	 * Low priority IRQ handler table.
@@ -260,7 +268,7 @@ struct dm_crtc_state {
 
 	bool vrr_supported;
 	struct mod_freesync_config freesync_config;
-	struct dc_crtc_timing_adjust adjust;
+	struct mod_vrr_params vrr_params;
 	struct dc_info_packet vrr_infopacket;
 
 	int abm_level;
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index d02c32a1039c..1b0d209d8367 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -342,10 +342,9 @@ dm_dp_add_mst_connector(struct drm_dp_mst_topology_mgr *mgr,
 		master->connector_id);
 
 	aconnector->mst_encoder = dm_dp_create_fake_mst_encoder(master);
+	drm_connector_attach_encoder(&aconnector->base,
+				     &aconnector->mst_encoder->base);
 
-	/*
-	 * TODO: understand why this one is needed
-	 */
 	drm_object_attach_property(
 		&connector->base,
 		dev->mode_config.path_property,
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
new file mode 100644
index 000000000000..d898981684d5
--- /dev/null
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_trace.h
@@ -0,0 +1,104 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: AMD
+ *
+ */
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM amdgpu_dm
+
+#if !defined(_AMDGPU_DM_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _AMDGPU_DM_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(amdgpu_dc_rreg,
+	TP_PROTO(unsigned long *read_count, uint32_t reg, uint32_t value),
+	TP_ARGS(read_count, reg, value),
+	TP_STRUCT__entry(
+			__field(uint32_t, reg)
+			__field(uint32_t, value)
+		),
+	TP_fast_assign(
+			__entry->reg = reg;
+			__entry->value = value;
+			*read_count = *read_count + 1;
+		),
+	TP_printk("reg=0x%08lx, value=0x%08lx",
+			(unsigned long)__entry->reg,
+			(unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_dc_wreg,
+	TP_PROTO(unsigned long *write_count, uint32_t reg, uint32_t value),
+	TP_ARGS(write_count, reg, value),
+	TP_STRUCT__entry(
+			__field(uint32_t, reg)
+			__field(uint32_t, value)
+		),
+	TP_fast_assign(
+			__entry->reg = reg;
+			__entry->value = value;
+			*write_count = *write_count + 1;
+		),
+	TP_printk("reg=0x%08lx, value=0x%08lx",
+			(unsigned long)__entry->reg,
+			(unsigned long)__entry->value)
+);
+
+
+TRACE_EVENT(amdgpu_dc_performance,
+	TP_PROTO(unsigned long read_count, unsigned long write_count,
+		unsigned long *last_read, unsigned long *last_write,
+		const char *func, unsigned int line),
+	TP_ARGS(read_count, write_count, last_read, last_write, func, line),
+	TP_STRUCT__entry(
+			__field(uint32_t, reads)
+			__field(uint32_t, writes)
+			__field(uint32_t, read_delta)
+			__field(uint32_t, write_delta)
+			__string(func, func)
+			__field(uint32_t, line)
+			),
+	TP_fast_assign(
+			__entry->reads = read_count;
+			__entry->writes = write_count;
+			__entry->read_delta = read_count - *last_read;
+			__entry->write_delta = write_count - *last_write;
+			__assign_str(func, func);
+			__entry->line = line;
+			*last_read = read_count;
+			*last_write = write_count;
+			),
+	TP_printk("%s:%d reads=%08ld (%08ld total), writes=%08ld (%08ld total)",
+			__get_str(func), __entry->line,
+			(unsigned long)__entry->read_delta,
+			(unsigned long)__entry->reads,
+			(unsigned long)__entry->write_delta,
+			(unsigned long)__entry->writes)
+);
+#endif /* _AMDGPU_DM_TRACE_H_ */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE amdgpu_dm_trace
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
index 751bb614fc0e..c513ab6f3843 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/bios_parser2.c
@@ -638,6 +638,7 @@ static enum bp_result get_ss_info_v4_1(
 {
 	enum bp_result result = BP_RESULT_OK;
 	struct atom_display_controller_info_v4_1 *disp_cntl_tbl = NULL;
+	struct atom_smu_info_v3_3 *smu_info = NULL;
 
 	if (!ss_info)
 		return BP_RESULT_BADINPUT;
@@ -650,6 +651,7 @@ static enum bp_result get_ss_info_v4_1(
 	if (!disp_cntl_tbl)
 		return BP_RESULT_BADBIOSTABLE;
 
+
 	ss_info->type.STEP_AND_DELAY_INFO = false;
 	ss_info->spread_percentage_divider = 1000;
 	/* BIOS no longer uses target clock.  Always enable for now */
@@ -688,6 +690,19 @@ static enum bp_result get_ss_info_v4_1(
 		 */
 		result = BP_RESULT_UNSUPPORTED;
 		break;
+	case AS_SIGNAL_TYPE_XGMI:
+		smu_info =  GET_IMAGE(struct atom_smu_info_v3_3,
+				      DATA_TABLES(smu_info));
+		if (!smu_info)
+			return BP_RESULT_BADBIOSTABLE;
+
+		ss_info->spread_spectrum_percentage =
+				smu_info->waflclk_ss_percentage;
+		ss_info->spread_spectrum_range =
+				smu_info->gpuclk_ss_rate_10hz * 10;
+		if (smu_info->waflclk_ss_mode & ATOM_SS_CENTRE_SPREAD_MODE)
+			ss_info->type.CENTER_MODE = true;
+		break;
 	default:
 		result = BP_RESULT_UNSUPPORTED;
 	}
diff --git a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
index 65b006ad372e..8196f3bb10c7 100644
--- a/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
+++ b/drivers/gpu/drm/amd/display/dc/bios/command_table_helper2.c
@@ -67,6 +67,7 @@ bool dal_bios_parser_init_cmd_tbl_helper2(
 		return true;
 #endif
 	case DCE_VERSION_12_0:
+	case DCE_VERSION_12_1:
 		*h = dal_cmd_tbl_helper_dce112_get_table2();
 		return true;
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index dba6b57830c7..5fd52094d459 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -62,6 +62,55 @@
 
 const static char DC_BUILD_ID[] = "production-build";
 
+/**
+ * DOC: Overview
+ *
+ * DC is the OS-agnostic component of the amdgpu DC driver.
+ *
+ * DC maintains and validates a set of structs representing the state of the
+ * driver and writes that state to AMD hardware
+ *
+ * Main DC HW structs:
+ *
+ * struct dc - The central struct.  One per driver.  Created on driver load,
+ * destroyed on driver unload.
+ *
+ * struct dc_context - One per driver.
+ * Used as a backpointer by most other structs in dc.
+ *
+ * struct dc_link - One per connector (the physical DP, HDMI, miniDP, or eDP
+ * plugpoints).  Created on driver load, destroyed on driver unload.
+ *
+ * struct dc_sink - One per display.  Created on boot or hotplug.
+ * Destroyed on shutdown or hotunplug.  A dc_link can have a local sink
+ * (the display directly attached).  It may also have one or more remote
+ * sinks (in the Multi-Stream Transport case)
+ *
+ * struct resource_pool - One per driver.  Represents the hw blocks not in the
+ * main pipeline.  Not directly accessible by dm.
+ *
+ * Main dc state structs:
+ *
+ * These structs can be created and destroyed as needed.  There is a full set of
+ * these structs in dc->current_state representing the currently programmed state.
+ *
+ * struct dc_state - The global DC state to track global state information,
+ * such as bandwidth values.
+ *
+ * struct dc_stream_state - Represents the hw configuration for the pipeline from
+ * a framebuffer to a display.  Maps one-to-one with dc_sink.
+ *
+ * struct dc_plane_state - Represents a framebuffer.  Each stream has at least one,
+ * and may have more in the Multi-Plane Overlay case.
+ *
+ * struct resource_context - Represents the programmable state of everything in
+ * the resource_pool.  Not directly accessible by dm.
+ *
+ * struct pipe_ctx - A member of struct resource_context.  Represents the
+ * internal hardware pipeline components.  Each dc_plane_state has either
+ * one or two (in the pipe-split case).
+ */
+
 /*******************************************************************************
  * Private functions
  ******************************************************************************/
@@ -102,10 +151,6 @@ static bool create_links(
 		return false;
 	}
 
-	if (connectors_num == 0 && num_virtual_links == 0) {
-		dm_error("DC: Number of connectors is zero!\n");
-	}
-
 	dm_output_to_console(
 		"DC: %s: connectors_num: physical:%d, virtual:%d\n",
 		__func__,
@@ -175,6 +220,17 @@ failed_alloc:
 	return false;
 }
 
+static struct dc_perf_trace *dc_perf_trace_create(void)
+{
+	return kzalloc(sizeof(struct dc_perf_trace), GFP_KERNEL);
+}
+
+static void dc_perf_trace_destroy(struct dc_perf_trace **perf_trace)
+{
+	kfree(*perf_trace);
+	*perf_trace = NULL;
+}
+
 /**
  *****************************************************************************
  *  Function: dc_stream_adjust_vmin_vmax
@@ -240,7 +296,7 @@ bool dc_stream_get_crtc_position(struct dc *dc,
 }
 
 /**
- * dc_stream_configure_crc: Configure CRC capture for the given stream.
+ * dc_stream_configure_crc() - Configure CRC capture for the given stream.
  * @dc: DC Object
  * @stream: The stream to configure CRC on.
  * @enable: Enable CRC if true, disable otherwise.
@@ -292,7 +348,7 @@ bool dc_stream_configure_crc(struct dc *dc, struct dc_stream_state *stream,
 }
 
 /**
- * dc_stream_get_crc: Get CRC values for the given stream.
+ * dc_stream_get_crc() - Get CRC values for the given stream.
  * @dc: DC object
  * @stream: The DC stream state of the stream to get CRCs from.
  * @r_cr, g_y, b_cb: CRC values for the three channels are stored here.
@@ -536,6 +592,8 @@ static void destruct(struct dc *dc)
 	if (dc->ctx->created_bios)
 		dal_bios_parser_destroy(&dc->ctx->dc_bios);
 
+	dc_perf_trace_destroy(&dc->ctx->perf_trace);
+
 	kfree(dc->ctx);
 	dc->ctx = NULL;
 
@@ -659,6 +717,12 @@ static bool construct(struct dc *dc,
 		goto fail;
 	}
 
+	dc_ctx->perf_trace = dc_perf_trace_create();
+	if (!dc_ctx->perf_trace) {
+		ASSERT_CRITICAL(false);
+		goto fail;
+	}
+
 	/* Create GPIO service */
 	dc_ctx->gpio_service = dal_gpio_service_create(
 			dc_version,
@@ -1329,6 +1393,11 @@ static enum surface_update_type check_update_surfaces_for_stream(
 	return overall_type;
 }
 
+/**
+ * dc_check_update_surfaces_for_stream() - Determine update type (fast, med, or full)
+ *
+ * See :c:type:`enum surface_update_type <surface_update_type>` for explanation of update types
+ */
 enum surface_update_type dc_check_update_surfaces_for_stream(
 		struct dc *dc,
 		struct dc_surface_update *updates,
@@ -1398,7 +1467,8 @@ static void commit_planes_do_stream_update(struct dc *dc,
 
 			if ((stream_update->hdr_static_metadata && !stream->use_dynamic_meta) ||
 					stream_update->vrr_infopacket ||
-					stream_update->vsc_infopacket) {
+					stream_update->vsc_infopacket ||
+					stream_update->vsp_infopacket) {
 				resource_build_info_frame(pipe_ctx);
 				dc->hwss.update_info_frame(pipe_ctx);
 			}
@@ -1409,6 +1479,14 @@ static void commit_planes_do_stream_update(struct dc *dc,
 			if (stream_update->output_csc_transform)
 				dc_stream_program_csc_matrix(dc, stream);
 
+			if (stream_update->dither_option) {
+				resource_build_bit_depth_reduction_params(pipe_ctx->stream,
+									&pipe_ctx->stream->bit_depth_params);
+				pipe_ctx->stream_res.opp->funcs->opp_program_fmt(pipe_ctx->stream_res.opp,
+						&stream->bit_depth_params,
+						&stream->clamping);
+			}
+
 			/* Full fe update*/
 			if (update_type == UPDATE_TYPE_FAST)
 				continue;
@@ -1492,9 +1570,6 @@ static void commit_planes_for_stream(struct dc *dc,
 		}
 	}
 
-	if (update_type == UPDATE_TYPE_FULL)
-		context_timing_trace(dc, &context->res_ctx);
-
 	// Update Type FAST, Surface updates
 	if (update_type == UPDATE_TYPE_FAST) {
 		/* Lock the top pipe while updating plane addrs, since freesync requires
@@ -1631,6 +1706,9 @@ enum dc_irq_source dc_interrupt_to_irq_source(
 	return dal_irq_service_to_irq_source(dc->res_pool->irqs, src_id, ext_id);
 }
 
+/**
+ * dc_interrupt_set() - Enable/disable an AMD hw interrupt source
+ */
 bool dc_interrupt_set(struct dc *dc, enum dc_irq_source src, bool enable)
 {
 
@@ -1724,6 +1802,11 @@ static bool link_add_remote_sink_helper(struct dc_link *dc_link, struct dc_sink
 	return true;
 }
 
+/**
+ * dc_link_add_remote_sink() - Create a sink and attach it to an existing link
+ *
+ * EDID length is in bytes
+ */
 struct dc_sink *dc_link_add_remote_sink(
 		struct dc_link *link,
 		const uint8_t *edid,
@@ -1782,6 +1865,12 @@ fail_add_sink:
 	return NULL;
 }
 
+/**
+ * dc_link_remove_remote_sink() - Remove a remote sink from a dc_link
+ *
+ * Note that this just removes the struct dc_sink - it doesn't
+ * program hardware or alter other members of dc_link
+ */
 void dc_link_remove_remote_sink(struct dc_link *link, struct dc_sink *sink)
 {
 	int i;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link.c b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
index 948596a02392..b0265dbebd4c 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link.c
@@ -198,6 +198,13 @@ static bool program_hpd_filter(
 	return result;
 }
 
+/**
+ * dc_link_detect_sink() - Determine if there is a sink connected
+ *
+ * @type: Returned connection type
+ * Does not detect downstream devices, such as MST sinks
+ * or display connected through active dongles
+ */
 bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
 {
 	uint32_t is_hpd_high = 0;
@@ -208,6 +215,9 @@ bool dc_link_detect_sink(struct dc_link *link, enum dc_connection_type *type)
 		return true;
 	}
 
+	if (link->connector_signal == SIGNAL_TYPE_EDP)
+		link->dc->hwss.edp_wait_for_hpd_ready(link, true);
+
 	/* todo: may need to lock gpio access */
 	hpd_pin = get_hpd_gpio(link->ctx->dc_bios, link->link_id, link->ctx->gpio_service);
 	if (hpd_pin == NULL)
@@ -324,15 +334,15 @@ static enum signal_type get_basic_signal_type(
 	return SIGNAL_TYPE_NONE;
 }
 
-/*
- * @brief
- * Check whether there is a dongle on DP connector
+/**
+ * dc_link_is_dp_sink_present() - Check if there is a native DP
+ * or passive DP-HDMI dongle connected
  */
 bool dc_link_is_dp_sink_present(struct dc_link *link)
 {
 	enum gpio_result gpio_result;
 	uint32_t clock_pin = 0;
-
+	uint8_t retry = 0;
 	struct ddc *ddc;
 
 	enum connector_id connector_id =
@@ -361,11 +371,22 @@ bool dc_link_is_dp_sink_present(struct dc_link *link)
 		return present;
 	}
 
-	/* Read GPIO: DP sink is present if both clock and data pins are zero */
-	/* [anaumov] in DAL2, there was no check for GPIO failure */
-
-	gpio_result = dal_gpio_get_value(ddc->pin_clock, &clock_pin);
-	ASSERT(gpio_result == GPIO_RESULT_OK);
+	/*
+	 * Read GPIO: DP sink is present if both clock and data pins are zero
+	 *
+	 * [W/A] plug-unplug DP cable, sometimes customer board has
+	 * one short pulse on clk_pin(1V, < 1ms). DP will be config to HDMI/DVI
+	 * then monitor can't br light up. Add retry 3 times
+	 * But in real passive dongle, it need additional 3ms to detect
+	 */
+	do {
+		gpio_result = dal_gpio_get_value(ddc->pin_clock, &clock_pin);
+		ASSERT(gpio_result == GPIO_RESULT_OK);
+		if (clock_pin)
+			udelay(1000);
+		else
+			break;
+	} while (retry++ < 3);
 
 	present = (gpio_result == GPIO_RESULT_OK) && !clock_pin;
 
@@ -593,6 +614,14 @@ static bool is_same_edid(struct dc_edid *old_edid, struct dc_edid *new_edid)
 	return (memcmp(old_edid->raw_edid, new_edid->raw_edid, new_edid->length) == 0);
 }
 
+/**
+ * dc_link_detect() - Detect if a sink is attached to a given link
+ *
+ * link->local_sink is created or destroyed as needed.
+ *
+ * This does not create remote sinks but will trigger DM
+ * to start MST detection if a branch is detected.
+ */
 bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 {
 	struct dc_sink_init_data sink_init_data = { 0 };
@@ -688,12 +717,26 @@ bool dc_link_detect(struct dc_link *link, enum dc_detect_reason reason)
 				if (memcmp(&link->dpcd_caps, &prev_dpcd_caps, sizeof(struct dpcd_caps)))
 					same_dpcd = false;
 			}
-			/* Active dongle downstream unplug */
+			/* Active dongle plug in without display or downstream unplug*/
 			if (link->type == dc_connection_active_dongle
 					&& link->dpcd_caps.sink_count.
 					bits.SINK_COUNT == 0) {
-				if (prev_sink != NULL)
+				if (prev_sink != NULL) {
+					/* Downstream unplug */
 					dc_sink_release(prev_sink);
+				} else {
+					/* Empty dongle plug in */
+					for (i = 0; i < LINK_TRAINING_MAX_VERIFY_RETRY; i++) {
+						int fail_count = 0;
+
+						dp_verify_link_cap(link,
+								  &link->reported_link_cap,
+								  &fail_count);
+
+						if (fail_count == 0)
+							break;
+					}
+				}
 				return true;
 			}
 
@@ -2147,8 +2190,7 @@ int dc_link_get_backlight_level(const struct dc_link *link)
 
 bool dc_link_set_backlight_level(const struct dc_link *link,
 		uint32_t backlight_pwm_u16_16,
-		uint32_t frame_ramp,
-		const struct dc_stream_state *stream)
+		uint32_t frame_ramp)
 {
 	struct dc  *core_dc = link->ctx->dc;
 	struct abm *abm = core_dc->res_pool->abm;
@@ -2163,10 +2205,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
 		(abm->funcs->set_backlight_level_pwm == NULL))
 		return false;
 
-	if (stream)
-		((struct dc_stream_state *)stream)->bl_pwm_level =
-				backlight_pwm_u16_16;
-
 	use_smooth_brightness = dmcu->funcs->is_dmcu_initialized(dmcu);
 
 	DC_LOG_BACKLIGHT("New Backlight level: %d (0x%X)\n",
@@ -2594,11 +2632,6 @@ void core_link_enable_stream(
 
 		if (dc_is_dp_signal(pipe_ctx->stream->signal))
 			enable_stream_features(pipe_ctx);
-
-		dc_link_set_backlight_level(pipe_ctx->stream->sink->link,
-				pipe_ctx->stream->bl_pwm_level,
-				0,
-				pipe_ctx->stream);
 	}
 
 }
@@ -2607,11 +2640,11 @@ void core_link_disable_stream(struct pipe_ctx *pipe_ctx, int option)
 {
 	struct dc  *core_dc = pipe_ctx->stream->ctx->dc;
 
+	core_dc->hwss.blank_stream(pipe_ctx);
+
 	if (pipe_ctx->stream->signal == SIGNAL_TYPE_DISPLAY_PORT_MST)
 		deallocate_mst_payload(pipe_ctx);
 
-	core_dc->hwss.blank_stream(pipe_ctx);
-
 	core_dc->hwss.disable_stream(pipe_ctx, option);
 
 	disable_link(pipe_ctx->stream->sink->link, pipe_ctx->stream->signal);
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
index 4d1f8ac069c1..0caacb60b02f 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_dp.c
@@ -1089,6 +1089,121 @@ static struct dc_link_settings get_max_link_cap(struct dc_link *link)
 	return max_link_cap;
 }
 
+static enum dc_status read_hpd_rx_irq_data(
+	struct dc_link *link,
+	union hpd_irq_data *irq_data)
+{
+	static enum dc_status retval;
+
+	/* The HW reads 16 bytes from 200h on HPD,
+	 * but if we get an AUX_DEFER, the HW cannot retry
+	 * and this causes the CTS tests 4.3.2.1 - 3.2.4 to
+	 * fail, so we now explicitly read 6 bytes which is
+	 * the req from the above mentioned test cases.
+	 *
+	 * For DP 1.4 we need to read those from 2002h range.
+	 */
+	if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14)
+		retval = core_link_read_dpcd(
+			link,
+			DP_SINK_COUNT,
+			irq_data->raw,
+			sizeof(union hpd_irq_data));
+	else {
+		/* Read 14 bytes in a single read and then copy only the required fields.
+		 * This is more efficient than doing it in two separate AUX reads. */
+
+		uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1];
+
+		retval = core_link_read_dpcd(
+			link,
+			DP_SINK_COUNT_ESI,
+			tmp,
+			sizeof(tmp));
+
+		if (retval != DC_OK)
+			return retval;
+
+		irq_data->bytes.sink_cnt.raw = tmp[DP_SINK_COUNT_ESI - DP_SINK_COUNT_ESI];
+		irq_data->bytes.device_service_irq.raw = tmp[DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 - DP_SINK_COUNT_ESI];
+		irq_data->bytes.lane01_status.raw = tmp[DP_LANE0_1_STATUS_ESI - DP_SINK_COUNT_ESI];
+		irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI];
+		irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI];
+		irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI];
+	}
+
+	return retval;
+}
+
+static bool hpd_rx_irq_check_link_loss_status(
+	struct dc_link *link,
+	union hpd_irq_data *hpd_irq_dpcd_data)
+{
+	uint8_t irq_reg_rx_power_state = 0;
+	enum dc_status dpcd_result = DC_ERROR_UNEXPECTED;
+	union lane_status lane_status;
+	uint32_t lane;
+	bool sink_status_changed;
+	bool return_code;
+
+	sink_status_changed = false;
+	return_code = false;
+
+	if (link->cur_link_settings.lane_count == 0)
+		return return_code;
+
+	/*1. Check that Link Status changed, before re-training.*/
+
+	/*parse lane status*/
+	for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) {
+		/* check status of lanes 0,1
+		 * changed DpcdAddress_Lane01Status (0x202)
+		 */
+		lane_status.raw = get_nibble_at_index(
+			&hpd_irq_dpcd_data->bytes.lane01_status.raw,
+			lane);
+
+		if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
+			!lane_status.bits.CR_DONE_0 ||
+			!lane_status.bits.SYMBOL_LOCKED_0) {
+			/* if one of the channel equalization, clock
+			 * recovery or symbol lock is dropped
+			 * consider it as (link has been
+			 * dropped) dp sink status has changed
+			 */
+			sink_status_changed = true;
+			break;
+		}
+	}
+
+	/* Check interlane align.*/
+	if (sink_status_changed ||
+		!hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) {
+
+		DC_LOG_HW_HPD_IRQ("%s: Link Status changed.\n", __func__);
+
+		return_code = true;
+
+		/*2. Check that we can handle interrupt: Not in FS DOS,
+		 *  Not in "Display Timeout" state, Link is trained.
+		 */
+		dpcd_result = core_link_read_dpcd(link,
+			DP_SET_POWER,
+			&irq_reg_rx_power_state,
+			sizeof(irq_reg_rx_power_state));
+
+		if (dpcd_result != DC_OK) {
+			DC_LOG_HW_HPD_IRQ("%s: DPCD read failed to obtain power state.\n",
+				__func__);
+		} else {
+			if (irq_reg_rx_power_state != DP_SET_POWER_D0)
+				return_code = false;
+		}
+	}
+
+	return return_code;
+}
+
 bool dp_verify_link_cap(
 	struct dc_link *link,
 	struct dc_link_settings *known_limit_link_setting,
@@ -1104,12 +1219,14 @@ bool dp_verify_link_cap(
 	struct clock_source *dp_cs;
 	enum clock_source_id dp_cs_id = CLOCK_SOURCE_ID_EXTERNAL;
 	enum link_training_result status;
+	union hpd_irq_data irq_data;
 
 	if (link->dc->debug.skip_detection_link_training) {
 		link->verified_link_cap = *known_limit_link_setting;
 		return true;
 	}
 
+	memset(&irq_data, 0, sizeof(irq_data));
 	success = false;
 	skip_link_training = false;
 
@@ -1168,9 +1285,15 @@ bool dp_verify_link_cap(
 				(*fail_count)++;
 		}
 
-		if (success)
+		if (success) {
 			link->verified_link_cap = *cur;
-
+			udelay(1000);
+			if (read_hpd_rx_irq_data(link, &irq_data) == DC_OK)
+				if (hpd_rx_irq_check_link_loss_status(
+						link,
+						&irq_data))
+					(*fail_count)++;
+		}
 		/* always disable the link before trying another
 		 * setting or before returning we'll enable it later
 		 * based on the actual mode we're driving
@@ -1572,122 +1695,6 @@ void decide_link_settings(struct dc_stream_state *stream,
 }
 
 /*************************Short Pulse IRQ***************************/
-
-static bool hpd_rx_irq_check_link_loss_status(
-	struct dc_link *link,
-	union hpd_irq_data *hpd_irq_dpcd_data)
-{
-	uint8_t irq_reg_rx_power_state = 0;
-	enum dc_status dpcd_result = DC_ERROR_UNEXPECTED;
-	union lane_status lane_status;
-	uint32_t lane;
-	bool sink_status_changed;
-	bool return_code;
-
-	sink_status_changed = false;
-	return_code = false;
-
-	if (link->cur_link_settings.lane_count == 0)
-		return return_code;
-
-	/*1. Check that Link Status changed, before re-training.*/
-
-	/*parse lane status*/
-	for (lane = 0; lane < link->cur_link_settings.lane_count; lane++) {
-		/* check status of lanes 0,1
-		 * changed DpcdAddress_Lane01Status (0x202)
-		 */
-		lane_status.raw = get_nibble_at_index(
-			&hpd_irq_dpcd_data->bytes.lane01_status.raw,
-			lane);
-
-		if (!lane_status.bits.CHANNEL_EQ_DONE_0 ||
-			!lane_status.bits.CR_DONE_0 ||
-			!lane_status.bits.SYMBOL_LOCKED_0) {
-			/* if one of the channel equalization, clock
-			 * recovery or symbol lock is dropped
-			 * consider it as (link has been
-			 * dropped) dp sink status has changed
-			 */
-			sink_status_changed = true;
-			break;
-		}
-	}
-
-	/* Check interlane align.*/
-	if (sink_status_changed ||
-		!hpd_irq_dpcd_data->bytes.lane_status_updated.bits.INTERLANE_ALIGN_DONE) {
-
-		DC_LOG_HW_HPD_IRQ("%s: Link Status changed.\n", __func__);
-
-		return_code = true;
-
-		/*2. Check that we can handle interrupt: Not in FS DOS,
-		 *  Not in "Display Timeout" state, Link is trained.
-		 */
-		dpcd_result = core_link_read_dpcd(link,
-			DP_SET_POWER,
-			&irq_reg_rx_power_state,
-			sizeof(irq_reg_rx_power_state));
-
-		if (dpcd_result != DC_OK) {
-			DC_LOG_HW_HPD_IRQ("%s: DPCD read failed to obtain power state.\n",
-				__func__);
-		} else {
-			if (irq_reg_rx_power_state != DP_SET_POWER_D0)
-				return_code = false;
-		}
-	}
-
-	return return_code;
-}
-
-static enum dc_status read_hpd_rx_irq_data(
-	struct dc_link *link,
-	union hpd_irq_data *irq_data)
-{
-	static enum dc_status retval;
-
-	/* The HW reads 16 bytes from 200h on HPD,
-	 * but if we get an AUX_DEFER, the HW cannot retry
-	 * and this causes the CTS tests 4.3.2.1 - 3.2.4 to
-	 * fail, so we now explicitly read 6 bytes which is
-	 * the req from the above mentioned test cases.
-	 *
-	 * For DP 1.4 we need to read those from 2002h range.
-	 */
-	if (link->dpcd_caps.dpcd_rev.raw < DPCD_REV_14)
-		retval = core_link_read_dpcd(
-			link,
-			DP_SINK_COUNT,
-			irq_data->raw,
-			sizeof(union hpd_irq_data));
-	else {
-		/* Read 14 bytes in a single read and then copy only the required fields.
-		 * This is more efficient than doing it in two separate AUX reads. */
-
-		uint8_t tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI + 1];
-
-		retval = core_link_read_dpcd(
-			link,
-			DP_SINK_COUNT_ESI,
-			tmp,
-			sizeof(tmp));
-
-		if (retval != DC_OK)
-			return retval;
-
-		irq_data->bytes.sink_cnt.raw = tmp[DP_SINK_COUNT_ESI - DP_SINK_COUNT_ESI];
-		irq_data->bytes.device_service_irq.raw = tmp[DP_DEVICE_SERVICE_IRQ_VECTOR_ESI0 - DP_SINK_COUNT_ESI];
-		irq_data->bytes.lane01_status.raw = tmp[DP_LANE0_1_STATUS_ESI - DP_SINK_COUNT_ESI];
-		irq_data->bytes.lane23_status.raw = tmp[DP_LANE2_3_STATUS_ESI - DP_SINK_COUNT_ESI];
-		irq_data->bytes.lane_status_updated.raw = tmp[DP_LANE_ALIGN_STATUS_UPDATED_ESI - DP_SINK_COUNT_ESI];
-		irq_data->bytes.sink_status.raw = tmp[DP_SINK_STATUS_ESI - DP_SINK_COUNT_ESI];
-	}
-
-	return retval;
-}
-
 static bool allow_hpd_rx_irq(const struct dc_link *link)
 {
 	/*
@@ -2196,7 +2203,7 @@ static void get_active_converter_info(
 	}
 
 	if (link->dpcd_caps.dpcd_rev.raw >= DPCD_REV_11) {
-		uint8_t det_caps[4];
+		uint8_t det_caps[16]; /* CTS 4.2.2.7 expects source to read Detailed Capabilities Info : 00080h-0008F.*/
 		union dwnstream_port_caps_byte0 *port_caps =
 			(union dwnstream_port_caps_byte0 *)det_caps;
 		core_link_read_dpcd(link, DP_DOWNSTREAM_PORT_0,
@@ -2240,7 +2247,8 @@ static void get_active_converter_info(
 					translate_dpcd_max_bpc(
 						hdmi_color_caps.bits.MAX_BITS_PER_COLOR_COMPONENT);
 
-				link->dpcd_caps.dongle_caps.extendedCapValid = true;
+				if (link->dpcd_caps.dongle_caps.dp_hdmi_max_pixel_clk != 0)
+					link->dpcd_caps.dongle_caps.extendedCapValid = true;
 			}
 
 			break;
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
index 82cd1d6e6e59..0065ec7d5330 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_link_hwss.c
@@ -96,6 +96,7 @@ void dp_enable_link_phy(
 						link_settings,
 						clock_source);
 	}
+	link->cur_link_settings = *link_settings;
 
 	dp_receiver_power_ctrl(link, true);
 }
@@ -307,6 +308,7 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 						link->link_enc,
 						link_setting,
 						pipes[i].clock_source->id);
+			link->cur_link_settings = *link_setting;
 
 			dp_receiver_power_ctrl(link, true);
 
@@ -316,7 +318,6 @@ void dp_retrain_link_dp_test(struct dc_link *link,
 					skip_video_pattern,
 					LINK_TRAINING_ATTEMPTS);
 
-			link->cur_link_settings = *link_setting;
 
 			link->dc->hwss.enable_stream(&pipes[i]);
 
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
index 0bb844a7b990..76137df74a53 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_resource.c
@@ -83,7 +83,10 @@ enum dce_version resource_parse_asic_id(struct hw_asic_id asic_id)
 			dc_version = DCE_VERSION_11_22;
 		break;
 	case FAMILY_AI:
-		dc_version = DCE_VERSION_12_0;
+		if (ASICREV_IS_VEGA20_P(asic_id.hw_internal_rev))
+			dc_version = DCE_VERSION_12_1;
+		else
+			dc_version = DCE_VERSION_12_0;
 		break;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case FAMILY_RV:
@@ -136,6 +139,7 @@ struct resource_pool *dc_create_resource_pool(
 			num_virtual_links, dc);
 		break;
 	case DCE_VERSION_12_0:
+	case DCE_VERSION_12_1:
 		res_pool = dce120_create_resource_pool(
 			num_virtual_links, dc);
 		break;
@@ -1447,6 +1451,14 @@ static bool are_stream_backends_same(
 	return true;
 }
 
+/**
+ * dc_is_stream_unchanged() - Compare two stream states for equivalence.
+ *
+ * Checks if there a difference between the two states
+ * that would require a mode change.
+ *
+ * Does not compare cursor position or attributes.
+ */
 bool dc_is_stream_unchanged(
 	struct dc_stream_state *old_stream, struct dc_stream_state *stream)
 {
@@ -1457,6 +1469,9 @@ bool dc_is_stream_unchanged(
 	return true;
 }
 
+/**
+ * dc_is_stream_scaling_unchanged() - Compare scaling rectangles of two streams.
+ */
 bool dc_is_stream_scaling_unchanged(
 	struct dc_stream_state *old_stream, struct dc_stream_state *stream)
 {
@@ -1616,6 +1631,9 @@ bool resource_is_stream_unchanged(
 	return false;
 }
 
+/**
+ * dc_add_stream_to_ctx() - Add a new dc_stream_state to a dc_state.
+ */
 enum dc_status dc_add_stream_to_ctx(
 		struct dc *dc,
 		struct dc_state *new_ctx,
@@ -1640,6 +1658,9 @@ enum dc_status dc_add_stream_to_ctx(
 	return res;
 }
 
+/**
+ * dc_remove_stream_from_ctx() - Remove a stream from a dc_state.
+ */
 enum dc_status dc_remove_stream_from_ctx(
 			struct dc *dc,
 			struct dc_state *new_ctx,
@@ -1860,6 +1881,12 @@ enum dc_status resource_map_pool_resources(
 	return DC_ERROR_UNEXPECTED;
 }
 
+/**
+ * dc_resource_state_copy_construct_current() - Creates a new dc_state from existing state
+ * Is a shallow copy.  Increments refcounts on existing streams and planes.
+ * @dc: copy out of dc->current_state
+ * @dst_ctx: copy into this
+ */
 void dc_resource_state_copy_construct_current(
 		const struct dc *dc,
 		struct dc_state *dst_ctx)
@@ -1875,6 +1902,14 @@ void dc_resource_state_construct(
 	dst_ctx->dccg = dc->res_pool->clk_mgr;
 }
 
+/**
+ * dc_validate_global_state() - Determine if HW can support a given state
+ * Checks HW resource availability and bandwidth requirement.
+ * @dc: dc struct for this driver
+ * @new_ctx: state to be validated
+ *
+ * Return: DC_OK if the result can be programmed.  Otherwise, an error code.
+ */
 enum dc_status dc_validate_global_state(
 		struct dc *dc,
 		struct dc_state *new_ctx)
@@ -2202,113 +2237,15 @@ static void set_vendor_info_packet(
 		struct dc_info_packet *info_packet,
 		struct dc_stream_state *stream)
 {
-	uint32_t length = 0;
-	bool hdmi_vic_mode = false;
-	uint8_t checksum = 0;
-	uint32_t i = 0;
-	enum dc_timing_3d_format format;
-	// Can be different depending on packet content /*todo*/
-	// unsigned int length = pPathMode->dolbyVision ? 24 : 5;
-
-	info_packet->valid = false;
-
-	format = stream->timing.timing_3d_format;
-	if (stream->view_format == VIEW_3D_FORMAT_NONE)
-		format = TIMING_3D_FORMAT_NONE;
-
-	/* Can be different depending on packet content */
-	length = 5;
-
-	if (stream->timing.hdmi_vic != 0
-			&& stream->timing.h_total >= 3840
-			&& stream->timing.v_total >= 2160)
-		hdmi_vic_mode = true;
-
-	/* According to HDMI 1.4a CTS, VSIF should be sent
-	 * for both 3D stereo and HDMI VIC modes.
-	 * For all other modes, there is no VSIF sent.  */
+	/* SPD info packet for FreeSync */
 
-	if (format == TIMING_3D_FORMAT_NONE && !hdmi_vic_mode)
+	/* Check if Freesync is supported. Return if false. If true,
+	 * set the corresponding bit in the info packet
+	 */
+	if (!stream->vsp_infopacket.valid)
 		return;
 
-	/* 24bit IEEE Registration identifier (0x000c03). LSB first. */
-	info_packet->sb[1] = 0x03;
-	info_packet->sb[2] = 0x0C;
-	info_packet->sb[3] = 0x00;
-
-	/*PB4: 5 lower bytes = 0 (reserved). 3 higher bits = HDMI_Video_Format.
-	 * The value for HDMI_Video_Format are:
-	 * 0x0 (0b000) - No additional HDMI video format is presented in this
-	 * packet
-	 * 0x1 (0b001) - Extended resolution format present. 1 byte of HDMI_VIC
-	 * parameter follows
-	 * 0x2 (0b010) - 3D format indication present. 3D_Structure and
-	 * potentially 3D_Ext_Data follows
-	 * 0x3..0x7 (0b011..0b111) - reserved for future use */
-	if (format != TIMING_3D_FORMAT_NONE)
-		info_packet->sb[4] = (2 << 5);
-	else if (hdmi_vic_mode)
-		info_packet->sb[4] = (1 << 5);
-
-	/* PB5: If PB4 claims 3D timing (HDMI_Video_Format = 0x2):
-	 * 4 lower bites = 0 (reserved). 4 higher bits = 3D_Structure.
-	 * The value for 3D_Structure are:
-	 * 0x0 - Frame Packing
-	 * 0x1 - Field Alternative
-	 * 0x2 - Line Alternative
-	 * 0x3 - Side-by-Side (full)
-	 * 0x4 - L + depth
-	 * 0x5 - L + depth + graphics + graphics-depth
-	 * 0x6 - Top-and-Bottom
-	 * 0x7 - Reserved for future use
-	 * 0x8 - Side-by-Side (Half)
-	 * 0x9..0xE - Reserved for future use
-	 * 0xF - Not used */
-	switch (format) {
-	case TIMING_3D_FORMAT_HW_FRAME_PACKING:
-	case TIMING_3D_FORMAT_SW_FRAME_PACKING:
-		info_packet->sb[5] = (0x0 << 4);
-		break;
-
-	case TIMING_3D_FORMAT_SIDE_BY_SIDE:
-	case TIMING_3D_FORMAT_SBS_SW_PACKED:
-		info_packet->sb[5] = (0x8 << 4);
-		length = 6;
-		break;
-
-	case TIMING_3D_FORMAT_TOP_AND_BOTTOM:
-	case TIMING_3D_FORMAT_TB_SW_PACKED:
-		info_packet->sb[5] = (0x6 << 4);
-		break;
-
-	default:
-		break;
-	}
-
-	/*PB5: If PB4 is set to 0x1 (extended resolution format)
-	 * fill PB5 with the correct HDMI VIC code */
-	if (hdmi_vic_mode)
-		info_packet->sb[5] = stream->timing.hdmi_vic;
-
-	/* Header */
-	info_packet->hb0 = HDMI_INFOFRAME_TYPE_VENDOR; /* VSIF packet type. */
-	info_packet->hb1 = 0x01; /* Version */
-
-	/* 4 lower bits = Length, 4 higher bits = 0 (reserved) */
-	info_packet->hb2 = (uint8_t) (length);
-
-	/* Calculate checksum */
-	checksum = 0;
-	checksum += info_packet->hb0;
-	checksum += info_packet->hb1;
-	checksum += info_packet->hb2;
-
-	for (i = 1; i <= length; i++)
-		checksum += info_packet->sb[i];
-
-	info_packet->sb[0] = (uint8_t) (0x100 - checksum);
-
-	info_packet->valid = true;
+	*info_packet = stream->vsp_infopacket;
 }
 
 static void set_spd_info_packet(
@@ -2364,10 +2301,6 @@ void dc_resource_state_destruct(struct dc_state *context)
 	}
 }
 
-/*
- * Copy src_ctx into dst_ctx and retain all surfaces and streams referenced
- * by the src_ctx
- */
 void dc_resource_state_copy_construct(
 		const struct dc_state *src_ctx,
 		struct dc_state *dst_ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
index 780838a05f44..66e5c4623a49 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc_stream.c
@@ -170,7 +170,7 @@ struct dc_stream_status *dc_stream_get_status(
 }
 
 /**
- * Update the cursor attributes and set cursor surface address
+ * dc_stream_set_cursor_attributes() - Update cursor attributes and set cursor surface address
  */
 bool dc_stream_set_cursor_attributes(
 	struct dc_stream_state *stream,
diff --git a/drivers/gpu/drm/amd/display/dc/dc.h b/drivers/gpu/drm/amd/display/dc/dc.h
index dea8bc39c688..4b5bbb13ce7f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc.h
+++ b/drivers/gpu/drm/amd/display/dc/dc.h
@@ -39,7 +39,7 @@
 #include "inc/hw/dmcu.h"
 #include "dml/display_mode_lib.h"
 
-#define DC_VER "3.2.06"
+#define DC_VER "3.2.08"
 
 #define MAX_SURFACES 3
 #define MAX_STREAMS 6
diff --git a/drivers/gpu/drm/amd/display/dc/dc_helper.c b/drivers/gpu/drm/amd/display/dc/dc_helper.c
index fcfd50b5dba0..4842d2378bbf 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dc_helper.c
@@ -234,14 +234,14 @@ uint32_t generic_reg_wait(const struct dc_context *ctx,
 		if (field_value == condition_value) {
 			if (i * delay_between_poll_us > 1000 &&
 					!IS_FPGA_MAXIMUS_DC(ctx->dce_environment))
-				dm_output_to_console("REG_WAIT taking a while: %dms in %s line:%d\n",
+				DC_LOG_DC("REG_WAIT taking a while: %dms in %s line:%d\n",
 						delay_between_poll_us * i / 1000,
 						func_name, line);
 			return reg_val;
 		}
 	}
 
-	dm_error("REG_WAIT timeout %dus * %d tries - %s line:%d\n",
+	DC_LOG_WARNING("REG_WAIT timeout %dus * %d tries - %s line:%d\n",
 			delay_between_poll_us, time_out_num_tries,
 			func_name, line);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
index 7825e4b5e97c..e72fce4eca65 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_hw_types.h
@@ -192,7 +192,6 @@ enum surface_pixel_format {
 	/*swaped & float*/
 	SURFACE_PIXEL_FORMAT_GRPH_ABGR16161616F,
 	/*grow graphics here if necessary */
-	SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888,
 	SURFACE_PIXEL_FORMAT_VIDEO_BEGIN,
 	SURFACE_PIXEL_FORMAT_VIDEO_420_YCbCr =
 		SURFACE_PIXEL_FORMAT_VIDEO_BEGIN,
@@ -200,6 +199,7 @@ enum surface_pixel_format {
 	SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCbCr,
 	SURFACE_PIXEL_FORMAT_VIDEO_420_10bpc_YCrCb,
 		SURFACE_PIXEL_FORMAT_SUBSAMPLE_END,
+	SURFACE_PIXEL_FORMAT_VIDEO_AYCrCb8888,
 	SURFACE_PIXEL_FORMAT_INVALID
 
 	/*grow 444 video here if necessary */
@@ -358,15 +358,16 @@ union dc_tiling_info {
 	} gfx8;
 
 	struct {
+		enum swizzle_mode_values swizzle;
 		unsigned int num_pipes;
-		unsigned int num_banks;
+		unsigned int max_compressed_frags;
 		unsigned int pipe_interleave;
+
+		unsigned int num_banks;
 		unsigned int num_shader_engines;
 		unsigned int num_rb_per_se;
-		unsigned int max_compressed_frags;
 		bool shaderEnable;
 
-		enum swizzle_mode_values swizzle;
 		bool meta_linear;
 		bool rb_aligned;
 		bool pipe_aligned;
diff --git a/drivers/gpu/drm/amd/display/dc/dc_link.h b/drivers/gpu/drm/amd/display/dc/dc_link.h
index 8738f27a8708..b2243e0dad1f 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_link.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_link.h
@@ -128,8 +128,10 @@ struct dc_link {
 
 const struct dc_link_status *dc_link_get_status(const struct dc_link *dc_link);
 
-/*
- * Return an enumerated dc_link.  dc_link order is constant and determined at
+/**
+ * dc_get_link_at_index() - Return an enumerated dc_link.
+ *
+ * dc_link order is constant and determined at
  * boot time.  They cannot be created or destroyed.
  * Use dc_get_caps() to get number of links.
  */
@@ -144,8 +146,7 @@ static inline struct dc_link *dc_get_link_at_index(struct dc *dc, uint32_t link_
  */
 bool dc_link_set_backlight_level(const struct dc_link *dc_link,
 		uint32_t backlight_pwm_u16_16,
-		uint32_t frame_ramp,
-		const struct dc_stream_state *stream);
+		uint32_t frame_ramp);
 
 int dc_link_get_backlight_level(const struct dc_link *dc_link);
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_stream.h b/drivers/gpu/drm/amd/display/dc/dc_stream.h
index 771d9f17e26e..d70c9e1cda3d 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_stream.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_stream.h
@@ -56,6 +56,7 @@ struct dc_stream_state {
 	struct dc_crtc_timing_adjust adjust;
 	struct dc_info_packet vrr_infopacket;
 	struct dc_info_packet vsc_infopacket;
+	struct dc_info_packet vsp_infopacket;
 
 	struct rect src; /* composition area */
 	struct rect dst; /* stream addressable area */
@@ -90,7 +91,6 @@ struct dc_stream_state {
 
 	/* DMCU info */
 	unsigned int abm_level;
-	unsigned int bl_pwm_level;
 
 	/* from core_stream struct */
 	struct dc_context *ctx;
@@ -129,11 +129,13 @@ struct dc_stream_update {
 	struct dc_crtc_timing_adjust *adjust;
 	struct dc_info_packet *vrr_infopacket;
 	struct dc_info_packet *vsc_infopacket;
+	struct dc_info_packet *vsp_infopacket;
 
 	bool *dpms_off;
 
 	struct colorspace_transform *gamut_remap;
 	enum dc_color_space *output_color_space;
+	enum dc_dither_option *dither_option;
 
 	struct dc_csc_transform *output_csc_transform;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dc_types.h b/drivers/gpu/drm/amd/display/dc/dc_types.h
index 6e12d640d020..0b20ae23f169 100644
--- a/drivers/gpu/drm/amd/display/dc/dc_types.h
+++ b/drivers/gpu/drm/amd/display/dc/dc_types.h
@@ -73,10 +73,18 @@ struct hw_asic_id {
 	void *atombios_base_address;
 };
 
+struct dc_perf_trace {
+	unsigned long read_count;
+	unsigned long write_count;
+	unsigned long last_entry_read;
+	unsigned long last_entry_write;
+};
+
 struct dc_context {
 	struct dc *dc;
 
 	void *driver_context; /* e.g. amdgpu_device */
+	struct dc_perf_trace *perf_trace;
 	void *cgs_device;
 
 	enum dce_environment dce_environment;
@@ -191,7 +199,6 @@ union display_content_support {
 };
 
 struct dc_panel_patch {
-	unsigned int disconnect_delay;
 	unsigned int dppowerup_delay;
 	unsigned int extra_t12_ms;
 };
diff --git a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
index bd22f51813bf..afd287f08bc9 100644
--- a/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dce/dce_clk_mgr.c
@@ -676,6 +676,11 @@ static void dce112_update_clocks(struct clk_mgr *clk_mgr,
 {
 	struct dce_clk_mgr *clk_mgr_dce = TO_DCE_CLK_MGR(clk_mgr);
 	struct dm_pp_power_level_change_request level_change_req;
+	int unpatched_disp_clk = context->bw.dce.dispclk_khz;
+
+	/*TODO: W/A for dal3 linux, investigate why this works */
+	if (!clk_mgr_dce->dfs_bypass_active)
+		context->bw.dce.dispclk_khz = context->bw.dce.dispclk_khz * 115 / 100;
 
 	level_change_req.power_level = dce_get_required_clocks_state(clk_mgr, context);
 	/* get max clock state from PPLIB */
@@ -690,6 +695,8 @@ static void dce112_update_clocks(struct clk_mgr *clk_mgr,
 		clk_mgr->clks.dispclk_khz = context->bw.dce.dispclk_khz;
 	}
 	dce11_pplib_apply_display_requirements(clk_mgr->ctx->dc, context);
+
+	context->bw.dce.dispclk_khz = unpatched_disp_clk;
 }
 
 static void dce12_update_clocks(struct clk_mgr *clk_mgr,
diff --git a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
index bc50a8e25f4f..87771676acac 100644
--- a/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce100/dce100_hw_sequencer.c
@@ -117,6 +117,18 @@ void dce100_prepare_bandwidth(
 			false);
 }
 
+void dce100_optimize_bandwidth(
+		struct dc *dc,
+		struct dc_state *context)
+{
+	dce110_set_safe_displaymarks(&context->res_ctx, dc->res_pool);
+
+	dc->res_pool->clk_mgr->funcs->update_clocks(
+			dc->res_pool->clk_mgr,
+			context,
+			true);
+}
+
 /**************************************************************************/
 
 void dce100_hw_sequencer_construct(struct dc *dc)
@@ -125,6 +137,6 @@ void dce100_hw_sequencer_construct(struct dc *dc)
 
 	dc->hwss.enable_display_power_gating = dce100_enable_display_power_gating;
 	dc->hwss.prepare_bandwidth = dce100_prepare_bandwidth;
-	dc->hwss.optimize_bandwidth = dce100_prepare_bandwidth;
+	dc->hwss.optimize_bandwidth = dce100_optimize_bandwidth;
 }
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
index 1f7f25013217..52d50e24a995 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_compressor.c
@@ -64,65 +64,37 @@ static const struct dce110_compressor_reg_offsets reg_offsets[] = {
 
 static const uint32_t dce11_one_lpt_channel_max_resolution = 2560 * 1600;
 
-enum fbc_idle_force {
-	/* Bit 0 - Display registers updated */
-	FBC_IDLE_FORCE_DISPLAY_REGISTER_UPDATE = 0x00000001,
-
-	/* Bit 2 - FBC_GRPH_COMP_EN register updated */
-	FBC_IDLE_FORCE_GRPH_COMP_EN = 0x00000002,
-	/* Bit 3 - FBC_SRC_SEL register updated */
-	FBC_IDLE_FORCE_SRC_SEL_CHANGE = 0x00000004,
-	/* Bit 4 - FBC_MIN_COMPRESSION register updated */
-	FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE = 0x00000008,
-	/* Bit 5 - FBC_ALPHA_COMP_EN register updated */
-	FBC_IDLE_FORCE_ALPHA_COMP_EN = 0x00000010,
-	/* Bit 6 - FBC_ZERO_ALPHA_CHUNK_SKIP_EN register updated */
-	FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN = 0x00000020,
-	/* Bit 7 - FBC_FORCE_COPY_TO_COMP_BUF register updated */
-	FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF = 0x00000040,
-
-	/* Bit 24 - Memory write to region 0 defined by MC registers. */
-	FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION0 = 0x01000000,
-	/* Bit 25 - Memory write to region 1 defined by MC registers */
-	FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION1 = 0x02000000,
-	/* Bit 26 - Memory write to region 2 defined by MC registers */
-	FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION2 = 0x04000000,
-	/* Bit 27 - Memory write to region 3 defined by MC registers. */
-	FBC_IDLE_FORCE_MEMORY_WRITE_TO_REGION3 = 0x08000000,
-
-	/* Bit 28 - Memory write from any client other than MCIF */
-	FBC_IDLE_FORCE_MEMORY_WRITE_OTHER_THAN_MCIF = 0x10000000,
-	/* Bit 29 - CG statics screen signal is inactive */
-	FBC_IDLE_FORCE_CG_STATIC_SCREEN_IS_INACTIVE = 0x20000000,
-};
-
-
 static uint32_t align_to_chunks_number_per_line(uint32_t pixels)
 {
 	return 256 * ((pixels + 255) / 256);
 }
 
-static void reset_lb_on_vblank(struct dc_context *ctx)
+static void reset_lb_on_vblank(struct compressor *compressor, uint32_t crtc_inst)
 {
-	uint32_t value, frame_count;
+	uint32_t value;
+	uint32_t frame_count;
+	uint32_t status_pos;
 	uint32_t retry = 0;
-	uint32_t status_pos =
-			dm_read_reg(ctx, mmCRTC_STATUS_POSITION);
+	struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor);
+
+	cp110->offsets = reg_offsets[crtc_inst];
+
+	status_pos = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION));
 
 
 	/* Only if CRTC is enabled and counter is moving we wait for one frame. */
-	if (status_pos != dm_read_reg(ctx, mmCRTC_STATUS_POSITION)) {
+	if (status_pos != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_POSITION))) {
 		/* Resetting LB on VBlank */
-		value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL);
+		value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL));
 		set_reg_field_value(value, 3, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL);
 		set_reg_field_value(value, 1, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2);
-		dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value);
+		dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value);
 
-		frame_count = dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT);
+		frame_count = dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT));
 
 
 		for (retry = 10000; retry > 0; retry--) {
-			if (frame_count != dm_read_reg(ctx, mmCRTC_STATUS_FRAME_COUNT))
+			if (frame_count != dm_read_reg(compressor->ctx, DCP_REG(mmCRTC_STATUS_FRAME_COUNT)))
 				break;
 			udelay(10);
 		}
@@ -130,13 +102,11 @@ static void reset_lb_on_vblank(struct dc_context *ctx)
 			dm_error("Frame count did not increase for 100ms.\n");
 
 		/* Resetting LB on VBlank */
-		value = dm_read_reg(ctx, mmLB_SYNC_RESET_SEL);
+		value = dm_read_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL));
 		set_reg_field_value(value, 2, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL);
 		set_reg_field_value(value, 0, LB_SYNC_RESET_SEL, LB_SYNC_RESET_SEL2);
-		dm_write_reg(ctx, mmLB_SYNC_RESET_SEL, value);
-
+		dm_write_reg(compressor->ctx, DCP_REG(mmLB_SYNC_RESET_SEL), value);
 	}
-
 }
 
 static void wait_for_fbc_state_changed(
@@ -226,10 +196,10 @@ void dce110_compressor_enable_fbc(
 		uint32_t addr;
 		uint32_t value, misc_value;
 
-
 		addr = mmFBC_CNTL;
 		value = dm_read_reg(compressor->ctx, addr);
 		set_reg_field_value(value, 1, FBC_CNTL, FBC_GRPH_COMP_EN);
+		/* params->inst is valid HW CRTC instance start from 0 */
 		set_reg_field_value(
 			value,
 			params->inst,
@@ -238,8 +208,10 @@ void dce110_compressor_enable_fbc(
 
 		/* Keep track of enum controller_id FBC is attached to */
 		compressor->is_enabled = true;
-		compressor->attached_inst = params->inst;
-		cp110->offsets = reg_offsets[params->inst];
+		/* attached_inst is SW CRTC instance start from 1
+		 * 0 = CONTROLLER_ID_UNDEFINED means not attached crtc
+		 */
+		compressor->attached_inst = params->inst + CONTROLLER_ID_D0;
 
 		/* Toggle it as there is bug in HW */
 		set_reg_field_value(value, 0, FBC_CNTL, FBC_GRPH_COMP_EN);
@@ -268,9 +240,10 @@ void dce110_compressor_enable_fbc(
 void dce110_compressor_disable_fbc(struct compressor *compressor)
 {
 	struct dce110_compressor *cp110 = TO_DCE110_COMPRESSOR(compressor);
+	uint32_t crtc_inst = 0;
 
 	if (compressor->options.bits.FBC_SUPPORT) {
-		if (dce110_compressor_is_fbc_enabled_in_hw(compressor, NULL)) {
+		if (dce110_compressor_is_fbc_enabled_in_hw(compressor, &crtc_inst)) {
 			uint32_t reg_data;
 			/* Turn off compression */
 			reg_data = dm_read_reg(compressor->ctx, mmFBC_CNTL);
@@ -284,8 +257,10 @@ void dce110_compressor_disable_fbc(struct compressor *compressor)
 			wait_for_fbc_state_changed(cp110, false);
 		}
 
-		/* Sync line buffer  - dce100/110 only*/
-		reset_lb_on_vblank(compressor->ctx);
+		/* Sync line buffer which fbc was attached to dce100/110 only */
+		if (crtc_inst > CONTROLLER_ID_UNDEFINED && crtc_inst < CONTROLLER_ID_D3)
+			reset_lb_on_vblank(compressor,
+					crtc_inst - CONTROLLER_ID_D0);
 	}
 }
 
@@ -328,6 +303,8 @@ void dce110_compressor_program_compressed_surface_address_and_pitch(
 	uint32_t compressed_surf_address_low_part =
 		compressor->compr_surface_address.addr.low_part;
 
+	cp110->offsets = reg_offsets[params->inst];
+
 	/* Clear content first. */
 	dm_write_reg(
 		compressor->ctx,
@@ -410,13 +387,7 @@ void dce110_compressor_set_fbc_invalidation_triggers(
 	value = dm_read_reg(compressor->ctx, addr);
 	set_reg_field_value(
 		value,
-		fbc_trigger |
-		FBC_IDLE_FORCE_GRPH_COMP_EN |
-		FBC_IDLE_FORCE_SRC_SEL_CHANGE |
-		FBC_IDLE_FORCE_MIN_COMPRESSION_CHANGE |
-		FBC_IDLE_FORCE_ALPHA_COMP_EN |
-		FBC_IDLE_FORCE_ZERO_ALPHA_CHUNK_SKIP_EN |
-		FBC_IDLE_FORCE_FORCE_COPY_TO_COMP_BUF,
+		fbc_trigger,
 		FBC_IDLE_FORCE_CLEAR_MASK,
 		FBC_IDLE_FORCE_CLEAR_MASK);
 	dm_write_reg(compressor->ctx, addr, value);
@@ -549,7 +520,7 @@ void dce110_compressor_construct(struct dce110_compressor *compressor,
 	compressor->base.channel_interleave_size = 0;
 	compressor->base.dram_channels_num = 0;
 	compressor->base.lpt_channels_num = 0;
-	compressor->base.attached_inst = 0;
+	compressor->base.attached_inst = CONTROLLER_ID_UNDEFINED;
 	compressor->base.is_enabled = false;
 	compressor->base.funcs = &dce110_compressor_funcs;
 
diff --git a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
index 2f062bacd78a..8f09b8625c5d 100644
--- a/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dce110/dce110_hw_sequencer.c
@@ -1000,7 +1000,7 @@ void dce110_enable_audio_stream(struct pipe_ctx *pipe_ctx)
 
 		pipe_ctx->stream_res.audio->funcs->az_enable(pipe_ctx->stream_res.audio);
 
-		if (num_audio == 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL)
+		if (num_audio >= 1 && pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL)
 			/*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
 			pp_smu->set_pme_wa_enable(&pp_smu->pp_smu);
 		/* un-mute audio */
@@ -1017,6 +1017,8 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 	pipe_ctx->stream_res.stream_enc->funcs->audio_mute_control(
 			pipe_ctx->stream_res.stream_enc, true);
 	if (pipe_ctx->stream_res.audio) {
+		struct pp_smu_funcs_rv *pp_smu = dc->res_pool->pp_smu;
+
 		if (option != KEEP_ACQUIRED_RESOURCE ||
 				!dc->debug.az_endpoint_mute_only) {
 			/*only disalbe az_endpoint if power down or free*/
@@ -1036,6 +1038,9 @@ void dce110_disable_audio_stream(struct pipe_ctx *pipe_ctx, int option)
 			update_audio_usage(&dc->current_state->res_ctx, dc->res_pool, pipe_ctx->stream_res.audio, false);
 			pipe_ctx->stream_res.audio = NULL;
 		}
+		if (pp_smu != NULL && pp_smu->set_pme_wa_enable != NULL)
+			/*this is the first audio. apply the PME w/a in order to wake AZ from D3*/
+			pp_smu->set_pme_wa_enable(&pp_smu->pp_smu);
 
 		/* TODO: notify audio driver for if audio modes list changed
 		 * add audio mode list change flag */
@@ -1267,10 +1272,19 @@ static void program_scaler(const struct dc *dc,
 		pipe_ctx->plane_res.scl_data.lb_params.depth,
 		&pipe_ctx->stream->bit_depth_params);
 
-	if (pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color)
+	if (pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color) {
+		/*
+		 * The way 420 is packed, 2 channels carry Y component, 1 channel
+		 * alternate between Cb and Cr, so both channels need the pixel
+		 * value for Y
+		 */
+		if (pipe_ctx->stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+			color.color_r_cr = color.color_g_y;
+
 		pipe_ctx->stream_res.tg->funcs->set_overscan_blank_color(
 				pipe_ctx->stream_res.tg,
 				&color);
+	}
 
 	pipe_ctx->plane_res.xfm->funcs->transform_set_scaler(pipe_ctx->plane_res.xfm,
 		&pipe_ctx->plane_res.scl_data);
@@ -1766,12 +1780,13 @@ static void set_static_screen_control(struct pipe_ctx **pipe_ctx,
  *  Check if FBC can be enabled
  */
 static bool should_enable_fbc(struct dc *dc,
-			      struct dc_state *context,
-			      uint32_t *pipe_idx)
+		struct dc_state *context,
+		uint32_t *pipe_idx)
 {
 	uint32_t i;
 	struct pipe_ctx *pipe_ctx = NULL;
 	struct resource_context *res_ctx = &context->res_ctx;
+	unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
 
 
 	ASSERT(dc->fbc_compressor);
@@ -1786,14 +1801,28 @@ static bool should_enable_fbc(struct dc *dc,
 
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		if (res_ctx->pipe_ctx[i].stream) {
+
 			pipe_ctx = &res_ctx->pipe_ctx[i];
-			*pipe_idx = i;
-			break;
+
+			if (!pipe_ctx)
+				continue;
+
+			/* fbc not applicable on underlay pipe */
+			if (pipe_ctx->pipe_idx != underlay_idx) {
+				*pipe_idx = i;
+				break;
+			}
 		}
 	}
 
-	/* Pipe context should be found */
-	ASSERT(pipe_ctx);
+	if (i == dc->res_pool->pipe_count)
+		return false;
+
+	if (!pipe_ctx->stream->sink)
+		return false;
+
+	if (!pipe_ctx->stream->sink->link)
+		return false;
 
 	/* Only supports eDP */
 	if (pipe_ctx->stream->sink->link->connector_signal != SIGNAL_TYPE_EDP)
@@ -1817,8 +1846,9 @@ static bool should_enable_fbc(struct dc *dc,
 /*
  *  Enable FBC
  */
-static void enable_fbc(struct dc *dc,
-		       struct dc_state *context)
+static void enable_fbc(
+		struct dc *dc,
+		struct dc_state *context)
 {
 	uint32_t pipe_idx = 0;
 
@@ -1828,10 +1858,9 @@ static void enable_fbc(struct dc *dc,
 		struct compressor *compr = dc->fbc_compressor;
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[pipe_idx];
 
-
 		params.source_view_width = pipe_ctx->stream->timing.h_addressable;
 		params.source_view_height = pipe_ctx->stream->timing.v_addressable;
-
+		params.inst = pipe_ctx->stream_res.tg->inst;
 		compr->compr_surface_address.quad_part = dc->ctx->fbc_gpu_addr;
 
 		compr->funcs->surface_address_and_pitch(compr, &params);
@@ -2046,10 +2075,10 @@ enum dc_status dce110_apply_ctx_to_hw(
 			return status;
 	}
 
-	dcb->funcs->set_scratch_critical_state(dcb, false);
-
 	if (dc->fbc_compressor)
-		enable_fbc(dc, context);
+		enable_fbc(dc, dc->current_state);
+
+	dcb->funcs->set_scratch_critical_state(dcb, false);
 
 	return DC_OK;
 }
@@ -2408,7 +2437,6 @@ static void dce110_program_front_end_for_pipe(
 	struct dc_plane_state *plane_state = pipe_ctx->plane_state;
 	struct xfm_grph_csc_adjustment adjust;
 	struct out_csc_color_matrix tbl_entry;
-	unsigned int underlay_idx = dc->res_pool->underlay_pipe_index;
 	unsigned int i;
 	DC_LOGGER_INIT();
 	memset(&tbl_entry, 0, sizeof(tbl_entry));
@@ -2449,15 +2477,6 @@ static void dce110_program_front_end_for_pipe(
 
 	program_scaler(dc, pipe_ctx);
 
-	/* fbc not applicable on Underlay pipe */
-	if (dc->fbc_compressor && old_pipe->stream &&
-	    pipe_ctx->pipe_idx != underlay_idx) {
-		if (plane_state->tiling_info.gfx8.array_mode == DC_ARRAY_LINEAR_GENERAL)
-			dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
-		else
-			enable_fbc(dc, dc->current_state);
-	}
-
 	mi->funcs->mem_input_program_surface_config(
 			mi,
 			plane_state->format,
@@ -2534,6 +2553,9 @@ static void dce110_apply_ctx_for_surface(
 	if (num_planes == 0)
 		return;
 
+	if (dc->fbc_compressor)
+		dc->fbc_compressor->funcs->disable_fbc(dc->fbc_compressor);
+
 	for (i = 0; i < dc->res_pool->pipe_count; i++) {
 		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
 		struct pipe_ctx *old_pipe_ctx = &dc->current_state->res_ctx.pipe_ctx[i];
@@ -2576,6 +2598,9 @@ static void dce110_apply_ctx_for_surface(
 			(pipe_ctx->plane_state || old_pipe_ctx->plane_state))
 			dc->hwss.pipe_control_lock(dc, pipe_ctx, false);
 	}
+
+	if (dc->fbc_compressor)
+		enable_fbc(dc, dc->current_state);
 }
 
 static void dce110_power_down_fe(struct dc *dc, struct pipe_ctx *pipe_ctx)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
index f9d7d2c26cc2..54abedbf1b43 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.c
@@ -328,12 +328,10 @@ static void dcn1_update_clocks(struct clk_mgr *clk_mgr,
 
 	*smu_req_cur = smu_req;
 }
-
 static const struct clk_mgr_funcs dcn1_funcs = {
 	.get_dp_ref_clk_frequency = dce12_get_dp_ref_freq_khz,
 	.update_clocks = dcn1_update_clocks
 };
-
 struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
 {
 	struct dc_debug_options *debug = &ctx->dc->debug;
@@ -373,3 +371,5 @@ struct clk_mgr *dcn1_clk_mgr_create(struct dc_context *ctx)
 
 	return &clk_mgr_dce->base;
 }
+
+
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h
index 9dbaf6578006..a995eda443a3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_clk_mgr.h
@@ -28,6 +28,12 @@
 
 #include "../dce/dce_clk_mgr.h"
 
+struct clk_bypass {
+	uint32_t dcfclk_bypass;
+	uint32_t dispclk_pypass;
+	uint32_t dprefclk_bypass;
+};
+
 void dcn1_pplib_apply_display_requirements(
 	struct dc *dc,
 	struct dc_state *context);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
index 3eea44092a04..7469333a2c8a 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_cm_common.c
@@ -324,7 +324,7 @@ bool cm_helper_translate_curve_to_hw_format(
 	if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
 		return false;
 
-	PERF_TRACE();
+	PERF_TRACE_CTX(output_tf->ctx);
 
 	corner_points = lut_params->corner_points;
 	rgb_resulted = lut_params->rgb_resulted;
@@ -513,7 +513,7 @@ bool cm_helper_translate_curve_to_degamma_hw_format(
 	if (output_tf == NULL || lut_params == NULL || output_tf->type == TF_TYPE_BYPASS)
 		return false;
 
-	PERF_TRACE();
+	PERF_TRACE_CTX(output_tf->ctx);
 
 	corner_points = lut_params->corner_points;
 	rgb_resulted = lut_params->rgb_resulted;
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
index dcb3c5530236..cd1ebe57ed59 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_dpp.c
@@ -463,7 +463,7 @@ void dpp1_set_cursor_position(
 	if (src_y_offset >= (int)param->viewport.height)
 		cur_en = 0;  /* not visible beyond bottom edge*/
 
-	if (src_y_offset < 0)
+	if (src_y_offset + (int)height <= 0)
 		cur_en = 0;  /* not visible beyond top edge*/
 
 	REG_UPDATE(CURSOR0_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
index 345af015d061..d1acd7165bc8 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
@@ -1140,7 +1140,7 @@ void hubp1_cursor_set_position(
 	if (src_y_offset >= (int)param->viewport.height)
 		cur_en = 0;  /* not visible beyond bottom edge*/
 
-	if (src_y_offset < 0) //+ (int)hubp->curs_attr.height
+	if (src_y_offset + (int)hubp->curs_attr.height <= 0)
 		cur_en = 0;  /* not visible beyond top edge*/
 
 	if (cur_en && REG_READ(CURSOR_SURFACE_ADDRESS) == 0)
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
index 0bd33a713836..58a12ddf12f3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
@@ -2159,6 +2159,15 @@ static void dcn10_blank_pixel_data(
 	color_space = stream->output_color_space;
 	color_space_to_black_color(dc, color_space, &black_color);
 
+	/*
+	 * The way 420 is packed, 2 channels carry Y component, 1 channel
+	 * alternate between Cb and Cr, so both channels need the pixel
+	 * value for Y
+	 */
+	if (stream->timing.pixel_encoding == PIXEL_ENCODING_YCBCR420)
+		black_color.color_r_cr = black_color.color_g_y;
+
+
 	if (stream_res->tg->funcs->set_blank_color)
 		stream_res->tg->funcs->set_blank_color(
 				stream_res->tg,
@@ -2346,27 +2355,22 @@ static void dcn10_apply_ctx_for_surface(
 			top_pipe_to_program->plane_state->update_flags.bits.full_update)
 		for (i = 0; i < dc->res_pool->pipe_count; i++) {
 			struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
+			tg = pipe_ctx->stream_res.tg;
 			/* Skip inactive pipes and ones already updated */
-			if (!pipe_ctx->stream || pipe_ctx->stream == stream)
+			if (!pipe_ctx->stream || pipe_ctx->stream == stream
+					|| !pipe_ctx->plane_state
+					|| !tg->funcs->is_tg_enabled(tg))
 				continue;
 
-			pipe_ctx->stream_res.tg->funcs->lock(pipe_ctx->stream_res.tg);
+			tg->funcs->lock(tg);
 
 			pipe_ctx->plane_res.hubp->funcs->hubp_setup_interdependent(
 				pipe_ctx->plane_res.hubp,
 				&pipe_ctx->dlg_regs,
 				&pipe_ctx->ttu_regs);
-		}
 
-	for (i = 0; i < dc->res_pool->pipe_count; i++) {
-		struct pipe_ctx *pipe_ctx = &context->res_ctx.pipe_ctx[i];
-
-		if (!pipe_ctx->stream || pipe_ctx->stream == stream)
-			continue;
-
-		dcn10_pipe_control_lock(dc, pipe_ctx, false);
-	}
+			tg->funcs->unlock(tg);
+		}
 
 	if (num_planes == 0)
 		false_optc_underflow_wa(dc, stream, tg);
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
index 211bb240a720..cd469014baa3 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer_debug.c
@@ -44,6 +44,7 @@
 #include "dcn10_hubp.h"
 #include "dcn10_hubbub.h"
 #include "dcn10_cm_common.h"
+#include "dcn10_clk_mgr.h"
 
 static unsigned int snprintf_count(char *pBuf, unsigned int bufSize, char *fmt, ...)
 {
@@ -463,19 +464,22 @@ static unsigned int dcn10_get_otg_states(struct dc *dc, char *pBuf, unsigned int
 static unsigned int dcn10_get_clock_states(struct dc *dc, char *pBuf, unsigned int bufSize)
 {
 	unsigned int chars_printed = 0;
+	unsigned int remaining_buffer = bufSize;
 
-	chars_printed = snprintf_count(pBuf, bufSize, "dcfclk_khz,dcfclk_deep_sleep_khz,dispclk_khz,"
-		"dppclk_khz,max_supported_dppclk_khz,fclk_khz,socclk_khz\n"
-		"%d,%d,%d,%d,%d,%d,%d\n",
+	chars_printed = snprintf_count(pBuf, bufSize, "dcfclk,dcfclk_deep_sleep,dispclk,"
+		"dppclk,fclk,socclk\n"
+		"%d,%d,%d,%d,%d,%d\n",
 		dc->current_state->bw.dcn.clk.dcfclk_khz,
 		dc->current_state->bw.dcn.clk.dcfclk_deep_sleep_khz,
 		dc->current_state->bw.dcn.clk.dispclk_khz,
 		dc->current_state->bw.dcn.clk.dppclk_khz,
-		dc->current_state->bw.dcn.clk.max_supported_dppclk_khz,
 		dc->current_state->bw.dcn.clk.fclk_khz,
 		dc->current_state->bw.dcn.clk.socclk_khz);
 
-	return chars_printed;
+	remaining_buffer -= chars_printed;
+	pBuf += chars_printed;
+
+	return bufSize - remaining_buffer;
 }
 
 static void dcn10_clear_otpc_underflow(struct dc *dc)
@@ -538,16 +542,16 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne
 	 *  Bit 0 - 15: Hardware block mask
 	 *  Bit 15: 1 = Invariant Only, 0 = All
 	 */
-	const unsigned int DC_HW_STATE_MASK_HUBBUB 	= 0x1;
-	const unsigned int DC_HW_STATE_MASK_HUBP 	= 0x2;
-	const unsigned int DC_HW_STATE_MASK_RQ 		= 0x4;
-	const unsigned int DC_HW_STATE_MASK_DLG 	= 0x8;
-	const unsigned int DC_HW_STATE_MASK_TTU 	= 0x10;
-	const unsigned int DC_HW_STATE_MASK_CM 		= 0x20;
-	const unsigned int DC_HW_STATE_MASK_MPCC 	= 0x40;
-	const unsigned int DC_HW_STATE_MASK_OTG 	= 0x80;
-	const unsigned int DC_HW_STATE_MASK_CLOCKS 	= 0x100;
-	const unsigned int DC_HW_STATE_INVAR_ONLY	= 0x8000;
+	const unsigned int DC_HW_STATE_MASK_HUBBUB			= 0x1;
+	const unsigned int DC_HW_STATE_MASK_HUBP			= 0x2;
+	const unsigned int DC_HW_STATE_MASK_RQ				= 0x4;
+	const unsigned int DC_HW_STATE_MASK_DLG				= 0x8;
+	const unsigned int DC_HW_STATE_MASK_TTU				= 0x10;
+	const unsigned int DC_HW_STATE_MASK_CM				= 0x20;
+	const unsigned int DC_HW_STATE_MASK_MPCC			= 0x40;
+	const unsigned int DC_HW_STATE_MASK_OTG				= 0x80;
+	const unsigned int DC_HW_STATE_MASK_CLOCKS			= 0x100;
+	const unsigned int DC_HW_STATE_INVAR_ONLY			= 0x8000;
 
 	unsigned int chars_printed = 0;
 	unsigned int remaining_buf_size = bufSize;
@@ -603,6 +607,9 @@ void dcn10_get_hw_state(struct dc *dc, char *pBuf, unsigned int bufSize, unsigne
 		remaining_buf_size -= chars_printed;
 	}
 
-	if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0)
+	if ((mask & DC_HW_STATE_MASK_CLOCKS) && remaining_buf_size > 0) {
 		chars_printed = dcn10_get_clock_states(dc, pBuf, remaining_buf_size);
+		pBuf += chars_printed;
+		remaining_buf_size -= chars_printed;
+	}
 }
diff --git a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
index 47dbe4bb294a..5d4772dec0ba 100644
--- a/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
+++ b/drivers/gpu/drm/amd/display/dc/dcn10/dcn10_resource.c
@@ -202,7 +202,6 @@ enum dcn10_clk_src_array_id {
 #define MMHUB_SR(reg_name)\
 		.reg_name = MMHUB_BASE(mm ## reg_name ## _BASE_IDX) +  \
 					mm ## reg_name
-
 /* macros to expend register list macro defined in HW object header file
  * end *********************/
 
@@ -436,7 +435,6 @@ static const struct dcn_optc_mask tg_mask = {
 	TG_COMMON_MASK_SH_LIST_DCN1_0(_MASK)
 };
 
-
 static const struct bios_registers bios_regs = {
 		NBIO_SR(BIOS_SCRATCH_0),
 		NBIO_SR(BIOS_SCRATCH_3),
@@ -497,7 +495,6 @@ static const struct dce110_clk_src_mask cs_mask = {
 		CS_COMMON_MASK_SH_LIST_DCN1_0(_MASK)
 };
 
-
 static const struct resource_caps res_cap = {
 		.num_timing_generator = 4,
 		.num_opp = 4,
@@ -1277,7 +1274,6 @@ static bool construct(
 			goto fail;
 		}
 	}
-
 	pool->base.clk_mgr = dcn1_clk_mgr_create(ctx);
 	if (pool->base.clk_mgr == NULL) {
 		dm_error("DC: failed to create display clock!\n");
diff --git a/drivers/gpu/drm/amd/display/dc/dm_event_log.h b/drivers/gpu/drm/amd/display/dc/dm_event_log.h
index 34a701ca879e..65663f4d93e1 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_event_log.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_event_log.h
@@ -33,6 +33,7 @@
 
 #define EVENT_LOG_AUX_REQ(ddc, type, action, address, len, data)
 #define EVENT_LOG_AUX_REP(ddc, type, replyStatus, len, data)
+#define EVENT_LOG_CUST_MSG(tag, a, ...)
 
 #endif
 
diff --git a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
index beb08fd12b1d..0029a39efb1c 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_pp_smu.h
@@ -102,7 +102,7 @@ struct pp_smu_funcs_rv {
 	 */
 	void (*set_display_count)(struct pp_smu *pp, int count);
 
-	/* which SMU message?  are reader and writer WM separate SMU msg? */
+	/* reader and writer WM's are sent together as part of one table*/
 	/*
 	 * PPSMC_MSG_SetDriverDramAddrHigh
 	 * PPSMC_MSG_SetDriverDramAddrLow
diff --git a/drivers/gpu/drm/amd/display/dc/dm_services.h b/drivers/gpu/drm/amd/display/dc/dm_services.h
index 28128c02de00..1961cc6d9143 100644
--- a/drivers/gpu/drm/amd/display/dc/dm_services.h
+++ b/drivers/gpu/drm/amd/display/dc/dm_services.h
@@ -31,6 +31,8 @@
 
 #define __DM_SERVICES_H__
 
+#include "amdgpu_dm_trace.h"
+
 /* TODO: remove when DC is complete. */
 #include "dm_services_types.h"
 #include "logger_interface.h"
@@ -70,6 +72,7 @@ static inline uint32_t dm_read_reg_func(
 	}
 #endif
 	value = cgs_read_register(ctx->cgs_device, address);
+	trace_amdgpu_dc_rreg(&ctx->perf_trace->read_count, address, value);
 
 	return value;
 }
@@ -90,6 +93,7 @@ static inline void dm_write_reg_func(
 	}
 #endif
 	cgs_write_register(ctx->cgs_device, address, value);
+	trace_amdgpu_dc_wreg(&ctx->perf_trace->write_count, address, value);
 }
 
 static inline uint32_t dm_read_index_reg(
@@ -351,8 +355,12 @@ unsigned long long dm_get_elapse_time_in_ns(struct dc_context *ctx,
 /*
  * performance tracing
  */
-void dm_perf_trace_timestamp(const char *func_name, unsigned int line);
-#define PERF_TRACE()	dm_perf_trace_timestamp(__func__, __LINE__)
+#define PERF_TRACE()	trace_amdgpu_dc_performance(CTX->perf_trace->read_count,\
+		CTX->perf_trace->write_count, &CTX->perf_trace->last_entry_read,\
+		&CTX->perf_trace->last_entry_write, __func__, __LINE__)
+#define PERF_TRACE_CTX(__CTX)	trace_amdgpu_dc_performance(__CTX->perf_trace->read_count,\
+		__CTX->perf_trace->write_count, &__CTX->perf_trace->last_entry_read,\
+		&__CTX->perf_trace->last_entry_write, __func__, __LINE__)
 
 
 /*
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
index f20161c5706d..dada04296025 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.c
@@ -56,7 +56,6 @@ struct gpio_service *dal_gpio_service_create(
 	struct dc_context *ctx)
 {
 	struct gpio_service *service;
-
 	uint32_t index_of_id;
 
 	service = kzalloc(sizeof(struct gpio_service), GFP_KERNEL);
@@ -78,44 +77,33 @@ struct gpio_service *dal_gpio_service_create(
 		goto failure_1;
 	}
 
-	/* allocate and initialize business storage */
+	/* allocate and initialize busyness storage */
 	{
-		const uint32_t bits_per_uint = sizeof(uint32_t) << 3;
-
 		index_of_id = 0;
 		service->ctx = ctx;
 
 		do {
 			uint32_t number_of_bits =
 				service->factory.number_of_pins[index_of_id];
+			uint32_t i = 0;
 
-			uint32_t number_of_uints =
-				(number_of_bits + bits_per_uint - 1) /
-				bits_per_uint;
-
-			uint32_t *slot;
-
-			if (number_of_bits) {
-				uint32_t index_of_uint = 0;
+			if (number_of_bits)  {
+				service->busyness[index_of_id] =
+					kcalloc(number_of_bits, sizeof(char),
+						GFP_KERNEL);
 
-				slot = kcalloc(number_of_uints,
-					       sizeof(uint32_t),
-					       GFP_KERNEL);
-
-				if (!slot) {
+				if (!service->busyness[index_of_id]) {
 					BREAK_TO_DEBUGGER();
 					goto failure_2;
 				}
 
 				do {
-					slot[index_of_uint] = 0;
-
-					++index_of_uint;
-				} while (index_of_uint < number_of_uints);
-			} else
-				slot = NULL;
-
-			service->busyness[index_of_id] = slot;
+					service->busyness[index_of_id][i] = 0;
+					++i;
+				} while (i < number_of_bits);
+			} else {
+				service->busyness[index_of_id] = NULL;
+			}
 
 			++index_of_id;
 		} while (index_of_id < GPIO_ID_COUNT);
@@ -125,13 +113,8 @@ struct gpio_service *dal_gpio_service_create(
 
 failure_2:
 	while (index_of_id) {
-		uint32_t *slot;
-
 		--index_of_id;
-
-		slot = service->busyness[index_of_id];
-
-		kfree(slot);
+		kfree(service->busyness[index_of_id]);
 	}
 
 failure_1:
@@ -169,9 +152,7 @@ void dal_gpio_service_destroy(
 		uint32_t index_of_id = 0;
 
 		do {
-			uint32_t *slot = (*ptr)->busyness[index_of_id];
-
-			kfree(slot);
+			kfree((*ptr)->busyness[index_of_id]);
 
 			++index_of_id;
 		} while (index_of_id < GPIO_ID_COUNT);
@@ -192,11 +173,7 @@ static bool is_pin_busy(
 	enum gpio_id id,
 	uint32_t en)
 {
-	const uint32_t bits_per_uint = sizeof(uint32_t) << 3;
-
-	const uint32_t *slot = service->busyness[id] + (en / bits_per_uint);
-
-	return 0 != (*slot & (1 << (en % bits_per_uint)));
+	return service->busyness[id][en];
 }
 
 static void set_pin_busy(
@@ -204,10 +181,7 @@ static void set_pin_busy(
 	enum gpio_id id,
 	uint32_t en)
 {
-	const uint32_t bits_per_uint = sizeof(uint32_t) << 3;
-
-	service->busyness[id][en / bits_per_uint] |=
-		(1 << (en % bits_per_uint));
+	service->busyness[id][en] = true;
 }
 
 static void set_pin_free(
@@ -215,10 +189,7 @@ static void set_pin_free(
 	enum gpio_id id,
 	uint32_t en)
 {
-	const uint32_t bits_per_uint = sizeof(uint32_t) << 3;
-
-	service->busyness[id][en / bits_per_uint] &=
-		~(1 << (en % bits_per_uint));
+	service->busyness[id][en] = false;
 }
 
 enum gpio_result dal_gpio_service_open(
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h
index c7f3081f59cc..1d501a43d13b 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h
+++ b/drivers/gpu/drm/amd/display/dc/gpio/gpio_service.h
@@ -36,10 +36,9 @@ struct gpio_service {
 	/*
 	 * @brief
 	 * Business storage.
-	 * For each member of 'enum gpio_id',
-	 * store array of bits (packed into uint32_t slots),
-	 * index individual bit by 'en' value */
-	uint32_t *busyness[GPIO_ID_COUNT];
+	 * one byte For each member of 'enum gpio_id'
+	 */
+	char *busyness[GPIO_ID_COUNT];
 };
 
 enum gpio_result dal_gpio_service_open(
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
index a683f4102e65..c2028c4744a6 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_factory.c
@@ -79,6 +79,7 @@ bool dal_hw_factory_init(
 		dal_hw_factory_dce110_init(factory);
 		return true;
 	case DCE_VERSION_12_0:
+	case DCE_VERSION_12_1:
 		dal_hw_factory_dce120_init(factory);
 		return true;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
diff --git a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
index 096f45628630..236ca28784a9 100644
--- a/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
+++ b/drivers/gpu/drm/amd/display/dc/gpio/hw_translate.c
@@ -76,6 +76,7 @@ bool dal_hw_translate_init(
 		dal_hw_translate_dce110_init(translate);
 		return true;
 	case DCE_VERSION_12_0:
+	case DCE_VERSION_12_1:
 		dal_hw_translate_dce120_init(translate);
 		return true;
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
diff --git a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
index e56093f26eed..1ad6e49102ff 100644
--- a/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
+++ b/drivers/gpu/drm/amd/display/dc/i2caux/i2caux.c
@@ -90,6 +90,7 @@ struct i2caux *dal_i2caux_create(
 	case DCE_VERSION_10_0:
 		return dal_i2caux_dce100_create(ctx);
 	case DCE_VERSION_12_0:
+	case DCE_VERSION_12_1:
 		return dal_i2caux_dce120_create(ctx);
 #if defined(CONFIG_DRM_AMD_DC_DCN1_0)
 	case DCN_VERSION_1_0:
diff --git a/drivers/gpu/drm/amd/display/dc/inc/compressor.h b/drivers/gpu/drm/amd/display/dc/inc/compressor.h
index bcb18f5e1e60..7a147a9762a0 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/compressor.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/compressor.h
@@ -77,6 +77,7 @@ struct compressor_funcs {
 };
 struct compressor {
 	struct dc_context *ctx;
+	/* CONTROLLER_ID_D0 + instance, CONTROLLER_ID_UNDEFINED = 0 */
 	uint32_t attached_inst;
 	bool is_enabled;
 	const struct compressor_funcs *funcs;
diff --git a/drivers/gpu/drm/amd/display/dc/inc/core_types.h b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
index e3ee96afa60e..b168a5e9dd9d 100644
--- a/drivers/gpu/drm/amd/display/dc/inc/core_types.h
+++ b/drivers/gpu/drm/amd/display/dc/inc/core_types.h
@@ -272,6 +272,17 @@ union bw_context {
 	struct dce_bw_output dce;
 };
 
+/**
+ * struct dc_state - The full description of a state requested by a user
+ *
+ * @streams: Stream properties
+ * @stream_status: The planes on a given stream
+ * @res_ctx: Persistent state of resources
+ * @bw: The output from bandwidth and watermark calculations
+ * @pp_display_cfg: PowerPlay clocks and settings
+ * @dcn_bw_vars: non-stack memory to support bandwidth calculations
+ *
+ */
 struct dc_state {
 	struct dc_stream_state *streams[MAX_PIPES];
 	struct dc_stream_status stream_status[MAX_PIPES];
@@ -279,7 +290,6 @@ struct dc_state {
 
 	struct resource_context res_ctx;
 
-	/* The output from BW and WM calculations. */
 	union bw_context bw;
 
 	/* Note: these are big structures, do *not* put on stack! */
diff --git a/drivers/gpu/drm/amd/display/include/bios_parser_types.h b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
index f8dbfa5b89f2..7fd78a696800 100644
--- a/drivers/gpu/drm/amd/display/include/bios_parser_types.h
+++ b/drivers/gpu/drm/amd/display/include/bios_parser_types.h
@@ -41,6 +41,7 @@ enum as_signal_type {
 	AS_SIGNAL_TYPE_LVDS,
 	AS_SIGNAL_TYPE_DISPLAY_PORT,
 	AS_SIGNAL_TYPE_GPU_PLL,
+	AS_SIGNAL_TYPE_XGMI,
 	AS_SIGNAL_TYPE_UNKNOWN
 };
 
diff --git a/drivers/gpu/drm/amd/display/include/dal_types.h b/drivers/gpu/drm/amd/display/include/dal_types.h
index 89627133e188..f5bd869d4320 100644
--- a/drivers/gpu/drm/amd/display/include/dal_types.h
+++ b/drivers/gpu/drm/amd/display/include/dal_types.h
@@ -42,6 +42,7 @@ enum dce_version {
 	DCE_VERSION_11_2,
 	DCE_VERSION_11_22,
 	DCE_VERSION_12_0,
+	DCE_VERSION_12_1,
 	DCE_VERSION_MAX,
 	DCN_VERSION_1_0,
 #if defined(CONFIG_DRM_AMD_DC_DCN1_01)
diff --git a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
index bbecbaefb741..479b77c2e89e 100644
--- a/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
+++ b/drivers/gpu/drm/amd/display/modules/color/color_gamma.c
@@ -1761,7 +1761,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 
 	struct pwl_float_data *rgb_user = NULL;
 	struct pwl_float_data_ex *curve = NULL;
-	struct gamma_pixel *axix_x = NULL;
+	struct gamma_pixel *axis_x = NULL;
 	struct pixel_gamma_point *coeff = NULL;
 	enum dc_transfer_func_predefined tf = TRANSFER_FUNCTION_SRGB;
 	bool ret = false;
@@ -1787,10 +1787,10 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 			 GFP_KERNEL);
 	if (!curve)
 		goto curve_alloc_fail;
-	axix_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axix_x),
+	axis_x = kvcalloc(ramp->num_entries + _EXTRA_POINTS, sizeof(*axis_x),
 			  GFP_KERNEL);
-	if (!axix_x)
-		goto axix_x_alloc_fail;
+	if (!axis_x)
+		goto axis_x_alloc_fail;
 	coeff = kvcalloc(MAX_HW_POINTS + _EXTRA_POINTS, sizeof(*coeff),
 			 GFP_KERNEL);
 	if (!coeff)
@@ -1803,7 +1803,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 	tf = input_tf->tf;
 
 	build_evenly_distributed_points(
-			axix_x,
+			axis_x,
 			ramp->num_entries,
 			dividers);
 
@@ -1828,7 +1828,7 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 	tf_pts->x_point_at_y1_blue = 1;
 
 	map_regamma_hw_to_x_user(ramp, coeff, rgb_user,
-			coordinates_x, axix_x, curve,
+			coordinates_x, axis_x, curve,
 			MAX_HW_POINTS, tf_pts,
 			mapUserRamp && ramp->type != GAMMA_CUSTOM);
 	if (ramp->type == GAMMA_CUSTOM)
@@ -1838,8 +1838,8 @@ bool mod_color_calculate_degamma_params(struct dc_transfer_func *input_tf,
 
 	kvfree(coeff);
 coeff_alloc_fail:
-	kvfree(axix_x);
-axix_x_alloc_fail:
+	kvfree(axis_x);
+axis_x_alloc_fail:
 	kvfree(curve);
 curve_alloc_fail:
 	kvfree(rgb_user);
diff --git a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
index 620a171620ee..1544ed3f1747 100644
--- a/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
+++ b/drivers/gpu/drm/amd/display/modules/freesync/freesync.c
@@ -608,12 +608,12 @@ static void build_vrr_infopacket_data(const struct mod_vrr_params *vrr,
 static void build_vrr_infopacket_fs2_data(enum color_transfer_func app_tf,
 		struct dc_info_packet *infopacket)
 {
-	if (app_tf != transfer_func_unknown) {
+	if (app_tf != TRANSFER_FUNC_UNKNOWN) {
 		infopacket->valid = true;
 
 		infopacket->sb[6] |= 0x08;  // PB6 = [Bit 3 = Native Color Active]
 
-		if (app_tf == transfer_func_gamma_22) {
+		if (app_tf == TRANSFER_FUNC_GAMMA_22) {
 			infopacket->sb[9] |= 0x04;  // PB6 = [Bit 2 = Gamma 2.2 EOTF Active]
 		}
 	}
@@ -688,11 +688,11 @@ void mod_freesync_build_vrr_infopacket(struct mod_freesync *mod_freesync,
 		return;
 
 	switch (packet_type) {
-	case packet_type_fs2:
+	case PACKET_TYPE_FS2:
 		build_vrr_infopacket_v2(stream->signal, vrr, app_tf, infopacket);
 		break;
-	case packet_type_vrr:
-	case packet_type_fs1:
+	case PACKET_TYPE_VRR:
+	case PACKET_TYPE_FS1:
 	default:
 		build_vrr_infopacket_v1(stream->signal, vrr, infopacket);
 	}
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
index 786b34380f85..5b1c9a4c7643 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_info_packet.h
@@ -26,15 +26,13 @@
 #ifndef MOD_INFO_PACKET_H_
 #define MOD_INFO_PACKET_H_
 
-struct info_packet_inputs {
-	const struct dc_stream_state *pStream;
-};
+#include "mod_shared.h"
 
-struct info_packets {
-	struct dc_info_packet *pVscInfoPacket;
-};
+//Forward Declarations
+struct dc_stream_state;
+struct dc_info_packet;
 
-void mod_build_infopackets(struct info_packet_inputs *inputs,
-		struct info_packets *info_packets);
+void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+		struct dc_info_packet *info_packet);
 
 #endif
diff --git a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
index 238c431ae483..1bd02c0ac30c 100644
--- a/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
+++ b/drivers/gpu/drm/amd/display/modules/inc/mod_shared.h
@@ -23,27 +23,26 @@
  *
  */
 
-
 #ifndef MOD_SHARED_H_
 #define MOD_SHARED_H_
 
 enum color_transfer_func {
-	transfer_func_unknown,
-	transfer_func_srgb,
-	transfer_func_bt709,
-	transfer_func_pq2084,
-	transfer_func_pq2084_interim,
-	transfer_func_linear_0_1,
-	transfer_func_linear_0_125,
-	transfer_func_dolbyvision,
-	transfer_func_gamma_22,
-	transfer_func_gamma_26
+	TRANSFER_FUNC_UNKNOWN,
+	TRANSFER_FUNC_SRGB,
+	TRANSFER_FUNC_BT709,
+	TRANSFER_FUNC_PQ2084,
+	TRANSFER_FUNC_PQ2084_INTERIM,
+	TRANSFER_FUNC_LINEAR_0_1,
+	TRANSFER_FUNC_LINEAR_0_125,
+	TRANSFER_FUNC_GAMMA_22,
+	TRANSFER_FUNC_GAMMA_26
 };
 
 enum vrr_packet_type {
-	packet_type_vrr,
-	packet_type_fs1,
-	packet_type_fs2
+	PACKET_TYPE_VRR,
+	PACKET_TYPE_FS1,
+	PACKET_TYPE_FS2
 };
 
+
 #endif /* MOD_SHARED_H_ */
diff --git a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
index ff8bfb9b43b0..db06fab2ad5c 100644
--- a/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
+++ b/drivers/gpu/drm/amd/display/modules/info_packet/info_packet.c
@@ -25,6 +25,10 @@
 
 #include "mod_info_packet.h"
 #include "core_types.h"
+#include "dc_types.h"
+#include "mod_shared.h"
+
+#define HDMI_INFOFRAME_TYPE_VENDOR 0x81
 
 enum ColorimetryRGBDP {
 	ColorimetryRGB_DP_sRGB               = 0,
@@ -41,7 +45,7 @@ enum ColorimetryYCCDP {
 	ColorimetryYCC_DP_ITU2020YCbCr  = 7,
 };
 
-static void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
+void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 		struct dc_info_packet *info_packet)
 {
 	unsigned int vscPacketRevision = 0;
@@ -159,7 +163,7 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 	 *   DPCD register is exposed in the new Extended Receiver Capability field for DPCD Rev. 1.4
 	 *   (and higher). When MISC1. bit 6. is Set to 1, a Source device uses a VSC SDP to indicate
 	 *   the Pixel Encoding/Colorimetry Format and that a Sink device must ignore MISC1, bit 7, and
-	 *   MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become “don’t care”).)
+	 *   MISC0, bits 7:1 (MISC1, bit 7. and MISC0, bits 7:1 become "don't care").)
 	 */
 	if (vscPacketRevision == 0x5) {
 		/* Secondary-data Packet ID = 0 */
@@ -320,10 +324,3 @@ static void mod_build_vsc_infopacket(const struct dc_stream_state *stream,
 
 }
 
-void mod_build_infopackets(struct info_packet_inputs *inputs,
-		struct info_packets *info_packets)
-{
-	if (info_packets->pVscInfoPacket != NULL)
-		mod_build_vsc_infopacket(inputs->pStream, info_packets->pVscInfoPacket);
-}
-
diff --git a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
index 00f63b7dd32f..c11a443dcbc8 100644
--- a/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
+++ b/drivers/gpu/drm/amd/display/modules/power/power_helpers.c
@@ -57,6 +57,7 @@ static const unsigned char abm_config[abm_defines_max_config][abm_defines_max_le
 #define NUM_POWER_FN_SEGS 8
 #define NUM_BL_CURVE_SEGS 16
 
+#pragma pack(push, 1)
 /* NOTE: iRAM is 256B in size */
 struct iram_table_v_2 {
 	/* flags                      */
@@ -100,6 +101,7 @@ struct iram_table_v_2 {
 	uint8_t dummy8;							/* 0xfe       */
 	uint8_t dummy9;							/* 0xff       */
 };
+#pragma pack(pop)
 
 static uint16_t backlight_8_to_16(unsigned int backlight_8bit)
 {
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h
new file mode 100644
index 000000000000..8f515875a34d
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_offset.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _mmhub_9_4_0_OFFSET_HEADER
+#define _mmhub_9_4_0_OFFSET_HEADER
+
+
+// addressBlock: mmhub_utcl2_vmsharedpfdec
+// base address: 0x6a040
+#define mmMC_VM_XGMI_LFB_CNTL                                                                          0x0823
+#define mmMC_VM_XGMI_LFB_CNTL_BASE_IDX                                                                 0
+#define mmMC_VM_XGMI_LFB_SIZE                                                                          0x0824
+#define mmMC_VM_XGMI_LFB_SIZE_BASE_IDX                                                                 0
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h
new file mode 100644
index 000000000000..0a6b072d191e
--- /dev/null
+++ b/drivers/gpu/drm/amd/include/asic_reg/mmhub/mmhub_9_4_0_sh_mask.h
@@ -0,0 +1,35 @@
+/*
+ * Copyright (C) 2018  Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+ * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+#ifndef _mmhub_9_4_0_SH_MASK_HEADER
+#define _mmhub_9_4_0_SH_MASK_HEADER
+
+
+// addressBlock: mmhub_utcl2_vmsharedpfdec
+//MC_VM_XGMI_LFB_CNTL
+#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION__SHIFT                                                             0x0
+#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION__SHIFT                                                             0x4
+#define MC_VM_XGMI_LFB_CNTL__PF_LFB_REGION_MASK                                                               0x00000007L
+#define MC_VM_XGMI_LFB_CNTL__PF_MAX_REGION_MASK                                                               0x00000070L
+//MC_VM_XGMI_LFB_SIZE
+#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE__SHIFT                                                               0x0
+#define MC_VM_XGMI_LFB_SIZE__PF_LFB_SIZE_MASK                                                                 0x0000FFFFL
+
+#endif
diff --git a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
index 58ac0b90c310..8154d67388cc 100644
--- a/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_kfd_interface.h
@@ -188,8 +188,8 @@ struct tile_config {
  */
 #define ALLOC_MEM_FLAGS_VRAM		(1 << 0)
 #define ALLOC_MEM_FLAGS_GTT		(1 << 1)
-#define ALLOC_MEM_FLAGS_USERPTR		(1 << 2) /* TODO */
-#define ALLOC_MEM_FLAGS_DOORBELL	(1 << 3) /* TODO */
+#define ALLOC_MEM_FLAGS_USERPTR		(1 << 2)
+#define ALLOC_MEM_FLAGS_DOORBELL	(1 << 3)
 
 /*
  * Allocation flags attributes/access options.
diff --git a/drivers/gpu/drm/amd/include/kgd_pp_interface.h b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
index 980e696989b1..789c4f288485 100644
--- a/drivers/gpu/drm/amd/include/kgd_pp_interface.h
+++ b/drivers/gpu/drm/amd/include/kgd_pp_interface.h
@@ -127,12 +127,13 @@ enum amd_pp_task {
 };
 
 enum PP_SMC_POWER_PROFILE {
-	PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x0,
-	PP_SMC_POWER_PROFILE_POWERSAVING  = 0x1,
-	PP_SMC_POWER_PROFILE_VIDEO        = 0x2,
-	PP_SMC_POWER_PROFILE_VR           = 0x3,
-	PP_SMC_POWER_PROFILE_COMPUTE      = 0x4,
-	PP_SMC_POWER_PROFILE_CUSTOM       = 0x5,
+	PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT = 0x0,
+	PP_SMC_POWER_PROFILE_FULLSCREEN3D = 0x1,
+	PP_SMC_POWER_PROFILE_POWERSAVING  = 0x2,
+	PP_SMC_POWER_PROFILE_VIDEO        = 0x3,
+	PP_SMC_POWER_PROFILE_VR           = 0x4,
+	PP_SMC_POWER_PROFILE_COMPUTE      = 0x5,
+	PP_SMC_POWER_PROFILE_CUSTOM       = 0x6,
 };
 
 enum {
@@ -276,6 +277,10 @@ struct amd_pm_funcs {
 		struct amd_pp_simple_clock_info *clocks);
 	int (*notify_smu_enable_pwe)(void *handle);
 	int (*enable_mgpu_fan_boost)(void *handle);
+	int (*set_active_display_count)(void *handle, uint32_t count);
+	int (*set_hard_min_dcefclk_by_freq)(void *handle, uint32_t clock);
+	int (*set_hard_min_fclk_by_freq)(void *handle, uint32_t clock);
+	int (*set_min_deep_sleep_dcefclk)(void *handle, uint32_t clock);
 };
 
 #endif
diff --git a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
index b68c2e0fef01..9bc27f468d5b 100644
--- a/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
+++ b/drivers/gpu/drm/amd/powerplay/amd_powerplay.c
@@ -725,7 +725,7 @@ static int pp_dpm_force_clock_level(void *handle,
 	}
 
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
-		pr_info("force clock level is for dpm manual mode only.\n");
+		pr_debug("force clock level is for dpm manual mode only.\n");
 		return -EINVAL;
 	}
 
@@ -899,7 +899,7 @@ static int pp_set_power_profile_mode(void *handle, long *input, uint32_t size)
 	}
 
 	if (hwmgr->dpm_level != AMD_DPM_FORCED_LEVEL_MANUAL) {
-		pr_info("power profile setting is for manual dpm mode only.\n");
+		pr_debug("power profile setting is for manual dpm mode only.\n");
 		return ret;
 	}
 
@@ -1072,7 +1072,7 @@ static int pp_get_current_clocks(void *handle,
 					&hw_clocks, PHM_PerformanceLevelDesignation_Activity);
 
 	if (ret) {
-		pr_info("Error in phm_get_clock_info \n");
+		pr_debug("Error in phm_get_clock_info \n");
 		mutex_unlock(&hwmgr->smu_lock);
 		return -EINVAL;
 	}
@@ -1332,6 +1332,78 @@ static int pp_enable_mgpu_fan_boost(void *handle)
 	return 0;
 }
 
+static int pp_set_min_deep_sleep_dcefclk(void *handle, uint32_t clock)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
+
+	if (hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk == NULL) {
+		pr_debug("%s was not implemented.\n", __func__);
+		return -EINVAL;;
+	}
+
+	mutex_lock(&hwmgr->smu_lock);
+	hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return 0;
+}
+
+static int pp_set_hard_min_dcefclk_by_freq(void *handle, uint32_t clock)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
+
+	if (hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq == NULL) {
+		pr_debug("%s was not implemented.\n", __func__);
+		return -EINVAL;;
+	}
+
+	mutex_lock(&hwmgr->smu_lock);
+	hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return 0;
+}
+
+static int pp_set_hard_min_fclk_by_freq(void *handle, uint32_t clock)
+{
+	struct pp_hwmgr *hwmgr = handle;
+
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
+
+	if (hwmgr->hwmgr_func->set_hard_min_fclk_by_freq == NULL) {
+		pr_debug("%s was not implemented.\n", __func__);
+		return -EINVAL;;
+	}
+
+	mutex_lock(&hwmgr->smu_lock);
+	hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return 0;
+}
+
+static int pp_set_active_display_count(void *handle, uint32_t count)
+{
+	struct pp_hwmgr *hwmgr = handle;
+	int ret = 0;
+
+	if (!hwmgr || !hwmgr->pm_en)
+		return -EINVAL;
+
+	mutex_lock(&hwmgr->smu_lock);
+	ret = phm_set_active_display_count(hwmgr, count);
+	mutex_unlock(&hwmgr->smu_lock);
+
+	return ret;
+}
+
 static const struct amd_pm_funcs pp_dpm_funcs = {
 	.load_firmware = pp_dpm_load_fw,
 	.wait_for_fw_loading_complete = pp_dpm_fw_loading_complete,
@@ -1378,4 +1450,8 @@ static const struct amd_pm_funcs pp_dpm_funcs = {
 	.get_display_mode_validation_clocks = pp_get_display_mode_validation_clocks,
 	.notify_smu_enable_pwe = pp_notify_smu_enable_pwe,
 	.enable_mgpu_fan_boost = pp_enable_mgpu_fan_boost,
+	.set_active_display_count = pp_set_active_display_count,
+	.set_min_deep_sleep_dcefclk = pp_set_min_deep_sleep_dcefclk,
+	.set_hard_min_dcefclk_by_freq = pp_set_hard_min_dcefclk_by_freq,
+	.set_hard_min_fclk_by_freq = pp_set_hard_min_fclk_by_freq,
 };
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
index 85119c2bdcc8..1f92a9f4c9e3 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hardwaremanager.c
@@ -80,7 +80,9 @@ int phm_enable_dynamic_state_management(struct pp_hwmgr *hwmgr)
 	PHM_FUNC_CHECK(hwmgr);
 	adev = hwmgr->adev;
 
-	if (smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev)) {
+	/* Skip for suspend/resume case */
+	if (smum_is_dpm_running(hwmgr) && !amdgpu_passthrough(adev)
+		&& adev->in_suspend) {
 		pr_info("dpm has been enabled\n");
 		return 0;
 	}
@@ -286,8 +288,8 @@ int phm_store_dal_configuration_data(struct pp_hwmgr *hwmgr,
 	if (display_config == NULL)
 		return -EINVAL;
 
-	if (NULL != hwmgr->hwmgr_func->set_deep_sleep_dcefclk)
-		hwmgr->hwmgr_func->set_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk);
+	if (NULL != hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk)
+		hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, display_config->min_dcef_deep_sleep_set_clk);
 
 	for (index = 0; index < display_config->num_path_including_non_display; index++) {
 		if (display_config->displays[index].controller_id != 0)
@@ -478,3 +480,44 @@ int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr)
 
 	return hwmgr->hwmgr_func->disable_smc_firmware_ctf(hwmgr);
 }
+
+int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (!hwmgr->hwmgr_func->set_active_display_count)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->set_active_display_count(hwmgr, count);
+}
+
+int phm_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (!hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->set_min_deep_sleep_dcefclk(hwmgr, clock);
+}
+
+int phm_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (!hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->set_hard_min_dcefclk_by_freq(hwmgr, clock);
+}
+
+int phm_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock)
+{
+	PHM_FUNC_CHECK(hwmgr);
+
+	if (!hwmgr->hwmgr_func->set_hard_min_fclk_by_freq)
+		return -EINVAL;
+
+	return hwmgr->hwmgr_func->set_hard_min_fclk_by_freq(hwmgr, clock);
+}
+
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
index 47ac92369739..310b102a9292 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/hwmgr.c
@@ -64,17 +64,19 @@ static int ci_set_asic_special_caps(struct pp_hwmgr *hwmgr);
 
 static void hwmgr_init_workload_prority(struct pp_hwmgr *hwmgr)
 {
-	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 2;
-	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 0;
-	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 1;
-	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VR] = 3;
-	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 4;
-
-	hwmgr->workload_setting[0] = PP_SMC_POWER_PROFILE_POWERSAVING;
-	hwmgr->workload_setting[1] = PP_SMC_POWER_PROFILE_VIDEO;
-	hwmgr->workload_setting[2] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
-	hwmgr->workload_setting[3] = PP_SMC_POWER_PROFILE_VR;
-	hwmgr->workload_setting[4] = PP_SMC_POWER_PROFILE_COMPUTE;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT] = 0;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_FULLSCREEN3D] = 1;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_POWERSAVING] = 2;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO] = 3;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VR] = 4;
+	hwmgr->workload_prority[PP_SMC_POWER_PROFILE_COMPUTE] = 5;
+
+	hwmgr->workload_setting[0] = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+	hwmgr->workload_setting[1] = PP_SMC_POWER_PROFILE_FULLSCREEN3D;
+	hwmgr->workload_setting[2] = PP_SMC_POWER_PROFILE_POWERSAVING;
+	hwmgr->workload_setting[3] = PP_SMC_POWER_PROFILE_VIDEO;
+	hwmgr->workload_setting[4] = PP_SMC_POWER_PROFILE_VR;
+	hwmgr->workload_setting[5] = PP_SMC_POWER_PROFILE_COMPUTE;
 }
 
 int hwmgr_early_init(struct pp_hwmgr *hwmgr)
@@ -352,6 +354,9 @@ int hwmgr_handle_task(struct pp_hwmgr *hwmgr, enum amd_pp_task task_id,
 
 	switch (task_id) {
 	case AMD_PP_TASK_DISPLAY_CONFIG_CHANGE:
+		ret = phm_pre_display_configuration_changed(hwmgr);
+		if (ret)
+			return ret;
 		ret = phm_set_cpu_power_state(hwmgr);
 		if (ret)
 			return ret;
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
index 91ffb7bc4ee7..56437866d120 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/pp_psm.c
@@ -265,8 +265,6 @@ int psm_adjust_power_state_dynamic(struct pp_hwmgr *hwmgr, bool skip,
 	if (skip)
 		return 0;
 
-	phm_pre_display_configuration_changed(hwmgr);
-
 	phm_display_configuration_changed(hwmgr);
 
 	if (hwmgr->ps)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
index dd18cb710391..f95c5f50eb0f 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu10_hwmgr.c
@@ -216,12 +216,12 @@ static inline uint32_t convert_10k_to_mhz(uint32_t clock)
 	return (clock + 99) / 100;
 }
 
-static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock)
+static int smu10_set_min_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
 
 	if (smu10_data->need_min_deep_sleep_dcefclk &&
-	    smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) {
+		smu10_data->deep_sleep_dcefclk != convert_10k_to_mhz(clock)) {
 		smu10_data->deep_sleep_dcefclk = convert_10k_to_mhz(clock);
 		smum_send_msg_to_smc_with_parameter(hwmgr,
 					PPSMC_MSG_SetMinDeepSleepDcefclk,
@@ -230,6 +230,34 @@ static int smu10_set_deep_sleep_dcefclk(struct pp_hwmgr *hwmgr, uint32_t clock)
 	return 0;
 }
 
+static int smu10_set_hard_min_dcefclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock)
+{
+	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+
+	if (smu10_data->dcf_actual_hard_min_freq &&
+		smu10_data->dcf_actual_hard_min_freq != convert_10k_to_mhz(clock)) {
+		smu10_data->dcf_actual_hard_min_freq = convert_10k_to_mhz(clock);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetHardMinDcefclkByFreq,
+					smu10_data->dcf_actual_hard_min_freq);
+	}
+	return 0;
+}
+
+static int smu10_set_hard_min_fclk_by_freq(struct pp_hwmgr *hwmgr, uint32_t clock)
+{
+	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
+
+	if (smu10_data->f_actual_hard_min_freq &&
+		smu10_data->f_actual_hard_min_freq != convert_10k_to_mhz(clock)) {
+		smu10_data->f_actual_hard_min_freq = convert_10k_to_mhz(clock);
+		smum_send_msg_to_smc_with_parameter(hwmgr,
+					PPSMC_MSG_SetHardMinFclkByFreq,
+					smu10_data->f_actual_hard_min_freq);
+	}
+	return 0;
+}
+
 static int smu10_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count)
 {
 	struct smu10_hwmgr *smu10_data = (struct smu10_hwmgr *)(hwmgr->backend);
@@ -1206,7 +1234,7 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
 	.get_max_high_clocks = smu10_get_max_high_clocks,
 	.read_sensor = smu10_read_sensor,
 	.set_active_display_count = smu10_set_active_display_count,
-	.set_deep_sleep_dcefclk = smu10_set_deep_sleep_dcefclk,
+	.set_min_deep_sleep_dcefclk = smu10_set_min_deep_sleep_dcefclk,
 	.dynamic_state_management_enable = smu10_enable_dpm_tasks,
 	.power_off_asic = smu10_power_off_asic,
 	.asic_setup = smu10_setup_asic_task,
@@ -1217,6 +1245,8 @@ static const struct pp_hwmgr_func smu10_hwmgr_funcs = {
 	.display_clock_voltage_request = smu10_display_clock_voltage_request,
 	.powergate_gfx = smu10_gfx_off_control,
 	.powergate_sdma = smu10_powergate_sdma,
+	.set_hard_min_dcefclk_by_freq = smu10_set_hard_min_dcefclk_by_freq,
+	.set_hard_min_fclk_by_freq = smu10_set_hard_min_fclk_by_freq,
 };
 
 int smu10_init_function_pointers(struct pp_hwmgr *hwmgr)
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
index 5dcd21d29dbf..c8f5c00dd1e7 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/smu7_hwmgr.c
@@ -77,8 +77,9 @@
 #define PCIE_BUS_CLK                10000
 #define TCLK                        (PCIE_BUS_CLK / 10)
 
-static const struct profile_mode_setting smu7_profiling[6] =
-					{{1, 0, 100, 30, 1, 0, 100, 10},
+static const struct profile_mode_setting smu7_profiling[7] =
+					{{0, 0, 0, 0, 0, 0, 0, 0},
+					 {1, 0, 100, 30, 1, 0, 100, 10},
 					 {1, 10, 0, 30, 0, 0, 0, 0},
 					 {0, 0, 0, 0, 1, 10, 16, 31},
 					 {1, 0, 11, 50, 1, 0, 100, 10},
@@ -2859,7 +2860,10 @@ static int smu7_vblank_too_short(struct pp_hwmgr *hwmgr,
 	case CHIP_POLARIS10:
 	case CHIP_POLARIS11:
 	case CHIP_POLARIS12:
-		switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
+		if (hwmgr->is_kicker)
+			switch_limit_us = data->is_memory_gddr5 ? 450 : 150;
+		else
+			switch_limit_us = data->is_memory_gddr5 ? 190 : 150;
 		break;
 	case CHIP_VEGAM:
 		switch_limit_us = 30;
@@ -3589,8 +3593,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 	}
 
 	if (i >= sclk_table->count) {
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-		sclk_table->dpm_levels[i-1].value = sclk;
+		if (sclk > sclk_table->dpm_levels[i-1].value) {
+			data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+			sclk_table->dpm_levels[i-1].value = sclk;
+		}
 	} else {
 	/* TODO: Check SCLK in DAL's minimum clocks
 	 * in case DeepSleep divider update is required.
@@ -3607,8 +3613,10 @@ static int smu7_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, cons
 	}
 
 	if (i >= mclk_table->count) {
-		data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-		mclk_table->dpm_levels[i-1].value = mclk;
+		if (mclk > mclk_table->dpm_levels[i-1].value) {
+			data->need_update_smu7_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+			mclk_table->dpm_levels[i-1].value = mclk;
+		}
 	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
@@ -4219,9 +4227,17 @@ static int smu7_check_mc_firmware(struct pp_hwmgr *hwmgr)
 	if (tmp & (1 << 23)) {
 		data->mem_latency_high = MEM_LATENCY_HIGH;
 		data->mem_latency_low = MEM_LATENCY_LOW;
+		if ((hwmgr->chip_id == CHIP_POLARIS10) ||
+		    (hwmgr->chip_id == CHIP_POLARIS11) ||
+		    (hwmgr->chip_id == CHIP_POLARIS12))
+			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableFFC);
 	} else {
 		data->mem_latency_high = 330;
 		data->mem_latency_low = 330;
+		if ((hwmgr->chip_id == CHIP_POLARIS10) ||
+		    (hwmgr->chip_id == CHIP_POLARIS11) ||
+		    (hwmgr->chip_id == CHIP_POLARIS12))
+			smum_send_msg_to_smc(hwmgr, PPSMC_MSG_DisableFFC);
 	}
 
 	return 0;
@@ -4874,7 +4890,8 @@ static int smu7_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
 	uint32_t i, size = 0;
 	uint32_t len;
 
-	static const char *profile_name[6] = {"3D_FULL_SCREEN",
+	static const char *profile_name[7] = {"BOOTUP_DEFAULT",
+					"3D_FULL_SCREEN",
 					"POWER_SAVING",
 					"VIDEO",
 					"VR",
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
index e2bc6e0c229f..91e3bbe6d61d 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_hwmgr.c
@@ -804,9 +804,9 @@ static int vega10_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	hwmgr->backend = data;
 
-	hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO];
-	hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO;
-	hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO;
+	hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+	hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+	hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
 
 	vega10_set_default_registry_data(hwmgr);
 	data->disable_dpm_mask = 0xff;
@@ -3266,8 +3266,10 @@ static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, co
 	}
 
 	if (i >= sclk_table->count) {
-		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
-		sclk_table->dpm_levels[i-1].value = sclk;
+		if (sclk > sclk_table->dpm_levels[i-1].value) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_SCLK;
+			sclk_table->dpm_levels[i-1].value = sclk;
+		}
 	}
 
 	for (i = 0; i < mclk_table->count; i++) {
@@ -3276,8 +3278,10 @@ static int vega10_find_dpm_states_clocks_in_dpm_table(struct pp_hwmgr *hwmgr, co
 	}
 
 	if (i >= mclk_table->count) {
-		data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
-		mclk_table->dpm_levels[i-1].value = mclk;
+		if (mclk > mclk_table->dpm_levels[i-1].value) {
+			data->need_update_dpm_table |= DPMTABLE_OD_UPDATE_MCLK;
+			mclk_table->dpm_levels[i-1].value = mclk;
+		}
 	}
 
 	if (data->display_timing.num_existing_displays != hwmgr->display_config->num_display)
@@ -4664,13 +4668,15 @@ static int vega10_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
 {
 	struct vega10_hwmgr *data = hwmgr->backend;
 	uint32_t i, size = 0;
-	static const uint8_t profile_mode_setting[5][4] = {{70, 60, 1, 3,},
+	static const uint8_t profile_mode_setting[6][4] = {{70, 60, 0, 0,},
+						{70, 60, 1, 3,},
 						{90, 60, 0, 0,},
 						{70, 60, 0, 0,},
 						{70, 90, 0, 0,},
 						{30, 60, 0, 6,},
 						};
-	static const char *profile_name[6] = {"3D_FULL_SCREEN",
+	static const char *profile_name[7] = {"BOOTUP_DEFAULT",
+					"3D_FULL_SCREEN",
 					"POWER_SAVING",
 					"VIDEO",
 					"VR",
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
index b8747a5c9204..99d596dc0e89 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega10_processpptables.c
@@ -32,6 +32,7 @@
 #include "vega10_pptable.h"
 
 #define NUM_DSPCLK_LEVELS 8
+#define VEGA10_ENGINECLOCK_HARDMAX 198000
 
 static void set_hw_cap(struct pp_hwmgr *hwmgr, bool enable,
 		enum phm_platform_caps cap)
@@ -258,7 +259,26 @@ static int init_over_drive_limits(
 		struct pp_hwmgr *hwmgr,
 		const ATOM_Vega10_POWERPLAYTABLE *powerplay_table)
 {
-	hwmgr->platform_descriptor.overdriveLimit.engineClock =
+	const ATOM_Vega10_GFXCLK_Dependency_Table *gfxclk_dep_table =
+			(const ATOM_Vega10_GFXCLK_Dependency_Table *)
+			(((unsigned long) powerplay_table) +
+			le16_to_cpu(powerplay_table->usGfxclkDependencyTableOffset));
+	bool is_acg_enabled = false;
+	ATOM_Vega10_GFXCLK_Dependency_Record_V2 *patom_record_v2;
+
+	if (gfxclk_dep_table->ucRevId == 1) {
+		patom_record_v2 =
+			(ATOM_Vega10_GFXCLK_Dependency_Record_V2 *)gfxclk_dep_table->entries;
+		is_acg_enabled =
+			(bool)patom_record_v2[gfxclk_dep_table->ucNumEntries-1].ucACGEnable;
+	}
+
+	if (powerplay_table->ulMaxODEngineClock > VEGA10_ENGINECLOCK_HARDMAX &&
+		!is_acg_enabled)
+		hwmgr->platform_descriptor.overdriveLimit.engineClock =
+			VEGA10_ENGINECLOCK_HARDMAX;
+	else
+		hwmgr->platform_descriptor.overdriveLimit.engineClock =
 			le32_to_cpu(powerplay_table->ulMaxODEngineClock);
 	hwmgr->platform_descriptor.overdriveLimit.memoryClock =
 			le32_to_cpu(powerplay_table->ulMaxODMemoryClock);
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
index 54364444ecd1..0c8212902275 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega12_hwmgr.c
@@ -753,6 +753,22 @@ static int vega12_init_smc_table(struct pp_hwmgr *hwmgr)
 	return 0;
 }
 
+static int vega12_run_acg_btc(struct pp_hwmgr *hwmgr)
+{
+	uint32_t result;
+
+	PP_ASSERT_WITH_CODE(
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_RunAcgBtc) == 0,
+		"[Run_ACG_BTC] Attempt to run ACG BTC failed!",
+		return -EINVAL);
+
+	result = smum_get_argument(hwmgr);
+	PP_ASSERT_WITH_CODE(result == 1,
+			"Failed to run ACG BTC!", return -EINVAL);
+
+	return 0;
+}
+
 static int vega12_set_allowed_featuresmask(struct pp_hwmgr *hwmgr)
 {
 	struct vega12_hwmgr *data =
@@ -931,6 +947,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
 			"Failed to initialize SMC table!",
 			result = tmp_result);
 
+	tmp_result = vega12_run_acg_btc(hwmgr);
+	PP_ASSERT_WITH_CODE(!tmp_result,
+			"Failed to run ACG BTC!",
+			result = tmp_result);
+
 	result = vega12_enable_all_smu_features(hwmgr);
 	PP_ASSERT_WITH_CODE(!result,
 			"Failed to enable all smu features!",
diff --git a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
index 2679d1240fa1..82935a3bd950 100644
--- a/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
+++ b/drivers/gpu/drm/amd/powerplay/hwmgr/vega20_hwmgr.c
@@ -49,6 +49,10 @@
 #include "soc15_common.h"
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
+#include "nbio/nbio_7_4_sh_mask.h"
+
+#define smnPCIE_LC_SPEED_CNTL			0x11140290
+#define smnPCIE_LC_LINK_WIDTH_CNTL		0x11140288
 
 static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 {
@@ -130,7 +134,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
 	data->registry_data.disable_auto_wattman = 1;
 	data->registry_data.auto_wattman_debug = 0;
 	data->registry_data.auto_wattman_sample_period = 100;
-	data->registry_data.fclk_gfxclk_ratio = 0x3F6CCCCD;
+	data->registry_data.fclk_gfxclk_ratio = 0;
 	data->registry_data.auto_wattman_threshold = 50;
 	data->registry_data.gfxoff_controlled_by_driver = 1;
 	data->gfxoff_allowed = false;
@@ -386,9 +390,9 @@ static int vega20_hwmgr_backend_init(struct pp_hwmgr *hwmgr)
 
 	hwmgr->backend = data;
 
-	hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_VIDEO];
-	hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO;
-	hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_VIDEO;
+	hwmgr->workload_mask = 1 << hwmgr->workload_prority[PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT];
+	hwmgr->power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
+	hwmgr->default_power_profile_mode = PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT;
 
 	vega20_set_default_registry_data(hwmgr);
 
@@ -976,6 +980,9 @@ static int vega20_od8_set_feature_capabilities(
 	    pp_table->FanZeroRpmEnable)
 		od_settings->overdrive8_capabilities |= OD8_FAN_ZERO_RPM_CONTROL;
 
+	if (!od_settings->overdrive8_capabilities)
+		hwmgr->od_enabled = false;
+
 	return 0;
 }
 
@@ -1660,14 +1667,15 @@ static uint32_t vega20_find_highest_dpm_level(
 	return i;
 }
 
-static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
+static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr, uint32_t feature_mask)
 {
 	struct vega20_hwmgr *data =
 			(struct vega20_hwmgr *)(hwmgr->backend);
 	uint32_t min_freq;
 	int ret = 0;
 
-	if (data->smu_features[GNLD_DPM_GFXCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_GFXCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_GFXCLK_MASK)) {
 		min_freq = data->dpm_table.gfx_table.dpm_state.soft_min_level;
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
 					hwmgr, PPSMC_MSG_SetSoftMinByFreq,
@@ -1676,23 +1684,18 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_UCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_UCLK_MASK)) {
 		min_freq = data->dpm_table.mem_table.dpm_state.soft_min_level;
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
 					hwmgr, PPSMC_MSG_SetSoftMinByFreq,
 					(PPCLK_UCLK << 16) | (min_freq & 0xffff))),
 					"Failed to set soft min memclk !",
 					return ret);
-
-		min_freq = data->dpm_table.mem_table.dpm_state.hard_min_level;
-		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
-					hwmgr, PPSMC_MSG_SetHardMinByFreq,
-					(PPCLK_UCLK << 16) | (min_freq & 0xffff))),
-					"Failed to set hard min memclk !",
-					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UVD].enabled) {
+	if (data->smu_features[GNLD_DPM_UVD].enabled &&
+	   (feature_mask & FEATURE_DPM_UVD_MASK)) {
 		min_freq = data->dpm_table.vclk_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1710,7 +1713,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_VCE].enabled) {
+	if (data->smu_features[GNLD_DPM_VCE].enabled &&
+	   (feature_mask & FEATURE_DPM_VCE_MASK)) {
 		min_freq = data->dpm_table.eclk_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1720,7 +1724,8 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_SOCCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_SOCCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_SOCCLK_MASK)) {
 		min_freq = data->dpm_table.soc_table.dpm_state.soft_min_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1733,14 +1738,15 @@ static int vega20_upload_dpm_min_level(struct pp_hwmgr *hwmgr)
 	return ret;
 }
 
-static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
+static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr, uint32_t feature_mask)
 {
 	struct vega20_hwmgr *data =
 			(struct vega20_hwmgr *)(hwmgr->backend);
 	uint32_t max_freq;
 	int ret = 0;
 
-	if (data->smu_features[GNLD_DPM_GFXCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_GFXCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_GFXCLK_MASK)) {
 		max_freq = data->dpm_table.gfx_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1750,7 +1756,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_UCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_UCLK_MASK)) {
 		max_freq = data->dpm_table.mem_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1760,7 +1767,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_UVD].enabled) {
+	if (data->smu_features[GNLD_DPM_UVD].enabled &&
+	   (feature_mask & FEATURE_DPM_UVD_MASK)) {
 		max_freq = data->dpm_table.vclk_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1777,7 +1785,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_VCE].enabled) {
+	if (data->smu_features[GNLD_DPM_VCE].enabled &&
+	   (feature_mask & FEATURE_DPM_VCE_MASK)) {
 		max_freq = data->dpm_table.eclk_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -1787,7 +1796,8 @@ static int vega20_upload_dpm_max_level(struct pp_hwmgr *hwmgr)
 					return ret);
 	}
 
-	if (data->smu_features[GNLD_DPM_SOCCLK].enabled) {
+	if (data->smu_features[GNLD_DPM_SOCCLK].enabled &&
+	   (feature_mask & FEATURE_DPM_SOCCLK_MASK)) {
 		max_freq = data->dpm_table.soc_table.dpm_state.soft_max_level;
 
 		PP_ASSERT_WITH_CODE(!(ret = smum_send_msg_to_smc_with_parameter(
@@ -2126,12 +2136,12 @@ static int vega20_force_dpm_highest(struct pp_hwmgr *hwmgr)
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 		data->dpm_table.mem_table.dpm_levels[soft_level].value;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to highest!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2158,12 +2168,12 @@ static int vega20_force_dpm_lowest(struct pp_hwmgr *hwmgr)
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 		data->dpm_table.mem_table.dpm_levels[soft_level].value;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to highest!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2176,12 +2186,12 @@ static int vega20_unforce_dpm_levels(struct pp_hwmgr *hwmgr)
 {
 	int ret = 0;
 
-	ret = vega20_upload_dpm_min_level(hwmgr);
+	ret = vega20_upload_dpm_min_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload DPM Bootup Levels!",
 			return ret);
 
-	ret = vega20_upload_dpm_max_level(hwmgr);
+	ret = vega20_upload_dpm_max_level(hwmgr, 0xFFFFFFFF);
 	PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload DPM Max Levels!",
 			return ret);
@@ -2234,17 +2244,24 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
 		soft_min_level = mask ? (ffs(mask) - 1) : 0;
 		soft_max_level = mask ? (fls(mask) - 1) : 0;
 
+		if (soft_max_level >= data->dpm_table.gfx_table.count) {
+			pr_err("Clock level specified %d is over max allowed %d\n",
+					soft_max_level,
+					data->dpm_table.gfx_table.count - 1);
+			return -EINVAL;
+		}
+
 		data->dpm_table.gfx_table.dpm_state.soft_min_level =
 			data->dpm_table.gfx_table.dpm_levels[soft_min_level].value;
 		data->dpm_table.gfx_table.dpm_state.soft_max_level =
 			data->dpm_table.gfx_table.dpm_levels[soft_max_level].value;
 
-		ret = vega20_upload_dpm_min_level(hwmgr);
+		ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_GFXCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to lowest!",
 			return ret);
 
-		ret = vega20_upload_dpm_max_level(hwmgr);
+		ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_GFXCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2254,17 +2271,24 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
 		soft_min_level = mask ? (ffs(mask) - 1) : 0;
 		soft_max_level = mask ? (fls(mask) - 1) : 0;
 
+		if (soft_max_level >= data->dpm_table.mem_table.count) {
+			pr_err("Clock level specified %d is over max allowed %d\n",
+					soft_max_level,
+					data->dpm_table.mem_table.count - 1);
+			return -EINVAL;
+		}
+
 		data->dpm_table.mem_table.dpm_state.soft_min_level =
 			data->dpm_table.mem_table.dpm_levels[soft_min_level].value;
 		data->dpm_table.mem_table.dpm_state.soft_max_level =
 			data->dpm_table.mem_table.dpm_levels[soft_max_level].value;
 
-		ret = vega20_upload_dpm_min_level(hwmgr);
+		ret = vega20_upload_dpm_min_level(hwmgr, FEATURE_DPM_UCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload boot level to lowest!",
 			return ret);
 
-		ret = vega20_upload_dpm_max_level(hwmgr);
+		ret = vega20_upload_dpm_max_level(hwmgr, FEATURE_DPM_UCLK_MASK);
 		PP_ASSERT_WITH_CODE(!ret,
 			"Failed to upload dpm max level to highest!",
 			return ret);
@@ -2272,6 +2296,18 @@ static int vega20_force_clock_level(struct pp_hwmgr *hwmgr,
 		break;
 
 	case PP_PCIE:
+		soft_min_level = mask ? (ffs(mask) - 1) : 0;
+		soft_max_level = mask ? (fls(mask) - 1) : 0;
+		if (soft_min_level >= NUM_LINK_LEVELS ||
+		    soft_max_level >= NUM_LINK_LEVELS)
+			return -EINVAL;
+
+		ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+			PPSMC_MSG_SetMinLinkDpmByIndex, soft_min_level);
+		PP_ASSERT_WITH_CODE(!ret,
+			"Failed to set min link dpm level!",
+			return ret);
+
 		break;
 
 	default:
@@ -2748,9 +2784,14 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 			data->od8_settings.od8_settings_array;
 	OverDriveTable_t *od_table =
 			&(data->smc_state_table.overdrive_table);
+	struct phm_ppt_v3_information *pptable_information =
+		(struct phm_ppt_v3_information *)hwmgr->pptable;
+	PPTable_t *pptable = (PPTable_t *)pptable_information->smc_pptable;
+	struct amdgpu_device *adev = hwmgr->adev;
 	struct pp_clock_levels_with_latency clocks;
 	int i, now, size = 0;
 	int ret = 0;
+	uint32_t gen_speed, lane_width;
 
 	switch (type) {
 	case PP_SCLK:
@@ -2788,6 +2829,28 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
 		break;
 
 	case PP_PCIE:
+		gen_speed = (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
+			     PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
+			    >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+		lane_width = (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
+			      PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+			    >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+		for (i = 0; i < NUM_LINK_LEVELS; i++)
+			size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
+					(pptable->PcieGenSpeed[i] == 0) ? "2.5GT/s," :
+					(pptable->PcieGenSpeed[i] == 1) ? "5.0GT/s," :
+					(pptable->PcieGenSpeed[i] == 2) ? "8.0GT/s," :
+					(pptable->PcieGenSpeed[i] == 3) ? "16.0GT/s," : "",
+					(pptable->PcieLaneCount[i] == 1) ? "x1" :
+					(pptable->PcieLaneCount[i] == 2) ? "x2" :
+					(pptable->PcieLaneCount[i] == 3) ? "x4" :
+					(pptable->PcieLaneCount[i] == 4) ? "x8" :
+					(pptable->PcieLaneCount[i] == 5) ? "x12" :
+					(pptable->PcieLaneCount[i] == 6) ? "x16" : "",
+					pptable->LclkFreq[i],
+					(gen_speed == pptable->PcieGenSpeed[i]) &&
+					(lane_width == pptable->PcieLaneCount[i]) ?
+					"*" : "");
 		break;
 
 	case OD_SCLK:
@@ -3208,6 +3271,9 @@ static int conv_power_profile_to_pplib_workload(int power_profile)
 	int pplib_workload = 0;
 
 	switch (power_profile) {
+	case PP_SMC_POWER_PROFILE_BOOTUP_DEFAULT:
+		pplib_workload = WORKLOAD_DEFAULT_BIT;
+		break;
 	case PP_SMC_POWER_PROFILE_FULLSCREEN3D:
 		pplib_workload = WORKLOAD_PPLIB_FULL_SCREEN_3D_BIT;
 		break;
@@ -3237,6 +3303,7 @@ static int vega20_get_power_profile_mode(struct pp_hwmgr *hwmgr, char *buf)
 	uint32_t i, size = 0;
 	uint16_t workload_type = 0;
 	static const char *profile_name[] = {
+					"BOOTUP_DEFAULT",
 					"3D_FULL_SCREEN",
 					"POWER_SAVING",
 					"VIDEO",
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
index 54fd0125d9cf..f4dab979a3a1 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hardwaremanager.h
@@ -463,5 +463,8 @@ extern int phm_display_clock_voltage_request(struct pp_hwmgr *hwmgr,
 
 extern int phm_get_max_high_clocks(struct pp_hwmgr *hwmgr, struct amd_pp_simple_clock_info *clocks);
 extern int phm_disable_smc_firmware_ctf(struct pp_hwmgr *hwmgr);
+
+extern int phm_set_active_display_count(struct pp_hwmgr *hwmgr, uint32_t count);
+
 #endif /* _HARDWARE_MANAGER_H_ */
 
diff --git a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
index fb0f96f7cdbc..8cb831b6a016 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/hwmgr.h
@@ -309,7 +309,7 @@ struct pp_hwmgr_func {
 	int (*avfs_control)(struct pp_hwmgr *hwmgr, bool enable);
 	int (*disable_smc_firmware_ctf)(struct pp_hwmgr *hwmgr);
 	int (*set_active_display_count)(struct pp_hwmgr *hwmgr, uint32_t count);
-	int (*set_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock);
+	int (*set_min_deep_sleep_dcefclk)(struct pp_hwmgr *hwmgr, uint32_t clock);
 	int (*start_thermal_controller)(struct pp_hwmgr *hwmgr, struct PP_TemperatureRange *range);
 	int (*notify_cac_buffer_info)(struct pp_hwmgr *hwmgr,
 					uint32_t virtual_addr_low,
@@ -332,6 +332,8 @@ struct pp_hwmgr_func {
 	int (*smus_notify_pwe)(struct pp_hwmgr *hwmgr);
 	int (*powergate_sdma)(struct pp_hwmgr *hwmgr, bool bgate);
 	int (*enable_mgpu_fan_boost)(struct pp_hwmgr *hwmgr);
+	int (*set_hard_min_dcefclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
+	int (*set_hard_min_fclk_by_freq)(struct pp_hwmgr *hwmgr, uint32_t clock);
 };
 
 struct pp_table_func {
@@ -703,7 +705,7 @@ enum PP_TABLE_VERSION {
 /**
  * The main hardware manager structure.
  */
-#define Workload_Policy_Max 5
+#define Workload_Policy_Max 6
 
 struct pp_hwmgr {
 	void *adev;
diff --git a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
index 62f36ba2435b..6e19f4c7cf8f 100644
--- a/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
+++ b/drivers/gpu/drm/amd/powerplay/inc/smu7_ppsmc.h
@@ -386,6 +386,8 @@ typedef uint16_t PPSMC_Result;
 #define PPSMC_MSG_AgmResetPsm                 ((uint16_t) 0x403)
 #define PPSMC_MSG_ReadVftCell                 ((uint16_t) 0x404)
 
+#define PPSMC_MSG_ApplyAvfsCksOffVoltage      ((uint16_t) 0x415)
+
 #define PPSMC_MSG_GFX_CU_PG_ENABLE            ((uint16_t) 0x280)
 #define PPSMC_MSG_GFX_CU_PG_DISABLE           ((uint16_t) 0x281)
 #define PPSMC_MSG_GetCurrPkgPwr               ((uint16_t) 0x282)
@@ -395,6 +397,9 @@ typedef uint16_t PPSMC_Result;
 
 #define PPSMC_MSG_SetVBITimeout               ((uint16_t) 0x306)
 
+#define PPSMC_MSG_EnableFFC                   ((uint16_t) 0x307)
+#define PPSMC_MSG_DisableFFC                  ((uint16_t) 0x308)
+
 #define PPSMC_MSG_EnableDpmDidt               ((uint16_t) 0x309)
 #define PPSMC_MSG_DisableDpmDidt              ((uint16_t) 0x30A)
 
diff --git a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
index 2b2c26616902..52abca065764 100644
--- a/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
+++ b/drivers/gpu/drm/amd/powerplay/smumgr/polaris10_smumgr.c
@@ -1528,8 +1528,21 @@ static int polaris10_populate_clock_stretcher_data_table(struct pp_hwmgr *hwmgr)
 	efuse = efuse >> 24;
 
 	if (hwmgr->chip_id == CHIP_POLARIS10) {
-		min = 1000;
-		max = 2300;
+		if (hwmgr->is_kicker) {
+			min = 1200;
+			max = 2500;
+		} else {
+			min = 1000;
+			max = 2300;
+		}
+	} else if (hwmgr->chip_id == CHIP_POLARIS11) {
+		if (hwmgr->is_kicker) {
+			min = 900;
+			max = 2100;
+		} else {
+			min = 1100;
+			max = 2100;
+		}
 	} else {
 		min = 1100;
 		max = 2100;
@@ -1626,6 +1639,7 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 {
 	struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
 	struct polaris10_smumgr *smu_data = (struct polaris10_smumgr *)(hwmgr->smu_backend);
+	struct amdgpu_device *adev = hwmgr->adev;
 
 	SMU74_Discrete_DpmTable  *table = &(smu_data->smc_state_table);
 	int result = 0;
@@ -1646,6 +1660,59 @@ static int polaris10_populate_avfs_parameters(struct pp_hwmgr *hwmgr)
 	result = atomctrl_get_avfs_information(hwmgr, &avfs_params);
 
 	if (0 == result) {
+		if (((adev->pdev->device == 0x67ef) &&
+		     ((adev->pdev->revision == 0xe0) ||
+		      (adev->pdev->revision == 0xe5))) ||
+		    ((adev->pdev->device == 0x67ff) &&
+		     ((adev->pdev->revision == 0xcf) ||
+		      (adev->pdev->revision == 0xef) ||
+		      (adev->pdev->revision == 0xff)))) {
+			avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1;
+			if ((adev->pdev->device == 0x67ef && adev->pdev->revision == 0xe5) ||
+			    (adev->pdev->device == 0x67ff && adev->pdev->revision == 0xef)) {
+				if ((avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0 == 0xEA522DD3) &&
+				    (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1 == 0x5645A) &&
+				    (avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2 == 0x33F9E) &&
+				    (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 == 0xFFFFC5CC) &&
+				    (avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 == 0x1B1A) &&
+				    (avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b == 0xFFFFFCED)) {
+					avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0   = 0xF718F1D4;
+					avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1   = 0x323FD;
+					avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2   = 0x1E455;
+					avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0;
+					avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0;
+					avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b  = 0x23;
+				}
+			}
+		} else if (hwmgr->chip_id == CHIP_POLARIS12 && !hwmgr->is_kicker) {
+			avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0   = 0xF6B024DD;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1   = 0x3005E;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2   = 0x18A5F;
+			avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0x315;
+			avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFED1;
+			avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b  = 0x3B;
+		} else if (((adev->pdev->device == 0x67df) &&
+			    ((adev->pdev->revision == 0xe0) ||
+			     (adev->pdev->revision == 0xe3) ||
+			     (adev->pdev->revision == 0xe4) ||
+			     (adev->pdev->revision == 0xe5) ||
+			     (adev->pdev->revision == 0xe7) ||
+			     (adev->pdev->revision == 0xef))) ||
+			   ((adev->pdev->device == 0x6fdf) &&
+			    ((adev->pdev->revision == 0xef) ||
+			     (adev->pdev->revision == 0xff)))) {
+			avfs_params.ucEnableApplyAVFS_CKS_OFF_Voltage = 1;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a0   = 0xF843B66B;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a1   = 0x59CB5;
+			avfs_params.ulGB_VDROOP_TABLE_CKSOFF_a2   = 0xFFFF287F;
+			avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_m1 = 0;
+			avfs_params.usAVFSGB_FUSE_TABLE_CKSOFF_m2 = 0xFF23;
+			avfs_params.ulAVFSGB_FUSE_TABLE_CKSOFF_b  = 0x58;
+		}
+	}
+
+	if (0 == result) {
 		table->BTCGB_VDROOP_TABLE[0].a0  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a0);
 		table->BTCGB_VDROOP_TABLE[0].a1  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a1);
 		table->BTCGB_VDROOP_TABLE[0].a2  = PP_HOST_TO_SMC_UL(avfs_params.ulGB_VDROOP_TABLE_CKSON_a2);
@@ -1984,6 +2051,12 @@ int polaris10_thermal_avfs_enable(struct pp_hwmgr *hwmgr)
 
 	smum_send_msg_to_smc(hwmgr, PPSMC_MSG_EnableAvfs);
 
+	/* Apply avfs cks-off voltages to avoid the overshoot
+	 * when switching to the highest sclk frequency
+	 */
+	if (data->apply_avfs_cks_off_voltage)
+		smum_send_msg_to_smc(hwmgr, PPSMC_MSG_ApplyAvfsCksOffVoltage);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/armada/armada_gem.c b/drivers/gpu/drm/armada/armada_gem.c
index 892c1d9304bb..642d0e70d0f8 100644
--- a/drivers/gpu/drm/armada/armada_gem.c
+++ b/drivers/gpu/drm/armada/armada_gem.c
@@ -334,7 +334,7 @@ int armada_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 
 	ptr = (char __user *)(uintptr_t)args->ptr;
 
-	if (!access_ok(VERIFY_READ, ptr, args->size))
+	if (!access_ok(ptr, args->size))
 		return -EFAULT;
 
 	ret = fault_in_pages_readable(ptr, args->size);
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index 0cd827e11fa2..de26df0c6044 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -263,6 +263,7 @@ static void ast_fbdev_destroy(struct drm_device *dev,
 {
 	struct ast_framebuffer *afb = &afbdev->afb;
 
+	drm_crtc_force_disable_all(dev);
 	drm_fb_helper_unregister_fbi(&afbdev->helper);
 
 	if (afb->obj) {
diff --git a/drivers/gpu/drm/ast/ast_main.c b/drivers/gpu/drm/ast/ast_main.c
index dac355812adc..373700c05a00 100644
--- a/drivers/gpu/drm/ast/ast_main.c
+++ b/drivers/gpu/drm/ast/ast_main.c
@@ -583,7 +583,8 @@ void ast_driver_unload(struct drm_device *dev)
 	drm_mode_config_cleanup(dev);
 
 	ast_mm_fini(ast);
-	pci_iounmap(dev->pdev, ast->ioregs);
+	if (ast->ioregs != ast->regs + AST_IO_MM_OFFSET)
+		pci_iounmap(dev->pdev, ast->ioregs);
 	pci_iounmap(dev->pdev, ast->regs);
 	kfree(ast);
 }
diff --git a/drivers/gpu/drm/ast/ast_mode.c b/drivers/gpu/drm/ast/ast_mode.c
index 7c6ac3cadb6b..8bb355d5d43d 100644
--- a/drivers/gpu/drm/ast/ast_mode.c
+++ b/drivers/gpu/drm/ast/ast_mode.c
@@ -973,9 +973,21 @@ static int get_clock(void *i2c_priv)
 {
 	struct ast_i2c_chan *i2c = i2c_priv;
 	struct ast_private *ast = i2c->dev->dev_private;
-	uint32_t val;
+	uint32_t val, val2, count, pass;
+
+	count = 0;
+	pass = 0;
+	val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+	do {
+		val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+		if (val == val2) {
+			pass++;
+		} else {
+			pass = 0;
+			val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4) & 0x01;
+		}
+	} while ((pass < 5) && (count++ < 0x10000));
 
-	val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x10) >> 4;
 	return val & 1 ? 1 : 0;
 }
 
@@ -983,9 +995,21 @@ static int get_data(void *i2c_priv)
 {
 	struct ast_i2c_chan *i2c = i2c_priv;
 	struct ast_private *ast = i2c->dev->dev_private;
-	uint32_t val;
+	uint32_t val, val2, count, pass;
+
+	count = 0;
+	pass = 0;
+	val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+	do {
+		val2 = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+		if (val == val2) {
+			pass++;
+		} else {
+			pass = 0;
+			val = (ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5) & 0x01;
+		}
+	} while ((pass < 5) && (count++ < 0x10000));
 
-	val = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x20) >> 5;
 	return val & 1 ? 1 : 0;
 }
 
@@ -998,7 +1022,7 @@ static void set_clock(void *i2c_priv, int clock)
 
 	for (i = 0; i < 0x10000; i++) {
 		ujcrb7 = ((clock & 0x01) ? 0 : 1);
-		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfe, ujcrb7);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf4, ujcrb7);
 		jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x01);
 		if (ujcrb7 == jtemp)
 			break;
@@ -1014,7 +1038,7 @@ static void set_data(void *i2c_priv, int data)
 
 	for (i = 0; i < 0x10000; i++) {
 		ujcrb7 = ((data & 0x01) ? 0 : 1) << 2;
-		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xfb, ujcrb7);
+		ast_set_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0xf1, ujcrb7);
 		jtemp = ast_get_index_reg_mask(ast, AST_IO_CRTC_PORT, 0xb7, 0x04);
 		if (ujcrb7 == jtemp)
 			break;
diff --git a/drivers/gpu/drm/bridge/tc358767.c b/drivers/gpu/drm/bridge/tc358767.c
index 8e28e738cb52..e6403b9549f1 100644
--- a/drivers/gpu/drm/bridge/tc358767.c
+++ b/drivers/gpu/drm/bridge/tc358767.c
@@ -98,6 +98,8 @@
 #define DP0_STARTVAL		0x064c
 #define DP0_ACTIVEVAL		0x0650
 #define DP0_SYNCVAL		0x0654
+#define SYNCVAL_HS_POL_ACTIVE_LOW	(1 << 15)
+#define SYNCVAL_VS_POL_ACTIVE_LOW	(1 << 31)
 #define DP0_MISC		0x0658
 #define TU_SIZE_RECOMMENDED		(63) /* LSCLK cycles per TU */
 #define BPC_6				(0 << 5)
@@ -142,6 +144,8 @@
 #define DP0_LTLOOPCTRL		0x06d8
 #define DP0_SNKLTCTRL		0x06e4
 
+#define DP1_SRCCTRL		0x07a0
+
 /* PHY */
 #define DP_PHY_CTRL		0x0800
 #define DP_PHY_RST			BIT(28)  /* DP PHY Global Soft Reset */
@@ -150,6 +154,7 @@
 #define PHY_M1_RST			BIT(12)  /* Reset PHY1 Main Channel */
 #define PHY_RDY				BIT(16)  /* PHY Main Channels Ready */
 #define PHY_M0_RST			BIT(8)   /* Reset PHY0 Main Channel */
+#define PHY_2LANE			BIT(2)   /* PHY Enable 2 lanes */
 #define PHY_A0_EN			BIT(1)   /* PHY Aux Channel0 Enable */
 #define PHY_M0_EN			BIT(0)   /* PHY Main Channel0 Enable */
 
@@ -540,6 +545,7 @@ static int tc_aux_link_setup(struct tc_data *tc)
 	unsigned long rate;
 	u32 value;
 	int ret;
+	u32 dp_phy_ctrl;
 
 	rate = clk_get_rate(tc->refclk);
 	switch (rate) {
@@ -564,7 +570,10 @@ static int tc_aux_link_setup(struct tc_data *tc)
 	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
 	tc_write(SYS_PLLPARAM, value);
 
-	tc_write(DP_PHY_CTRL, BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN);
+	dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN;
+	if (tc->link.base.num_lanes == 2)
+		dp_phy_ctrl |= PHY_2LANE;
+	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
 
 	/*
 	 * Initially PLLs are in bypass. Force PLL parameter update,
@@ -719,7 +728,9 @@ static int tc_set_video_mode(struct tc_data *tc, struct drm_display_mode *mode)
 
 	tc_write(DP0_ACTIVEVAL, (mode->vdisplay << 16) | (mode->hdisplay));
 
-	tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0));
+	tc_write(DP0_SYNCVAL, (vsync_len << 16) | (hsync_len << 0) |
+		 ((mode->flags & DRM_MODE_FLAG_NHSYNC) ? SYNCVAL_HS_POL_ACTIVE_LOW : 0) |
+		 ((mode->flags & DRM_MODE_FLAG_NVSYNC) ? SYNCVAL_VS_POL_ACTIVE_LOW : 0));
 
 	tc_write(DPIPXLFMT, VS_POL_ACTIVE_LOW | HS_POL_ACTIVE_LOW |
 		 DE_POL_ACTIVE_HIGH | SUB_CFG_TYPE_CONFIG1 | DPI_BPP_RGB888);
@@ -829,12 +840,11 @@ static int tc_main_link_setup(struct tc_data *tc)
 	if (!tc->mode)
 		return -EINVAL;
 
-	/* from excel file - DP0_SrcCtrl */
-	tc_write(DP0_SRCCTRL, DP0_SRCCTRL_SCRMBLDIS | DP0_SRCCTRL_EN810B |
-		 DP0_SRCCTRL_LANESKEW | DP0_SRCCTRL_LANES_2 |
-		 DP0_SRCCTRL_BW27 | DP0_SRCCTRL_AUTOCORRECT);
-	/* from excel file - DP1_SrcCtrl */
-	tc_write(0x07a0, 0x00003083);
+	tc_write(DP0_SRCCTRL, tc_srcctrl(tc));
+	/* SSCG and BW27 on DP1 must be set to the same as on DP0 */
+	tc_write(DP1_SRCCTRL,
+		 (tc->link.spread ? DP0_SRCCTRL_SSCG : 0) |
+		 ((tc->link.base.rate != 162000) ? DP0_SRCCTRL_BW27 : 0));
 
 	rate = clk_get_rate(tc->refclk);
 	switch (rate) {
@@ -855,8 +865,11 @@ static int tc_main_link_setup(struct tc_data *tc)
 	}
 	value |= SYSCLK_SEL_LSCLK | LSCLK_DIV_2;
 	tc_write(SYS_PLLPARAM, value);
+
 	/* Setup Main Link */
-	dp_phy_ctrl = BGREN | PWR_SW_EN | BIT(2) | PHY_A0_EN |  PHY_M0_EN;
+	dp_phy_ctrl = BGREN | PWR_SW_EN | PHY_A0_EN | PHY_M0_EN;
+	if (tc->link.base.num_lanes == 2)
+		dp_phy_ctrl |= PHY_2LANE;
 	tc_write(DP_PHY_CTRL, dp_phy_ctrl);
 	msleep(100);
 
@@ -1105,10 +1118,20 @@ static bool tc_bridge_mode_fixup(struct drm_bridge *bridge,
 static enum drm_mode_status tc_connector_mode_valid(struct drm_connector *connector,
 				   struct drm_display_mode *mode)
 {
+	struct tc_data *tc = connector_to_tc(connector);
+	u32 req, avail;
+	u32 bits_per_pixel = 24;
+
 	/* DPI interface clock limitation: upto 154 MHz */
 	if (mode->clock > 154000)
 		return MODE_CLOCK_HIGH;
 
+	req = mode->clock * bits_per_pixel / 8;
+	avail = tc->link.base.num_lanes * tc->link.base.rate;
+
+	if (req > avail)
+		return MODE_BAD;
+
 	return MODE_OK;
 }
 
@@ -1186,7 +1209,8 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 	/* Create eDP connector */
 	drm_connector_helper_add(&tc->connector, &tc_connector_helper_funcs);
 	ret = drm_connector_init(drm, &tc->connector, &tc_connector_funcs,
-				 DRM_MODE_CONNECTOR_eDP);
+				 tc->panel ? DRM_MODE_CONNECTOR_eDP :
+				 DRM_MODE_CONNECTOR_DisplayPort);
 	if (ret)
 		return ret;
 
@@ -1195,6 +1219,10 @@ static int tc_bridge_attach(struct drm_bridge *bridge)
 
 	drm_display_info_set_bus_formats(&tc->connector.display_info,
 					 &bus_format, 1);
+	tc->connector.display_info.bus_flags =
+		DRM_BUS_FLAG_DE_HIGH |
+		DRM_BUS_FLAG_PIXDATA_NEGEDGE |
+		DRM_BUS_FLAG_SYNC_NEGEDGE;
 	drm_connector_attach_encoder(&tc->connector, tc->bridge.encoder);
 
 	return 0;
diff --git a/drivers/gpu/drm/bridge/ti-sn65dsi86.c b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
index 680566d97adc..10243965ee7c 100644
--- a/drivers/gpu/drm/bridge/ti-sn65dsi86.c
+++ b/drivers/gpu/drm/bridge/ti-sn65dsi86.c
@@ -54,7 +54,7 @@
 #define SN_AUX_ADDR_7_0_REG			0x76
 #define SN_AUX_LENGTH_REG			0x77
 #define SN_AUX_CMD_REG				0x78
-#define  AUX_CMD_SEND				BIT(1)
+#define  AUX_CMD_SEND				BIT(0)
 #define  AUX_CMD_REQ(x)				((x) << 4)
 #define SN_AUX_RDATA_REG(x)			(0x79 + (x))
 #define SN_SSC_CONFIG_REG			0x93
diff --git a/drivers/gpu/drm/drm_atomic_state_helper.c b/drivers/gpu/drm/drm_atomic_state_helper.c
index 60bd7d708e35..4985384e51f6 100644
--- a/drivers/gpu/drm/drm_atomic_state_helper.c
+++ b/drivers/gpu/drm/drm_atomic_state_helper.c
@@ -241,6 +241,7 @@ void __drm_atomic_helper_plane_duplicate_state(struct drm_plane *plane,
 
 	state->fence = NULL;
 	state->commit = NULL;
+	state->fb_damage_clips = NULL;
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_duplicate_state);
 
@@ -285,6 +286,8 @@ void __drm_atomic_helper_plane_destroy_state(struct drm_plane_state *state)
 
 	if (state->commit)
 		drm_crtc_commit_put(state->commit);
+
+	drm_property_blob_put(state->fb_damage_clips);
 }
 EXPORT_SYMBOL(__drm_atomic_helper_plane_destroy_state);
 
diff --git a/drivers/gpu/drm/drm_atomic_uapi.c b/drivers/gpu/drm/drm_atomic_uapi.c
index c40889888a16..9a1f41adfc67 100644
--- a/drivers/gpu/drm/drm_atomic_uapi.c
+++ b/drivers/gpu/drm/drm_atomic_uapi.c
@@ -1296,12 +1296,11 @@ int drm_mode_atomic_ioctl(struct drm_device *dev,
 			(arg->flags & DRM_MODE_PAGE_FLIP_EVENT))
 		return -EINVAL;
 
-	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
-
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
 		return -ENOMEM;
 
+	drm_modeset_acquire_init(&ctx, DRM_MODESET_ACQUIRE_INTERRUPTIBLE);
 	state->acquire_ctx = &ctx;
 	state->allow_modeset = !!(arg->flags & DRM_MODE_ATOMIC_ALLOW_MODESET);
 
diff --git a/drivers/gpu/drm/drm_auth.c b/drivers/gpu/drm/drm_auth.c
index d9c0f7573905..1669c42c40ed 100644
--- a/drivers/gpu/drm/drm_auth.c
+++ b/drivers/gpu/drm/drm_auth.c
@@ -142,6 +142,7 @@ static int drm_new_set_master(struct drm_device *dev, struct drm_file *fpriv)
 
 	lockdep_assert_held_once(&dev->master_mutex);
 
+	WARN_ON(fpriv->is_master);
 	old_master = fpriv->master;
 	fpriv->master = drm_master_create(dev);
 	if (!fpriv->master) {
@@ -170,6 +171,7 @@ out_err:
 	/* drop references and restore old master on failure */
 	drm_master_put(&fpriv->master);
 	fpriv->master = old_master;
+	fpriv->is_master = 0;
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/drm_damage_helper.c b/drivers/gpu/drm/drm_damage_helper.c
index 05c8e7267165..31032407254d 100644
--- a/drivers/gpu/drm/drm_damage_helper.c
+++ b/drivers/gpu/drm/drm_damage_helper.c
@@ -178,7 +178,7 @@ int drm_atomic_helper_dirtyfb(struct drm_framebuffer *fb,
 	state = drm_atomic_state_alloc(fb->dev);
 	if (!state) {
 		ret = -ENOMEM;
-		goto out;
+		goto out_drop_locks;
 	}
 	state->acquire_ctx = &ctx;
 
@@ -238,6 +238,7 @@ out:
 	kfree(rects);
 	drm_atomic_state_put(state);
 
+out_drop_locks:
 	drm_modeset_drop_locks(&ctx);
 	drm_modeset_acquire_fini(&ctx);
 
@@ -250,7 +251,7 @@ EXPORT_SYMBOL(drm_atomic_helper_dirtyfb);
  * drm_atomic_helper_damage_iter_init - Initialize the damage iterator.
  * @iter: The iterator to initialize.
  * @old_state: Old plane state for validation.
- * @new_state: Plane state from which to iterate the damage clips.
+ * @state: Plane state from which to iterate the damage clips.
  *
  * Initialize an iterator, which clips plane damage
  * &drm_plane_state.fb_damage_clips to plane &drm_plane_state.src. This iterator
diff --git a/drivers/gpu/drm/drm_dp_helper.c b/drivers/gpu/drm/drm_dp_helper.c
index 2d6c491a0542..516e82d0ed50 100644
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@@ -1273,6 +1273,8 @@ static const struct dpcd_quirk dpcd_quirk_list[] = {
 	{ OUI(0x00, 0x22, 0xb9), DEVICE_ID_ANY, true, BIT(DP_DPCD_QUIRK_CONSTANT_N) },
 	/* LG LP140WF6-SPM1 eDP panel */
 	{ OUI(0x00, 0x22, 0xb9), DEVICE_ID('s', 'i', 'v', 'a', 'r', 'T'), false, BIT(DP_DPCD_QUIRK_CONSTANT_N) },
+	/* Apple panels need some additional handling to support PSR */
+	{ OUI(0x00, 0x10, 0xfa), DEVICE_ID_ANY, false, BIT(DP_DPCD_QUIRK_NO_PSR) }
 };
 
 #undef OUI
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 5e9ca6f96379..d73703a695e8 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -71,7 +71,7 @@ MODULE_PARM_DESC(drm_fbdev_overalloc,
 #if IS_ENABLED(CONFIG_DRM_FBDEV_LEAK_PHYS_SMEM)
 static bool drm_leak_fbdev_smem = false;
 module_param_unsafe(drm_leak_fbdev_smem, bool, 0600);
-MODULE_PARM_DESC(fbdev_emulation,
+MODULE_PARM_DESC(drm_leak_fbdev_smem,
 		 "Allow unsafe leaking fbdev physical smem address [default=false]");
 #endif
 
@@ -1621,6 +1621,64 @@ static bool drm_fb_pixel_format_equal(const struct fb_var_screeninfo *var_1,
 	       var_1->transp.msb_right == var_2->transp.msb_right;
 }
 
+static void drm_fb_helper_fill_pixel_fmt(struct fb_var_screeninfo *var,
+					 u8 depth)
+{
+	switch (depth) {
+	case 8:
+		var->red.offset = 0;
+		var->green.offset = 0;
+		var->blue.offset = 0;
+		var->red.length = 8; /* 8bit DAC */
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	case 15:
+		var->red.offset = 10;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 5;
+		var->blue.length = 5;
+		var->transp.offset = 15;
+		var->transp.length = 1;
+		break;
+	case 16:
+		var->red.offset = 11;
+		var->green.offset = 5;
+		var->blue.offset = 0;
+		var->red.length = 5;
+		var->green.length = 6;
+		var->blue.length = 5;
+		var->transp.offset = 0;
+		break;
+	case 24:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.offset = 0;
+		var->transp.length = 0;
+		break;
+	case 32:
+		var->red.offset = 16;
+		var->green.offset = 8;
+		var->blue.offset = 0;
+		var->red.length = 8;
+		var->green.length = 8;
+		var->blue.length = 8;
+		var->transp.offset = 24;
+		var->transp.length = 8;
+		break;
+	default:
+		break;
+	}
+}
+
 /**
  * drm_fb_helper_check_var - implementation for &fb_ops.fb_check_var
  * @var: screeninfo to check
@@ -1632,9 +1690,14 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	struct drm_fb_helper *fb_helper = info->par;
 	struct drm_framebuffer *fb = fb_helper->fb;
 
-	if (var->pixclock != 0 || in_dbg_master())
+	if (in_dbg_master())
 		return -EINVAL;
 
+	if (var->pixclock != 0) {
+		DRM_DEBUG("fbdev emulation doesn't support changing the pixel clock, value of pixclock is ignored\n");
+		var->pixclock = 0;
+	}
+
 	if ((drm_format_info_block_width(fb->format, 0) > 1) ||
 	    (drm_format_info_block_height(fb->format, 0) > 1))
 		return -EINVAL;
@@ -1655,6 +1718,20 @@ int drm_fb_helper_check_var(struct fb_var_screeninfo *var,
 	}
 
 	/*
+	 * Workaround for SDL 1.2, which is known to be setting all pixel format
+	 * fields values to zero in some cases. We treat this situation as a
+	 * kind of "use some reasonable autodetected values".
+	 */
+	if (!var->red.offset     && !var->green.offset    &&
+	    !var->blue.offset    && !var->transp.offset   &&
+	    !var->red.length     && !var->green.length    &&
+	    !var->blue.length    && !var->transp.length   &&
+	    !var->red.msb_right  && !var->green.msb_right &&
+	    !var->blue.msb_right && !var->transp.msb_right) {
+		drm_fb_helper_fill_pixel_fmt(var, fb->format->depth);
+	}
+
+	/*
 	 * drm fbdev emulation doesn't support changing the pixel format at all,
 	 * so reject all pixel format changing requests.
 	 */
@@ -1967,59 +2044,7 @@ void drm_fb_helper_fill_var(struct fb_info *info, struct drm_fb_helper *fb_helpe
 	info->var.yoffset = 0;
 	info->var.activate = FB_ACTIVATE_NOW;
 
-	switch (fb->format->depth) {
-	case 8:
-		info->var.red.offset = 0;
-		info->var.green.offset = 0;
-		info->var.blue.offset = 0;
-		info->var.red.length = 8; /* 8bit DAC */
-		info->var.green.length = 8;
-		info->var.blue.length = 8;
-		info->var.transp.offset = 0;
-		info->var.transp.length = 0;
-		break;
-	case 15:
-		info->var.red.offset = 10;
-		info->var.green.offset = 5;
-		info->var.blue.offset = 0;
-		info->var.red.length = 5;
-		info->var.green.length = 5;
-		info->var.blue.length = 5;
-		info->var.transp.offset = 15;
-		info->var.transp.length = 1;
-		break;
-	case 16:
-		info->var.red.offset = 11;
-		info->var.green.offset = 5;
-		info->var.blue.offset = 0;
-		info->var.red.length = 5;
-		info->var.green.length = 6;
-		info->var.blue.length = 5;
-		info->var.transp.offset = 0;
-		break;
-	case 24:
-		info->var.red.offset = 16;
-		info->var.green.offset = 8;
-		info->var.blue.offset = 0;
-		info->var.red.length = 8;
-		info->var.green.length = 8;
-		info->var.blue.length = 8;
-		info->var.transp.offset = 0;
-		info->var.transp.length = 0;
-		break;
-	case 32:
-		info->var.red.offset = 16;
-		info->var.green.offset = 8;
-		info->var.blue.offset = 0;
-		info->var.red.length = 8;
-		info->var.green.length = 8;
-		info->var.blue.length = 8;
-		info->var.transp.offset = 24;
-		info->var.transp.length = 8;
-		break;
-	default:
-		break;
-	}
+	drm_fb_helper_fill_pixel_fmt(&info->var, fb->format->depth);
 
 	info->var.xres = fb_width;
 	info->var.yres = fb_height;
diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c
index ffa8dc35515f..46f48f245eb5 100644
--- a/drivers/gpu/drm/drm_file.c
+++ b/drivers/gpu/drm/drm_file.c
@@ -525,7 +525,7 @@ ssize_t drm_read(struct file *filp, char __user *buffer,
 	struct drm_device *dev = file_priv->minor->dev;
 	ssize_t ret;
 
-	if (!access_ok(VERIFY_WRITE, buffer, count))
+	if (!access_ok(buffer, count))
 		return -EFAULT;
 
 	ret = mutex_lock_interruptible(&file_priv->event_read_lock);
diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h
index c7a7d7ce5d1c..d9caf205e0b3 100644
--- a/drivers/gpu/drm/drm_internal.h
+++ b/drivers/gpu/drm/drm_internal.h
@@ -99,6 +99,8 @@ struct device *drm_sysfs_minor_alloc(struct drm_minor *minor);
 int drm_sysfs_connector_add(struct drm_connector *connector);
 void drm_sysfs_connector_remove(struct drm_connector *connector);
 
+void drm_sysfs_lease_event(struct drm_device *dev);
+
 /* drm_gem.c */
 int drm_gem_init(struct drm_device *dev);
 void drm_gem_destroy(struct drm_device *dev);
diff --git a/drivers/gpu/drm/drm_ioctl.c b/drivers/gpu/drm/drm_ioctl.c
index 94bd872d56c4..7e6746b2d704 100644
--- a/drivers/gpu/drm/drm_ioctl.c
+++ b/drivers/gpu/drm/drm_ioctl.c
@@ -37,6 +37,7 @@
 
 #include <linux/pci.h>
 #include <linux/export.h>
+#include <linux/nospec.h>
 
 /**
  * DOC: getunique and setversion story
@@ -800,13 +801,17 @@ long drm_ioctl(struct file *filp,
 
 	if (is_driver_ioctl) {
 		/* driver ioctl */
-		if (nr - DRM_COMMAND_BASE >= dev->driver->num_ioctls)
+		unsigned int index = nr - DRM_COMMAND_BASE;
+
+		if (index >= dev->driver->num_ioctls)
 			goto err_i1;
-		ioctl = &dev->driver->ioctls[nr - DRM_COMMAND_BASE];
+		index = array_index_nospec(index, dev->driver->num_ioctls);
+		ioctl = &dev->driver->ioctls[index];
 	} else {
 		/* core ioctl */
 		if (nr >= DRM_CORE_IOCTL_COUNT)
 			goto err_i1;
+		nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT);
 		ioctl = &drm_ioctls[nr];
 	}
 
@@ -888,6 +893,7 @@ bool drm_ioctl_flags(unsigned int nr, unsigned int *flags)
 
 	if (nr >= DRM_CORE_IOCTL_COUNT)
 		return false;
+	nr = array_index_nospec(nr, DRM_CORE_IOCTL_COUNT);
 
 	*flags = drm_ioctls[nr].flags;
 	return true;
diff --git a/drivers/gpu/drm/drm_lease.c b/drivers/gpu/drm/drm_lease.c
index 3650d3c46718..99cba8ea5d82 100644
--- a/drivers/gpu/drm/drm_lease.c
+++ b/drivers/gpu/drm/drm_lease.c
@@ -292,7 +292,7 @@ void drm_lease_destroy(struct drm_master *master)
 
 	if (master->lessor) {
 		/* Tell the master to check the lessee list */
-		drm_sysfs_hotplug_event(dev);
+		drm_sysfs_lease_event(dev);
 		drm_master_put(&master->lessor);
 	}
 
diff --git a/drivers/gpu/drm/drm_mode_object.c b/drivers/gpu/drm/drm_mode_object.c
index cd9bc0ce9be0..004191d01772 100644
--- a/drivers/gpu/drm/drm_mode_object.c
+++ b/drivers/gpu/drm/drm_mode_object.c
@@ -459,11 +459,11 @@ static int set_property_atomic(struct drm_mode_object *obj,
 	struct drm_modeset_acquire_ctx ctx;
 	int ret;
 
-	drm_modeset_acquire_init(&ctx, 0);
-
 	state = drm_atomic_state_alloc(dev);
 	if (!state)
 		return -ENOMEM;
+
+	drm_modeset_acquire_init(&ctx, 0);
 	state->acquire_ctx = &ctx;
 retry:
 	if (prop == state->dev->mode_config.dpms_property) {
diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c
index a9d9df6c85ad..693748ad8b88 100644
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@@ -61,8 +61,9 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali
 		return NULL;
 
 	dmah->size = size;
-	dmah->vaddr = dma_zalloc_coherent(&dev->pdev->dev, size, &dmah->busaddr,
-						GFP_KERNEL | __GFP_COMP);
+	dmah->vaddr = dma_alloc_coherent(&dev->pdev->dev, size,
+					 &dmah->busaddr,
+					 GFP_KERNEL | __GFP_COMP);
 
 	if (dmah->vaddr == NULL) {
 		kfree(dmah);
diff --git a/drivers/gpu/drm/drm_sysfs.c b/drivers/gpu/drm/drm_sysfs.c
index b3c1daad1169..ecb7b33002bb 100644
--- a/drivers/gpu/drm/drm_sysfs.c
+++ b/drivers/gpu/drm/drm_sysfs.c
@@ -301,6 +301,16 @@ void drm_sysfs_connector_remove(struct drm_connector *connector)
 	connector->kdev = NULL;
 }
 
+void drm_sysfs_lease_event(struct drm_device *dev)
+{
+	char *event_string = "LEASE=1";
+	char *envp[] = { event_string, NULL };
+
+	DRM_DEBUG("generating lease event\n");
+
+	kobject_uevent_env(&dev->primary->kdev->kobj, KOBJ_CHANGE, envp);
+}
+
 /**
  * drm_sysfs_hotplug_event - generate a DRM uevent
  * @dev: DRM device
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
index 7fea74861a87..160ce3c060a5 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@@ -439,6 +439,4 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 
 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);
-
-	gpu->lastctx = cmdbuf->ctx;
 }
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.c b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
index 52802e6049e0..18c27f795cf6 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@@ -72,14 +72,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
 
-		if (gpu) {
-			mutex_lock(&gpu->lock);
-			if (gpu->lastctx == ctx)
-				gpu->lastctx = NULL;
-			mutex_unlock(&gpu->lock);
-
+		if (gpu)
 			drm_sched_entity_destroy(&ctx->sched_entity[i]);
-		}
 	}
 
 	kfree(ctx);
@@ -345,7 +339,6 @@ static int etnaviv_ioctl_gem_userptr(struct drm_device *dev, void *data,
 	struct drm_file *file)
 {
 	struct drm_etnaviv_gem_userptr *args = data;
-	int access;
 
 	if (args->flags & ~(ETNA_USERPTR_READ|ETNA_USERPTR_WRITE) ||
 	    args->flags == 0)
@@ -357,12 +350,7 @@ static int etnaviv_ioctl_gem_userptr(struct drm_device *dev, void *data,
 	    args->user_ptr & ~PAGE_MASK)
 		return -EINVAL;
 
-	if (args->flags & ETNA_USERPTR_WRITE)
-		access = VERIFY_WRITE;
-	else
-		access = VERIFY_READ;
-
-	if (!access_ok(access, (void __user *)(unsigned long)args->user_ptr,
+	if (!access_ok((void __user *)(unsigned long)args->user_ptr,
 		       args->user_size))
 		return -EFAULT;
 
@@ -523,7 +511,7 @@ static int etnaviv_bind(struct device *dev)
 	if (!priv) {
 		dev_err(dev, "failed to allocate private data\n");
 		ret = -ENOMEM;
-		goto out_unref;
+		goto out_put;
 	}
 	drm->dev_private = priv;
 
@@ -549,7 +537,7 @@ out_register:
 	component_unbind_all(dev, drm);
 out_bind:
 	kfree(priv);
-out_unref:
+out_put:
 	drm_dev_put(drm);
 
 	return ret;
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_drv.h b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
index 8d02d1b7dcf5..4bf698de5996 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@@ -107,17 +107,6 @@ static inline size_t size_vstruct(size_t nelem, size_t elem_size, size_t base)
 	return base + nelem * elem_size;
 }
 
-/* returns true if fence a comes after fence b */
-static inline bool fence_after(u32 a, u32 b)
-{
-	return (s32)(a - b) > 0;
-}
-
-static inline bool fence_after_eq(u32 a, u32 b)
-{
-	return (s32)(a - b) >= 0;
-}
-
 /*
  * Etnaviv timeouts are specified wrt CLOCK_MONOTONIC, not jiffies.
  * We need to calculate the timeout in terms of number of jiffies
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_dump.c b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
index 9146e30e24a6..3fbb4855396c 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@@ -118,6 +118,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
+	unsigned long flags;
 
 	/* Only catch the first event, or when manually re-armed */
 	if (!etnaviv_dump_core)
@@ -134,13 +135,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		    mmu_size + gpu->buffer.size;
 
 	/* Add in the active command buffers */
-	spin_lock(&gpu->sched.job_list_lock);
+	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		file_size += submit->cmdbuf.size;
 		n_obj++;
 	}
-	spin_unlock(&gpu->sched.job_list_lock);
+	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Add in the active buffer objects */
 	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
@@ -182,14 +183,14 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 			      gpu->buffer.size,
 			      etnaviv_cmdbuf_get_va(&gpu->buffer));
 
-	spin_lock(&gpu->sched.job_list_lock);
+	spin_lock_irqsave(&gpu->sched.job_list_lock, flags);
 	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
 		submit = to_etnaviv_submit(s_job);
 		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
 				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
 				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
 	}
-	spin_unlock(&gpu->sched.job_list_lock);
+	spin_unlock_irqrestore(&gpu->sched.job_list_lock, flags);
 
 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
index f225fbc6edd2..6904535475de 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@@ -3,10 +3,12 @@
  * Copyright (C) 2015-2018 Etnaviv Project
  */
 
+#include <linux/clk.h>
 #include <linux/component.h>
 #include <linux/dma-fence.h>
 #include <linux/moduleparam.h>
 #include <linux/of_device.h>
+#include <linux/regulator/consumer.h>
 #include <linux/thermal.h>
 
 #include "etnaviv_cmdbuf.h"
@@ -976,7 +978,6 @@ int etnaviv_gpu_debugfs(struct etnaviv_gpu *gpu, struct seq_file *m)
 
 void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 {
-	unsigned long flags;
 	unsigned int i = 0;
 
 	dev_err(gpu->dev, "recover hung GPU!\n");
@@ -989,15 +990,13 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)
 	etnaviv_hw_reset(gpu);
 
 	/* complete all events, the GPU won't do it after the reset */
-	spin_lock_irqsave(&gpu->event_spinlock, flags);
+	spin_lock(&gpu->event_spinlock);
 	for_each_set_bit_from(i, gpu->event_bitmap, ETNA_NR_EVENTS)
 		complete(&gpu->event_free);
 	bitmap_zero(gpu->event_bitmap, ETNA_NR_EVENTS);
-	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
-	gpu->completed_fence = gpu->active_fence;
+	spin_unlock(&gpu->event_spinlock);
 
 	etnaviv_gpu_hw_init(gpu);
-	gpu->lastctx = NULL;
 	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
@@ -1032,7 +1031,7 @@ static bool etnaviv_fence_signaled(struct dma_fence *fence)
 {
 	struct etnaviv_fence *f = to_etnaviv_fence(fence);
 
-	return fence_completed(f->gpu, f->base.seqno);
+	return (s32)(f->gpu->completed_fence - f->base.seqno) >= 0;
 }
 
 static void etnaviv_fence_release(struct dma_fence *fence)
@@ -1071,6 +1070,12 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 	return &f->base;
 }
 
+/* returns true if fence a comes after fence b */
+static inline bool fence_after(u32 a, u32 b)
+{
+	return (s32)(a - b) > 0;
+}
+
 /*
  * event management:
  */
@@ -1078,7 +1083,7 @@ static struct dma_fence *etnaviv_gpu_fence_alloc(struct etnaviv_gpu *gpu)
 static int event_alloc(struct etnaviv_gpu *gpu, unsigned nr_events,
 	unsigned int *events)
 {
-	unsigned long flags, timeout = msecs_to_jiffies(10 * 10000);
+	unsigned long timeout = msecs_to_jiffies(10 * 10000);
 	unsigned i, acquired = 0;
 
 	for (i = 0; i < nr_events; i++) {
@@ -1095,7 +1100,7 @@ static int event_alloc(struct etnaviv_gpu *gpu, unsigned nr_events,
 		timeout = ret;
 	}
 
-	spin_lock_irqsave(&gpu->event_spinlock, flags);
+	spin_lock(&gpu->event_spinlock);
 
 	for (i = 0; i < nr_events; i++) {
 		int event = find_first_zero_bit(gpu->event_bitmap, ETNA_NR_EVENTS);
@@ -1105,7 +1110,7 @@ static int event_alloc(struct etnaviv_gpu *gpu, unsigned nr_events,
 		set_bit(event, gpu->event_bitmap);
 	}
 
-	spin_unlock_irqrestore(&gpu->event_spinlock, flags);
+	spin_unlock(&gpu->event_spinlock);
 
 	return 0;
 
@@ -1118,18 +1123,11 @@ out:
 
 static void event_free(struct etnaviv_gpu *gpu, unsigned int event)
 {
-	unsigned long flags;
-
-	spin_lock_irqsave(&gpu->event_spinlock, flags);
-
 	if (!test_bit(event, gpu->event_bitmap)) {
 		dev_warn(gpu->dev, "event %u is already marked as free",
 			 event);
-		spin_unlock_irqrestore(&gpu->event_spinlock, flags);
 	} else {
 		clear_bit(event, gpu->event_bitmap);
-		spin_unlock_irqrestore(&gpu->event_spinlock, flags);
-
 		complete(&gpu->event_free);
 	}
 }
@@ -1306,8 +1304,6 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 		goto out_unlock;
 	}
 
-	gpu->active_fence = gpu_fence->seqno;
-
 	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
 		kref_get(&submit->refcount);
@@ -1549,7 +1545,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);
 
-	gpu->lastctx = NULL;
 	gpu->exec_state = -1;
 
 	mutex_unlock(&gpu->lock);
@@ -1806,8 +1801,8 @@ static int etnaviv_gpu_rpm_suspend(struct device *dev)
 	struct etnaviv_gpu *gpu = dev_get_drvdata(dev);
 	u32 idle, mask;
 
-	/* If we have outstanding fences, we're not idle */
-	if (gpu->completed_fence != gpu->active_fence)
+	/* If there are any jobs in the HW queue, we're not idle */
+	if (atomic_read(&gpu->sched.hw_rq_count))
 		return -EBUSY;
 
 	/* Check whether the hardware (except FE) is idle */
diff --git a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
index 9a75a6937268..9bcf151f706b 100644
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@@ -6,9 +6,6 @@
 #ifndef __ETNAVIV_GPU_H__
 #define __ETNAVIV_GPU_H__
 
-#include <linux/clk.h>
-#include <linux/regulator/consumer.h>
-
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 
@@ -88,6 +85,8 @@ struct etnaviv_event {
 
 struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_cmdbuf;
+struct regulator;
+struct clk;
 
 #define ETNA_NR_EVENTS 30
 
@@ -98,7 +97,6 @@ struct etnaviv_gpu {
 	struct mutex lock;
 	struct etnaviv_chip_identity identity;
 	enum etnaviv_sec_mode sec_mode;
-	struct etnaviv_file_private *lastctx;
 	struct workqueue_struct *wq;
 	struct drm_gpu_scheduler sched;
 
@@ -121,7 +119,6 @@ struct etnaviv_gpu {
 	struct mutex fence_lock;
 	struct idr fence_idr;
 	u32 next_fence;
-	u32 active_fence;
 	u32 completed_fence;
 	wait_queue_head_t fence_event;
 	u64 fence_context;
@@ -161,11 +158,6 @@ static inline u32 gpu_read(struct etnaviv_gpu *gpu, u32 reg)
 	return readl(gpu->mmio + reg);
 }
 
-static inline bool fence_completed(struct etnaviv_gpu *gpu, u32 fence)
-{
-	return fence_after_eq(gpu->completed_fence, fence);
-}
-
 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value);
 
 int etnaviv_gpu_init(struct etnaviv_gpu *gpu);
diff --git a/drivers/gpu/drm/exynos/exynos_drm_fimd.c b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
index e3d6a8584715..786a8ee6f10f 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_fimd.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_fimd.c
@@ -228,6 +228,21 @@ static const uint32_t fimd_formats[] = {
 	DRM_FORMAT_ARGB8888,
 };
 
+static const unsigned int capabilities[WINDOWS_NR] = {
+	0,
+	EXYNOS_DRM_PLANE_CAP_WIN_BLEND | EXYNOS_DRM_PLANE_CAP_PIX_BLEND,
+	EXYNOS_DRM_PLANE_CAP_WIN_BLEND | EXYNOS_DRM_PLANE_CAP_PIX_BLEND,
+	EXYNOS_DRM_PLANE_CAP_WIN_BLEND | EXYNOS_DRM_PLANE_CAP_PIX_BLEND,
+	EXYNOS_DRM_PLANE_CAP_WIN_BLEND | EXYNOS_DRM_PLANE_CAP_PIX_BLEND,
+};
+
+static inline void fimd_set_bits(struct fimd_context *ctx, u32 reg, u32 mask,
+				 u32 val)
+{
+	val = (val & mask) | (readl(ctx->regs + reg) & ~mask);
+	writel(val, ctx->regs + reg);
+}
+
 static int fimd_enable_vblank(struct exynos_drm_crtc *crtc)
 {
 	struct fimd_context *ctx = crtc->ctx;
@@ -551,13 +566,88 @@ static void fimd_commit(struct exynos_drm_crtc *crtc)
 	writel(val, ctx->regs + VIDCON0);
 }
 
+static void fimd_win_set_bldeq(struct fimd_context *ctx, unsigned int win,
+			       unsigned int alpha, unsigned int pixel_alpha)
+{
+	u32 mask = BLENDEQ_A_FUNC_F(0xf) | BLENDEQ_B_FUNC_F(0xf);
+	u32 val = 0;
+
+	switch (pixel_alpha) {
+	case DRM_MODE_BLEND_PIXEL_NONE:
+	case DRM_MODE_BLEND_COVERAGE:
+		val |= BLENDEQ_A_FUNC_F(BLENDEQ_ALPHA_A);
+		val |= BLENDEQ_B_FUNC_F(BLENDEQ_ONE_MINUS_ALPHA_A);
+		break;
+	case DRM_MODE_BLEND_PREMULTI:
+	default:
+		if (alpha != DRM_BLEND_ALPHA_OPAQUE) {
+			val |= BLENDEQ_A_FUNC_F(BLENDEQ_ALPHA0);
+			val |= BLENDEQ_B_FUNC_F(BLENDEQ_ONE_MINUS_ALPHA_A);
+		} else {
+			val |= BLENDEQ_A_FUNC_F(BLENDEQ_ONE);
+			val |= BLENDEQ_B_FUNC_F(BLENDEQ_ONE_MINUS_ALPHA_A);
+		}
+		break;
+	}
+	fimd_set_bits(ctx, BLENDEQx(win), mask, val);
+}
 
-static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
-				uint32_t pixel_format, int width)
+static void fimd_win_set_bldmod(struct fimd_context *ctx, unsigned int win,
+				unsigned int alpha, unsigned int pixel_alpha)
 {
-	unsigned long val;
+	u32 win_alpha_l = (alpha >> 8) & 0xf;
+	u32 win_alpha_h = alpha >> 12;
+	u32 val = 0;
 
-	val = WINCONx_ENWIN;
+	switch (pixel_alpha) {
+	case DRM_MODE_BLEND_PIXEL_NONE:
+		break;
+	case DRM_MODE_BLEND_COVERAGE:
+	case DRM_MODE_BLEND_PREMULTI:
+	default:
+		val |= WINCON1_ALPHA_SEL;
+		val |= WINCON1_BLD_PIX;
+		val |= WINCON1_ALPHA_MUL;
+		break;
+	}
+	fimd_set_bits(ctx, WINCON(win), WINCONx_BLEND_MODE_MASK, val);
+
+	/* OSD alpha */
+	val = VIDISD14C_ALPHA0_R(win_alpha_h) |
+		VIDISD14C_ALPHA0_G(win_alpha_h) |
+		VIDISD14C_ALPHA0_B(win_alpha_h) |
+		VIDISD14C_ALPHA1_R(0x0) |
+		VIDISD14C_ALPHA1_G(0x0) |
+		VIDISD14C_ALPHA1_B(0x0);
+	writel(val, ctx->regs + VIDOSD_C(win));
+
+	val = VIDW_ALPHA_R(win_alpha_l) | VIDW_ALPHA_G(win_alpha_l) |
+		VIDW_ALPHA_B(win_alpha_l);
+	writel(val, ctx->regs + VIDWnALPHA0(win));
+
+	val = VIDW_ALPHA_R(0x0) | VIDW_ALPHA_G(0x0) |
+		VIDW_ALPHA_B(0x0);
+	writel(val, ctx->regs + VIDWnALPHA1(win));
+
+	fimd_set_bits(ctx, BLENDCON, BLENDCON_NEW_MASK,
+			BLENDCON_NEW_8BIT_ALPHA_VALUE);
+}
+
+static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
+				struct drm_framebuffer *fb, int width)
+{
+	struct exynos_drm_plane plane = ctx->planes[win];
+	struct exynos_drm_plane_state *state =
+		to_exynos_plane_state(plane.base.state);
+	uint32_t pixel_format = fb->format->format;
+	unsigned int alpha = state->base.alpha;
+	u32 val = WINCONx_ENWIN;
+	unsigned int pixel_alpha;
+
+	if (fb->format->has_alpha)
+		pixel_alpha = state->base.pixel_blend_mode;
+	else
+		pixel_alpha = DRM_MODE_BLEND_PIXEL_NONE;
 
 	/*
 	 * In case of s3c64xx, window 0 doesn't support alpha channel.
@@ -591,8 +681,7 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
 		break;
 	case DRM_FORMAT_ARGB8888:
 	default:
-		val |= WINCON1_BPPMODE_25BPP_A1888
-			| WINCON1_BLD_PIX | WINCON1_ALPHA_SEL;
+		val |= WINCON1_BPPMODE_25BPP_A1888;
 		val |= WINCONx_WSWP;
 		val |= WINCONx_BURSTLEN_16WORD;
 		break;
@@ -610,25 +699,12 @@ static void fimd_win_set_pixfmt(struct fimd_context *ctx, unsigned int win,
 		val &= ~WINCONx_BURSTLEN_MASK;
 		val |= WINCONx_BURSTLEN_4WORD;
 	}
-
-	writel(val, ctx->regs + WINCON(win));
+	fimd_set_bits(ctx, WINCON(win), ~WINCONx_BLEND_MODE_MASK, val);
 
 	/* hardware window 0 doesn't support alpha channel. */
 	if (win != 0) {
-		/* OSD alpha */
-		val = VIDISD14C_ALPHA0_R(0xf) |
-			VIDISD14C_ALPHA0_G(0xf) |
-			VIDISD14C_ALPHA0_B(0xf) |
-			VIDISD14C_ALPHA1_R(0xf) |
-			VIDISD14C_ALPHA1_G(0xf) |
-			VIDISD14C_ALPHA1_B(0xf);
-
-		writel(val, ctx->regs + VIDOSD_C(win));
-
-		val = VIDW_ALPHA_R(0xf) | VIDW_ALPHA_G(0xf) |
-			VIDW_ALPHA_G(0xf);
-		writel(val, ctx->regs + VIDWnALPHA0(win));
-		writel(val, ctx->regs + VIDWnALPHA1(win));
+		fimd_win_set_bldmod(ctx, win, alpha, pixel_alpha);
+		fimd_win_set_bldeq(ctx, win, alpha, pixel_alpha);
 	}
 }
 
@@ -785,7 +861,7 @@ static void fimd_update_plane(struct exynos_drm_crtc *crtc,
 		DRM_DEBUG_KMS("osd size = 0x%x\n", (unsigned int)val);
 	}
 
-	fimd_win_set_pixfmt(ctx, win, fb->format->format, state->src.w);
+	fimd_win_set_pixfmt(ctx, win, fb, state->src.w);
 
 	/* hardware window 0 doesn't support color key. */
 	if (win != 0)
@@ -987,6 +1063,7 @@ static int fimd_bind(struct device *dev, struct device *master, void *data)
 		ctx->configs[i].num_pixel_formats = ARRAY_SIZE(fimd_formats);
 		ctx->configs[i].zpos = i;
 		ctx->configs[i].type = fimd_win_types[i];
+		ctx->configs[i].capabilities = capabilities[i];
 		ret = exynos_plane_init(drm_dev, &ctx->planes[i], i,
 					&ctx->configs[i]);
 		if (ret)
diff --git a/drivers/gpu/drm/i915/Kconfig b/drivers/gpu/drm/i915/Kconfig
index 33a458b7f1fc..148be8e1a090 100644
--- a/drivers/gpu/drm/i915/Kconfig
+++ b/drivers/gpu/drm/i915/Kconfig
@@ -131,5 +131,5 @@ config DRM_I915_GVT_KVMGT
 menu "drm/i915 Debugging"
 depends on DRM_I915
 depends on EXPERT
-source drivers/gpu/drm/i915/Kconfig.debug
+source "drivers/gpu/drm/i915/Kconfig.debug"
 endmenu
diff --git a/drivers/gpu/drm/i915/gvt/aperture_gm.c b/drivers/gpu/drm/i915/gvt/aperture_gm.c
index fe754022e356..359d37d5c958 100644
--- a/drivers/gpu/drm/i915/gvt/aperture_gm.c
+++ b/drivers/gpu/drm/i915/gvt/aperture_gm.c
@@ -61,10 +61,12 @@ static int alloc_gm(struct intel_vgpu *vgpu, bool high_gm)
 	}
 
 	mutex_lock(&dev_priv->drm.struct_mutex);
+	mmio_hw_access_pre(dev_priv);
 	ret = i915_gem_gtt_insert(&dev_priv->ggtt.vm, node,
 				  size, I915_GTT_PAGE_SIZE,
 				  I915_COLOR_UNEVICTABLE,
 				  start, end, flags);
+	mmio_hw_access_post(dev_priv);
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	if (ret)
 		gvt_err("fail to alloc %s gm space from host\n",
diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c
index 77edbfcb0f75..77ae634eb11c 100644
--- a/drivers/gpu/drm/i915/gvt/cmd_parser.c
+++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c
@@ -1900,11 +1900,11 @@ static struct cmd_info cmd_info[] = {
 
 	{"MI_URB_CLEAR", OP_MI_URB_CLEAR, F_LEN_VAR, R_RCS, D_ALL, 0, 8, NULL},
 
-	{"ME_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL,
+	{"MI_SEMAPHORE_SIGNAL", OP_MI_SEMAPHORE_SIGNAL, F_LEN_VAR, R_ALL,
 		D_BDW_PLUS, 0, 8, NULL},
 
-	{"ME_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL, D_BDW_PLUS,
-		ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait},
+	{"MI_SEMAPHORE_WAIT", OP_MI_SEMAPHORE_WAIT, F_LEN_VAR, R_ALL,
+		D_BDW_PLUS, ADDR_FIX_1(2), 8, cmd_handler_mi_semaphore_wait},
 
 	{"MI_STORE_DATA_IMM", OP_MI_STORE_DATA_IMM, F_LEN_VAR, R_ALL, D_BDW_PLUS,
 		ADDR_FIX_1(1), 10, cmd_handler_mi_store_data_imm},
diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c
index 481896fb712a..85e6736f0a32 100644
--- a/drivers/gpu/drm/i915/gvt/fb_decoder.c
+++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c
@@ -235,7 +235,7 @@ int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu,
 		plane->bpp = skl_pixel_formats[fmt].bpp;
 		plane->drm_format = skl_pixel_formats[fmt].drm_format;
 	} else {
-		plane->tiled = !!(val & DISPPLANE_TILED);
+		plane->tiled = val & DISPPLANE_TILED;
 		fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK);
 		plane->bpp = bdw_pixel_formats[fmt].bpp;
 		plane->drm_format = bdw_pixel_formats[fmt].drm_format;
diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c
index 58e166effa45..c7103dd2d8d5 100644
--- a/drivers/gpu/drm/i915/gvt/gtt.c
+++ b/drivers/gpu/drm/i915/gvt/gtt.c
@@ -2447,10 +2447,11 @@ static void intel_vgpu_destroy_all_ppgtt_mm(struct intel_vgpu *vgpu)
 
 static void intel_vgpu_destroy_ggtt_mm(struct intel_vgpu *vgpu)
 {
-	struct intel_gvt_partial_pte *pos;
+	struct intel_gvt_partial_pte *pos, *next;
 
-	list_for_each_entry(pos,
-			&vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list, list) {
+	list_for_each_entry_safe(pos, next,
+				 &vgpu->gtt.ggtt_mm->ggtt_mm.partial_pte_list,
+				 list) {
 		gvt_dbg_mm("partial PTE update on hold 0x%lx : 0x%llx\n",
 			pos->offset, pos->data);
 		kfree(pos);
diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c
index 6ef5a7fc70df..733a2a0d0c30 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.c
+++ b/drivers/gpu/drm/i915/gvt/gvt.c
@@ -437,7 +437,7 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv)
 
 	ret = intel_gvt_debugfs_init(gvt);
 	if (ret)
-		gvt_err("debugfs registeration failed, go on.\n");
+		gvt_err("debugfs registration failed, go on.\n");
 
 	gvt_dbg_core("gvt device initialization is done\n");
 	dev_priv->gvt = gvt;
diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 31f6cdbe5c42..b4ab1dad0143 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -159,6 +159,10 @@ struct intel_vgpu_submission {
 	struct kmem_cache *workloads;
 	atomic_t running_workload_num;
 	struct i915_gem_context *shadow_ctx;
+	union {
+		u64 i915_context_pml4;
+		u64 i915_context_pdps[GEN8_3LVL_PDPES];
+	};
 	DECLARE_BITMAP(shadow_ctx_desc_updated, I915_NUM_ENGINES);
 	DECLARE_BITMAP(tlb_handle_pending, I915_NUM_ENGINES);
 	void *ring_scan_buffer[I915_NUM_ENGINES];
diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c
index aa280bb07125..e9f343b124b0 100644
--- a/drivers/gpu/drm/i915/gvt/handlers.c
+++ b/drivers/gpu/drm/i915/gvt/handlers.c
@@ -475,6 +475,7 @@ static i915_reg_t force_nonpriv_white_list[] = {
 	_MMIO(0x7704),
 	_MMIO(0x7708),
 	_MMIO(0x770c),
+	_MMIO(0x83a8),
 	_MMIO(0xb110),
 	GEN8_L3SQCREG4,//_MMIO(0xb118)
 	_MMIO(0xe100),
@@ -2798,6 +2799,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt)
 	MMIO_DFH(_MMIO(0xe2a0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
+	MMIO_DFH(_MMIO(0x21f0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h
index 5af11cf1b482..e1675a00df12 100644
--- a/drivers/gpu/drm/i915/gvt/hypercall.h
+++ b/drivers/gpu/drm/i915/gvt/hypercall.h
@@ -41,7 +41,7 @@ struct intel_gvt_mpt {
 	int (*host_init)(struct device *dev, void *gvt, const void *ops);
 	void (*host_exit)(struct device *dev, void *gvt);
 	int (*attach_vgpu)(void *vgpu, unsigned long *handle);
-	void (*detach_vgpu)(unsigned long handle);
+	void (*detach_vgpu)(void *vgpu);
 	int (*inject_msi)(unsigned long handle, u32 addr, u16 data);
 	unsigned long (*from_virt_to_mfn)(void *p);
 	int (*enable_page_track)(unsigned long handle, u64 gfn);
diff --git a/drivers/gpu/drm/i915/gvt/interrupt.c b/drivers/gpu/drm/i915/gvt/interrupt.c
index 5daa23ae566b..6b9d1354ff29 100644
--- a/drivers/gpu/drm/i915/gvt/interrupt.c
+++ b/drivers/gpu/drm/i915/gvt/interrupt.c
@@ -126,7 +126,7 @@ static const char * const irq_name[INTEL_GVT_EVENT_MAX] = {
 	[FDI_RX_INTERRUPTS_TRANSCODER_C] = "FDI RX Interrupts Combined C",
 	[AUDIO_CP_CHANGE_TRANSCODER_C] = "Audio CP Change Transcoder C",
 	[AUDIO_CP_REQUEST_TRANSCODER_C] = "Audio CP Request Transcoder C",
-	[ERR_AND_DBG] = "South Error and Debug Interupts Combined",
+	[ERR_AND_DBG] = "South Error and Debug Interrupts Combined",
 	[GMBUS] = "Gmbus",
 	[SDVO_B_HOTPLUG] = "SDVO B hotplug",
 	[CRT_HOTPLUG] = "CRT Hotplug",
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index c1072143da1d..dd3dfd00f4e6 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -996,7 +996,7 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
 {
 	unsigned int index;
 	u64 virtaddr;
-	unsigned long req_size, pgoff = 0;
+	unsigned long req_size, pgoff, req_start;
 	pgprot_t pg_prot;
 	struct intel_vgpu *vgpu = mdev_get_drvdata(mdev);
 
@@ -1014,7 +1014,17 @@ static int intel_vgpu_mmap(struct mdev_device *mdev, struct vm_area_struct *vma)
 	pg_prot = vma->vm_page_prot;
 	virtaddr = vma->vm_start;
 	req_size = vma->vm_end - vma->vm_start;
-	pgoff = vgpu_aperture_pa_base(vgpu) >> PAGE_SHIFT;
+	pgoff = vma->vm_pgoff &
+		((1U << (VFIO_PCI_OFFSET_SHIFT - PAGE_SHIFT)) - 1);
+	req_start = pgoff << PAGE_SHIFT;
+
+	if (!intel_vgpu_in_aperture(vgpu, req_start))
+		return -EINVAL;
+	if (req_start + req_size >
+	    vgpu_aperture_offset(vgpu) + vgpu_aperture_sz(vgpu))
+		return -EINVAL;
+
+	pgoff = (gvt_aperture_pa_base(vgpu->gvt) >> PAGE_SHIFT) + pgoff;
 
 	return remap_pfn_range(vma, virtaddr, pgoff, req_size, pg_prot);
 }
@@ -1662,9 +1672,21 @@ static int kvmgt_attach_vgpu(void *vgpu, unsigned long *handle)
 	return 0;
 }
 
-static void kvmgt_detach_vgpu(unsigned long handle)
+static void kvmgt_detach_vgpu(void *p_vgpu)
 {
-	/* nothing to do here */
+	int i;
+	struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu;
+
+	if (!vgpu->vdev.region)
+		return;
+
+	for (i = 0; i < vgpu->vdev.num_regions; i++)
+		if (vgpu->vdev.region[i].ops->release)
+			vgpu->vdev.region[i].ops->release(vgpu,
+					&vgpu->vdev.region[i]);
+	vgpu->vdev.num_regions = 0;
+	kfree(vgpu->vdev.region);
+	vgpu->vdev.region = NULL;
 }
 
 static int kvmgt_inject_msi(unsigned long handle, u32 addr, u16 data)
diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c
index 36a5147cd01e..d6e02c15ef97 100644
--- a/drivers/gpu/drm/i915/gvt/mmio_context.c
+++ b/drivers/gpu/drm/i915/gvt/mmio_context.c
@@ -158,6 +158,8 @@ static void load_render_mocs(struct drm_i915_private *dev_priv)
 	int ring_id, i;
 
 	for (ring_id = 0; ring_id < ARRAY_SIZE(regs); ring_id++) {
+		if (!HAS_ENGINE(dev_priv, ring_id))
+			continue;
 		offset.reg = regs[ring_id];
 		for (i = 0; i < GEN9_MOCS_SIZE; i++) {
 			gen9_render_mocs.control_table[ring_id][i] =
diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h
index 67f19992b226..3ed34123d8d1 100644
--- a/drivers/gpu/drm/i915/gvt/mpt.h
+++ b/drivers/gpu/drm/i915/gvt/mpt.h
@@ -101,7 +101,7 @@ static inline void intel_gvt_hypervisor_detach_vgpu(struct intel_vgpu *vgpu)
 	if (!intel_gvt_host.mpt->detach_vgpu)
 		return;
 
-	intel_gvt_host.mpt->detach_vgpu(vgpu->handle);
+	intel_gvt_host.mpt->detach_vgpu(vgpu);
 }
 
 #define MSI_CAP_CONTROL(offset) (offset + 2)
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c
index b8fbe3fabea3..55bb7885e228 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.c
+++ b/drivers/gpu/drm/i915/gvt/scheduler.c
@@ -332,6 +332,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
 
 	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
 	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
+
+	wa_ctx->indirect_ctx.obj = NULL;
+	wa_ctx->indirect_ctx.shadow_va = NULL;
 }
 
 static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
@@ -356,6 +359,33 @@ static int set_context_ppgtt_from_shadow(struct intel_vgpu_workload *workload,
 	return 0;
 }
 
+static int
+intel_gvt_workload_req_alloc(struct intel_vgpu_workload *workload)
+{
+	struct intel_vgpu *vgpu = workload->vgpu;
+	struct intel_vgpu_submission *s = &vgpu->submission;
+	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
+	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
+	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
+	struct i915_request *rq;
+	int ret = 0;
+
+	lockdep_assert_held(&dev_priv->drm.struct_mutex);
+
+	if (workload->req)
+		goto out;
+
+	rq = i915_request_alloc(engine, shadow_ctx);
+	if (IS_ERR(rq)) {
+		gvt_vgpu_err("fail to allocate gem request\n");
+		ret = PTR_ERR(rq);
+		goto out;
+	}
+	workload->req = i915_request_get(rq);
+out:
+	return ret;
+}
+
 /**
  * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
  * shadow it as well, include ringbuffer,wa_ctx and ctx.
@@ -372,12 +402,11 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
 	struct intel_engine_cs *engine = dev_priv->engine[workload->ring_id];
 	struct intel_context *ce;
-	struct i915_request *rq;
 	int ret;
 
 	lockdep_assert_held(&dev_priv->drm.struct_mutex);
 
-	if (workload->req)
+	if (workload->shadow)
 		return 0;
 
 	ret = set_context_ppgtt_from_shadow(workload, shadow_ctx);
@@ -417,22 +446,8 @@ int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
 			goto err_shadow;
 	}
 
-	rq = i915_request_alloc(engine, shadow_ctx);
-	if (IS_ERR(rq)) {
-		gvt_vgpu_err("fail to allocate gem request\n");
-		ret = PTR_ERR(rq);
-		goto err_shadow;
-	}
-	workload->req = i915_request_get(rq);
-
-	ret = populate_shadow_context(workload);
-	if (ret)
-		goto err_req;
-
+	workload->shadow = true;
 	return 0;
-err_req:
-	rq = fetch_and_zero(&workload->req);
-	i915_request_put(rq);
 err_shadow:
 	release_shadow_wa_ctx(&workload->wa_ctx);
 err_unpin:
@@ -671,23 +686,31 @@ static int dispatch_workload(struct intel_vgpu_workload *workload)
 	mutex_lock(&vgpu->vgpu_lock);
 	mutex_lock(&dev_priv->drm.struct_mutex);
 
+	ret = intel_gvt_workload_req_alloc(workload);
+	if (ret)
+		goto err_req;
+
 	ret = intel_gvt_scan_and_shadow_workload(workload);
 	if (ret)
 		goto out;
 
-	ret = prepare_workload(workload);
+	ret = populate_shadow_context(workload);
+	if (ret) {
+		release_shadow_wa_ctx(&workload->wa_ctx);
+		goto out;
+	}
 
+	ret = prepare_workload(workload);
 out:
-	if (ret)
-		workload->status = ret;
-
 	if (!IS_ERR_OR_NULL(workload->req)) {
 		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
 				ring_id, workload->req);
 		i915_request_add(workload->req);
 		workload->dispatched = true;
 	}
-
+err_req:
+	if (ret)
+		workload->status = ret;
 	mutex_unlock(&dev_priv->drm.struct_mutex);
 	mutex_unlock(&vgpu->vgpu_lock);
 	return ret;
@@ -891,11 +914,6 @@ static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
 
 	list_del_init(&workload->list);
 
-	if (!workload->status) {
-		release_shadow_batch_buffer(workload);
-		release_shadow_wa_ctx(&workload->wa_ctx);
-	}
-
 	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id))) {
 		/* if workload->status is not successful means HW GPU
 		 * has occurred GPU hang or something wrong with i915/GVT,
@@ -1079,6 +1097,21 @@ err:
 	return ret;
 }
 
+static void
+i915_context_ppgtt_root_restore(struct intel_vgpu_submission *s)
+{
+	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
+	int i;
+
+	if (i915_vm_is_48bit(&i915_ppgtt->vm))
+		px_dma(&i915_ppgtt->pml4) = s->i915_context_pml4;
+	else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++)
+			px_dma(i915_ppgtt->pdp.page_directory[i]) =
+						s->i915_context_pdps[i];
+	}
+}
+
 /**
  * intel_vgpu_clean_submission - free submission-related resource for vGPU
  * @vgpu: a vGPU
@@ -1091,6 +1124,7 @@ void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
 	struct intel_vgpu_submission *s = &vgpu->submission;
 
 	intel_vgpu_select_submission_ops(vgpu, ALL_ENGINES, 0);
+	i915_context_ppgtt_root_restore(s);
 	i915_gem_context_put(s->shadow_ctx);
 	kmem_cache_destroy(s->workloads);
 }
@@ -1116,6 +1150,21 @@ void intel_vgpu_reset_submission(struct intel_vgpu *vgpu,
 	s->ops->reset(vgpu, engine_mask);
 }
 
+static void
+i915_context_ppgtt_root_save(struct intel_vgpu_submission *s)
+{
+	struct i915_hw_ppgtt *i915_ppgtt = s->shadow_ctx->ppgtt;
+	int i;
+
+	if (i915_vm_is_48bit(&i915_ppgtt->vm))
+		s->i915_context_pml4 = px_dma(&i915_ppgtt->pml4);
+	else {
+		for (i = 0; i < GEN8_3LVL_PDPES; i++)
+			s->i915_context_pdps[i] =
+				px_dma(i915_ppgtt->pdp.page_directory[i]);
+	}
+}
+
 /**
  * intel_vgpu_setup_submission - setup submission-related resource for vGPU
  * @vgpu: a vGPU
@@ -1138,6 +1187,8 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
 	if (IS_ERR(s->shadow_ctx))
 		return PTR_ERR(s->shadow_ctx);
 
+	i915_context_ppgtt_root_save(s);
+
 	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);
 
 	s->workloads = kmem_cache_create_usercopy("gvt-g_vgpu_workload",
@@ -1230,6 +1281,9 @@ void intel_vgpu_destroy_workload(struct intel_vgpu_workload *workload)
 {
 	struct intel_vgpu_submission *s = &workload->vgpu->submission;
 
+	release_shadow_batch_buffer(workload);
+	release_shadow_wa_ctx(&workload->wa_ctx);
+
 	if (workload->shadow_mm)
 		intel_vgpu_mm_put(workload->shadow_mm);
 
diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h
index ca5529d0e48e..2065cba59aab 100644
--- a/drivers/gpu/drm/i915/gvt/scheduler.h
+++ b/drivers/gpu/drm/i915/gvt/scheduler.h
@@ -83,6 +83,7 @@ struct intel_vgpu_workload {
 	struct i915_request *req;
 	/* if this workload has been dispatched to i915? */
 	bool dispatched;
+	bool shadow;      /* if workload has done shadow of guest request */
 	int status;
 
 	struct intel_vgpu_mm *shadow_mm;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 38dcee1ca062..40a61ef9aac1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -984,8 +984,8 @@ static int i915_gpu_info_open(struct inode *inode, struct file *file)
 	intel_runtime_pm_get(i915);
 	gpu = i915_capture_gpu_state(i915);
 	intel_runtime_pm_put(i915);
-	if (!gpu)
-		return -ENOMEM;
+	if (IS_ERR(gpu))
+		return PTR_ERR(gpu);
 
 	file->private_data = gpu;
 	return 0;
@@ -1018,7 +1018,13 @@ i915_error_state_write(struct file *filp,
 
 static int i915_error_state_open(struct inode *inode, struct file *file)
 {
-	file->private_data = i915_first_error_state(inode->i_private);
+	struct i915_gpu_state *error;
+
+	error = i915_first_error_state(inode->i_private);
+	if (IS_ERR(error))
+		return PTR_ERR(error);
+
+	file->private_data  = error;
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index d36a9755ad91..216f52b744a6 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -1282,8 +1282,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
 	if (args->size == 0)
 		return 0;
 
-	if (!access_ok(VERIFY_WRITE,
-		       u64_to_user_ptr(args->data_ptr),
+	if (!access_ok(u64_to_user_ptr(args->data_ptr),
 		       args->size))
 		return -EFAULT;
 
@@ -1609,9 +1608,7 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
 	if (args->size == 0)
 		return 0;
 
-	if (!access_ok(VERIFY_READ,
-		       u64_to_user_ptr(args->data_ptr),
-		       args->size))
+	if (!access_ok(u64_to_user_ptr(args->data_ptr), args->size))
 		return -EFAULT;
 
 	obj = i915_gem_object_lookup(file, args->handle);
@@ -2559,7 +2556,7 @@ static int i915_gem_object_get_pages_gtt(struct drm_i915_gem_object *obj)
 	 * If there's no chance of allocating enough pages for the whole
 	 * object, bail early.
 	 */
-	if (page_count > totalram_pages)
+	if (page_count > totalram_pages())
 		return -ENOMEM;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index 10a4afb4f235..485b259127c3 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -26,7 +26,7 @@
  *
  */
 
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
 #include <linux/reservation.h>
 #include <linux/sync_file.h>
 #include <linux/uaccess.h>
@@ -1268,7 +1268,7 @@ relocate_entry(struct i915_vma *vma,
 		else if (gen >= 4)
 			len = 4;
 		else
-			len = 6;
+			len = 3;
 
 		batch = reloc_gpu(eb, vma, len);
 		if (IS_ERR(batch))
@@ -1309,11 +1309,6 @@ relocate_entry(struct i915_vma *vma,
 			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
 			*batch++ = addr;
 			*batch++ = target_offset;
-
-			/* And again for good measure (blb/pnv) */
-			*batch++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
-			*batch++ = addr;
-			*batch++ = target_offset;
 		}
 
 		goto out;
@@ -1452,7 +1447,7 @@ static int eb_relocate_vma(struct i915_execbuffer *eb, struct i915_vma *vma)
 	 * to read. However, if the array is not writable the user loses
 	 * the updated relocation values.
 	 */
-	if (unlikely(!access_ok(VERIFY_READ, urelocs, remain*sizeof(*urelocs))))
+	if (unlikely(!access_ok(urelocs, remain*sizeof(*urelocs))))
 		return -EFAULT;
 
 	do {
@@ -1559,7 +1554,7 @@ static int check_relocations(const struct drm_i915_gem_exec_object2 *entry)
 
 	addr = u64_to_user_ptr(entry->relocs_ptr);
 	size *= sizeof(struct drm_i915_gem_relocation_entry);
-	if (!access_ok(VERIFY_READ, addr, size))
+	if (!access_ok(addr, size))
 		return -EFAULT;
 
 	end = addr + size;
@@ -1610,6 +1605,7 @@ static int eb_copy_relocations(const struct i915_execbuffer *eb)
 					     (char __user *)urelocs + copied,
 					     len)) {
 end_user:
+				user_access_end();
 				kvfree(relocs);
 				err = -EFAULT;
 				goto err;
@@ -1628,7 +1624,9 @@ end_user:
 		 * happened we would make the mistake of assuming that the
 		 * relocations were valid.
 		 */
-		user_access_begin();
+		if (!user_access_begin(urelocs, size))
+			goto end_user;
+
 		for (copied = 0; copied < nreloc; copied++)
 			unsafe_put_user(-1,
 					&urelocs[copied].presumed_offset,
@@ -2095,7 +2093,7 @@ get_fence_array(struct drm_i915_gem_execbuffer2 *args,
 		return ERR_PTR(-EINVAL);
 
 	user = u64_to_user_ptr(args->cliprects_ptr);
-	if (!access_ok(VERIFY_READ, user, nfences * sizeof(*user)))
+	if (!access_ok(user, nfences * sizeof(*user)))
 		return ERR_PTR(-EFAULT);
 
 	fences = kvmalloc_array(nfences, sizeof(*fences),
@@ -2610,7 +2608,16 @@ i915_gem_execbuffer2_ioctl(struct drm_device *dev, void *data,
 		unsigned int i;
 
 		/* Copy the new buffer offsets back to the user's exec list. */
-		user_access_begin();
+		/*
+		 * Note: count * sizeof(*user_exec_list) does not overflow,
+		 * because we checked 'count' in check_buffer_count().
+		 *
+		 * And this range already got effectively checked earlier
+		 * when we did the "copy_from_user()" above.
+		 */
+		if (!user_access_begin(user_exec_list, count * sizeof(*user_exec_list)))
+			goto end_user;
+
 		for (i = 0; i < args->buffer_count; i++) {
 			if (!(exec2_list[i].offset & UPDATE))
 				continue;
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index add1fe7aeb93..bd17dd1f5da5 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -2075,6 +2075,7 @@ static struct i915_vma *pd_vma_create(struct gen6_hw_ppgtt *ppgtt, int size)
 int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
 {
 	struct gen6_hw_ppgtt *ppgtt = to_gen6_ppgtt(base);
+	int err;
 
 	/*
 	 * Workaround the limited maximum vma->pin_count and the aliasing_ppgtt
@@ -2090,9 +2091,17 @@ int gen6_ppgtt_pin(struct i915_hw_ppgtt *base)
 	 * allocator works in address space sizes, so it's multiplied by page
 	 * size. We allocate at the top of the GTT to avoid fragmentation.
 	 */
-	return i915_vma_pin(ppgtt->vma,
-			    0, GEN6_PD_ALIGN,
-			    PIN_GLOBAL | PIN_HIGH);
+	err = i915_vma_pin(ppgtt->vma,
+			   0, GEN6_PD_ALIGN,
+			   PIN_GLOBAL | PIN_HIGH);
+	if (err)
+		goto unpin;
+
+	return 0;
+
+unpin:
+	ppgtt->pin_count = 0;
+	return err;
 }
 
 void gen6_ppgtt_unpin(struct i915_hw_ppgtt *base)
diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c
index 2c9b284036d1..9558582c105e 100644
--- a/drivers/gpu/drm/i915/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/i915_gem_userptr.c
@@ -113,27 +113,25 @@ static void del_object(struct i915_mmu_object *mo)
 }
 
 static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
-						       struct mm_struct *mm,
-						       unsigned long start,
-						       unsigned long end,
-						       bool blockable)
+			const struct mmu_notifier_range *range)
 {
 	struct i915_mmu_notifier *mn =
 		container_of(_mn, struct i915_mmu_notifier, mn);
 	struct i915_mmu_object *mo;
 	struct interval_tree_node *it;
 	LIST_HEAD(cancelled);
+	unsigned long end;
 
 	if (RB_EMPTY_ROOT(&mn->objects.rb_root))
 		return 0;
 
 	/* interval ranges are inclusive, but invalidate range is exclusive */
-	end--;
+	end = range->end - 1;
 
 	spin_lock(&mn->lock);
-	it = interval_tree_iter_first(&mn->objects, start, end);
+	it = interval_tree_iter_first(&mn->objects, range->start, end);
 	while (it) {
-		if (!blockable) {
+		if (!range->blockable) {
 			spin_unlock(&mn->lock);
 			return -EAGAIN;
 		}
@@ -151,7 +149,7 @@ static int i915_gem_userptr_mn_invalidate_range_start(struct mmu_notifier *_mn,
 			queue_work(mn->wq, &mo->work);
 
 		list_add(&mo->link, &cancelled);
-		it = interval_tree_iter_next(it, start, end);
+		it = interval_tree_iter_next(it, range->start, end);
 	}
 	list_for_each_entry(mo, &cancelled, link)
 		del_object(mo);
@@ -791,8 +789,7 @@ i915_gem_userptr_ioctl(struct drm_device *dev,
 	if (offset_in_page(args->user_ptr | args->user_size))
 		return -EINVAL;
 
-	if (!access_ok(args->flags & I915_USERPTR_READ_ONLY ? VERIFY_READ : VERIFY_WRITE,
-		       (char __user *)(unsigned long)args->user_ptr, args->user_size))
+	if (!access_ok((char __user *)(unsigned long)args->user_ptr, args->user_size))
 		return -EFAULT;
 
 	if (args->flags & I915_USERPTR_READ_ONLY) {
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 07465123c166..3f9ce403c755 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -1907,9 +1907,16 @@ i915_capture_gpu_state(struct drm_i915_private *i915)
 {
 	struct i915_gpu_state *error;
 
+	/* Check if GPU capture has been disabled */
+	error = READ_ONCE(i915->gpu_error.first_error);
+	if (IS_ERR(error))
+		return error;
+
 	error = kzalloc(sizeof(*error), GFP_ATOMIC);
-	if (!error)
-		return NULL;
+	if (!error) {
+		i915_disable_error_state(i915, -ENOMEM);
+		return ERR_PTR(-ENOMEM);
+	}
 
 	kref_init(&error->ref);
 	error->i915 = i915;
@@ -1945,11 +1952,8 @@ void i915_capture_error_state(struct drm_i915_private *i915,
 		return;
 
 	error = i915_capture_gpu_state(i915);
-	if (!error) {
-		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
-		i915_disable_error_state(i915, -ENOMEM);
+	if (IS_ERR(error))
 		return;
-	}
 
 	i915_error_capture_msg(i915, error, engine_mask, error_msg);
 	DRM_INFO("%s\n", error->error_msg);
@@ -1987,7 +1991,7 @@ i915_first_error_state(struct drm_i915_private *i915)
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
-	if (error)
+	if (!IS_ERR_OR_NULL(error))
 		i915_gpu_state_get(error);
 	spin_unlock_irq(&i915->gpu_error.lock);
 
@@ -2000,10 +2004,11 @@ void i915_reset_error_state(struct drm_i915_private *i915)
 
 	spin_lock_irq(&i915->gpu_error.lock);
 	error = i915->gpu_error.first_error;
-	i915->gpu_error.first_error = NULL;
+	if (error != ERR_PTR(-ENODEV)) /* if disabled, always disabled */
+		i915->gpu_error.first_error = NULL;
 	spin_unlock_irq(&i915->gpu_error.lock);
 
-	if (!IS_ERR(error))
+	if (!IS_ERR_OR_NULL(error))
 		i915_gpu_state_put(error);
 }
 
diff --git a/drivers/gpu/drm/i915/i915_ioc32.c b/drivers/gpu/drm/i915/i915_ioc32.c
index 0e5c580d117c..e869daf9c8a9 100644
--- a/drivers/gpu/drm/i915/i915_ioc32.c
+++ b/drivers/gpu/drm/i915/i915_ioc32.c
@@ -52,7 +52,7 @@ static int compat_i915_getparam(struct file *file, unsigned int cmd,
 		return -EFAULT;
 
 	request = compat_alloc_user_space(sizeof(*request));
-	if (!access_ok(VERIFY_WRITE, request, sizeof(*request)) ||
+	if (!access_ok(request, sizeof(*request)) ||
 	    __put_user(req32.param, &request->param) ||
 	    __put_user((void __user *)(unsigned long)req32.value,
 		       &request->value))
diff --git a/drivers/gpu/drm/i915/i915_perf.c b/drivers/gpu/drm/i915/i915_perf.c
index 4529edfdcfc8..2b2eb57ca71f 100644
--- a/drivers/gpu/drm/i915/i915_perf.c
+++ b/drivers/gpu/drm/i915/i915_perf.c
@@ -3052,7 +3052,7 @@ static struct i915_oa_reg *alloc_oa_regs(struct drm_i915_private *dev_priv,
 	if (!n_regs)
 		return NULL;
 
-	if (!access_ok(VERIFY_READ, regs, n_regs * sizeof(u32) * 2))
+	if (!access_ok(regs, n_regs * sizeof(u32) * 2))
 		return ERR_PTR(-EFAULT);
 
 	/* No is_valid function means we're not allowing any register to be programmed. */
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index 6fc4b8eeab42..fe56465cdfd6 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -46,7 +46,7 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	if (topo.flags != 0)
 		return -EINVAL;
 
-	if (!access_ok(VERIFY_WRITE, u64_to_user_ptr(query_item->data_ptr),
+	if (!access_ok(u64_to_user_ptr(query_item->data_ptr),
 		       total_length))
 		return -EFAULT;
 
diff --git a/drivers/gpu/drm/i915/i915_sysfs.c b/drivers/gpu/drm/i915/i915_sysfs.c
index 535caebd9813..c0cfe7ae2ba5 100644
--- a/drivers/gpu/drm/i915/i915_sysfs.c
+++ b/drivers/gpu/drm/i915/i915_sysfs.c
@@ -521,7 +521,9 @@ static ssize_t error_state_read(struct file *filp, struct kobject *kobj,
 	ssize_t ret;
 
 	gpu = i915_first_error_state(i915);
-	if (gpu) {
+	if (IS_ERR(gpu)) {
+		ret = PTR_ERR(gpu);
+	} else if (gpu) {
 		ret = i915_gpu_state_copy_to_buffer(gpu, buf, off, count);
 		i915_gpu_state_put(gpu);
 	} else {
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 07c861884c70..3da9c0f9e948 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -46,7 +46,7 @@
 #include <drm/drm_plane_helper.h>
 #include <drm/drm_rect.h>
 #include <drm/drm_atomic_uapi.h>
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
 #include <linux/reservation.h>
 
 /* Primary plane formats for gen <= 3 */
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d7fa301b5ec7..eab9341a5152 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -303,6 +303,7 @@ static void __unwind_incomplete_requests(struct intel_engine_cs *engine)
 	 */
 	if (!(prio & I915_PRIORITY_NEWCLIENT)) {
 		prio |= I915_PRIORITY_NEWCLIENT;
+		active->sched.attr.priority = prio;
 		list_move_tail(&active->sched.link,
 			       i915_sched_lookup_priolist(engine, prio));
 	}
@@ -398,8 +399,13 @@ static u64 execlists_update_context(struct i915_request *rq)
 	 * may not be visible to the HW prior to the completion of the UC
 	 * register write and that we may begin execution from the context
 	 * before its image is complete leading to invalid PD chasing.
+	 *
+	 * Furthermore, Braswell, at least, wants a full mb to be sure that
+	 * the writes are coherent in memory (visible to the GPU) prior to
+	 * execution, and not just visible to other CPUs (as is the result of
+	 * wmb).
 	 */
-	wmb();
+	mb();
 	return ce->lrc_desc;
 }
 
@@ -640,6 +646,9 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 		int i;
 
 		priolist_for_each_request_consume(rq, rn, p, i) {
+			GEM_BUG_ON(last &&
+				   need_preempt(engine, last, rq_prio(rq)));
+
 			/*
 			 * Can we combine this request with the current port?
 			 * It has to be the same context/ringbuffer and not
@@ -2239,6 +2248,8 @@ static int logical_ring_init(struct intel_engine_cs *engine)
 	if (ret)
 		return ret;
 
+	intel_engine_init_workarounds(engine);
+
 	if (HAS_LOGICAL_RING_ELSQ(i915)) {
 		execlists->submit_reg = i915->regs +
 			i915_mmio_reg_offset(RING_EXECLIST_SQ_CONTENTS(engine));
@@ -2305,7 +2316,6 @@ int logical_render_ring_init(struct intel_engine_cs *engine)
 	}
 
 	intel_engine_init_whitelist(engine);
-	intel_engine_init_workarounds(engine);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c
index 419e56342523..f71970df9936 100644
--- a/drivers/gpu/drm/i915/intel_psr.c
+++ b/drivers/gpu/drm/i915/intel_psr.c
@@ -274,10 +274,16 @@ void intel_psr_init_dpcd(struct intel_dp *intel_dp)
 	DRM_DEBUG_KMS("eDP panel supports PSR version %x\n",
 		      intel_dp->psr_dpcd[0]);
 
+	if (drm_dp_has_quirk(&intel_dp->desc, DP_DPCD_QUIRK_NO_PSR)) {
+		DRM_DEBUG_KMS("PSR support not currently available for this panel\n");
+		return;
+	}
+
 	if (!(intel_dp->edp_dpcd[1] & DP_EDP_SET_POWER_CAP)) {
 		DRM_DEBUG_KMS("Panel lacks power state control, PSR cannot be enabled\n");
 		return;
 	}
+
 	dev_priv->psr.sink_support = true;
 	dev_priv->psr.sink_sync_latency =
 		intel_dp_get_sink_sync_latency(intel_dp);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index c5eb26a7ee79..fbeaec3994e7 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -69,19 +69,28 @@ unsigned int intel_ring_update_space(struct intel_ring *ring)
 static int
 gen2_render_ring_flush(struct i915_request *rq, u32 mode)
 {
+	unsigned int num_store_dw;
 	u32 cmd, *cs;
 
 	cmd = MI_FLUSH;
-
+	num_store_dw = 0;
 	if (mode & EMIT_INVALIDATE)
 		cmd |= MI_READ_FLUSH;
+	if (mode & EMIT_FLUSH)
+		num_store_dw = 4;
 
-	cs = intel_ring_begin(rq, 2);
+	cs = intel_ring_begin(rq, 2 + 3 * num_store_dw);
 	if (IS_ERR(cs))
 		return PTR_ERR(cs);
 
 	*cs++ = cmd;
-	*cs++ = MI_NOOP;
+	while (num_store_dw--) {
+		*cs++ = MI_STORE_DWORD_IMM | MI_MEM_VIRTUAL;
+		*cs++ = i915_scratch_offset(rq->i915);
+		*cs++ = 0;
+	}
+	*cs++ = MI_FLUSH | MI_NO_WRITE_FLUSH;
+
 	intel_ring_advance(rq, cs);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
index 69fe86b30fbb..a9ed0ecc94e2 100644
--- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c
@@ -170,7 +170,7 @@ static int igt_ppgtt_alloc(void *arg)
 	 * This should ensure that we do not run into the oomkiller during
 	 * the test and take down the machine wilfully.
 	 */
-	limit = totalram_pages << PAGE_SHIFT;
+	limit = totalram_pages() << PAGE_SHIFT;
 	limit = min(ppgtt->vm.total, limit);
 
 	/* Check we can allocate the entire range */
@@ -1244,7 +1244,7 @@ static int exercise_mock(struct drm_i915_private *i915,
 				     u64 hole_start, u64 hole_end,
 				     unsigned long end_time))
 {
-	const u64 limit = totalram_pages << PAGE_SHIFT;
+	const u64 limit = totalram_pages() << PAGE_SHIFT;
 	struct i915_gem_context *ctx;
 	struct i915_hw_ppgtt *ppgtt;
 	IGT_TIMEOUT(end_time);
diff --git a/drivers/gpu/drm/mediatek/mtk_dsi.c b/drivers/gpu/drm/mediatek/mtk_dsi.c
index 66df1b177959..27b507eb4a99 100644
--- a/drivers/gpu/drm/mediatek/mtk_dsi.c
+++ b/drivers/gpu/drm/mediatek/mtk_dsi.c
@@ -818,10 +818,13 @@ static int mtk_dsi_create_conn_enc(struct drm_device *drm, struct mtk_dsi *dsi)
 	dsi->encoder.possible_crtcs = 1;
 
 	/* If there's a bridge, attach to it and let it create the connector */
-	ret = drm_bridge_attach(&dsi->encoder, dsi->bridge, NULL);
-	if (ret) {
-		DRM_ERROR("Failed to attach bridge to drm\n");
-
+	if (dsi->bridge) {
+		ret = drm_bridge_attach(&dsi->encoder, dsi->bridge, NULL);
+		if (ret) {
+			DRM_ERROR("Failed to attach bridge to drm\n");
+			goto err_encoder_cleanup;
+		}
+	} else {
 		/* Otherwise create our own connector and attach to a panel */
 		ret = mtk_dsi_create_connector(drm, dsi);
 		if (ret)
diff --git a/drivers/gpu/drm/meson/meson_crtc.c b/drivers/gpu/drm/meson/meson_crtc.c
index d78168f979db..4f5c67f70c4d 100644
--- a/drivers/gpu/drm/meson/meson_crtc.c
+++ b/drivers/gpu/drm/meson/meson_crtc.c
@@ -106,6 +106,8 @@ static void meson_crtc_atomic_enable(struct drm_crtc *crtc,
 	writel_bits_relaxed(VPP_POSTBLEND_ENABLE, VPP_POSTBLEND_ENABLE,
 			    priv->io_base + _REG(VPP_MISC));
 
+	drm_crtc_vblank_on(crtc);
+
 	priv->viu.osd1_enabled = true;
 }
 
@@ -117,6 +119,8 @@ static void meson_crtc_atomic_disable(struct drm_crtc *crtc,
 
 	DRM_DEBUG_DRIVER("\n");
 
+	drm_crtc_vblank_off(crtc);
+
 	priv->viu.osd1_enabled = false;
 	priv->viu.osd1_commit = false;
 
diff --git a/drivers/gpu/drm/meson/meson_drv.c b/drivers/gpu/drm/meson/meson_drv.c
index 3ee4d4a4ecba..12ff47b13668 100644
--- a/drivers/gpu/drm/meson/meson_drv.c
+++ b/drivers/gpu/drm/meson/meson_drv.c
@@ -75,6 +75,10 @@ static const struct drm_mode_config_funcs meson_mode_config_funcs = {
 	.fb_create           = drm_gem_fb_create,
 };
 
+static const struct drm_mode_config_helper_funcs meson_mode_config_helpers = {
+	.atomic_commit_tail = drm_atomic_helper_commit_tail_rpm,
+};
+
 static irqreturn_t meson_irq(int irq, void *arg)
 {
 	struct drm_device *dev = arg;
@@ -266,6 +270,7 @@ static int meson_drv_bind_master(struct device *dev, bool has_components)
 	drm->mode_config.max_width = 3840;
 	drm->mode_config.max_height = 2160;
 	drm->mode_config.funcs = &meson_mode_config_funcs;
+	drm->mode_config.helper_private	= &meson_mode_config_helpers;
 
 	/* Hardware Initialization */
 
@@ -388,8 +393,10 @@ static int meson_probe_remote(struct platform_device *pdev,
 		remote_node = of_graph_get_remote_port_parent(ep);
 		if (!remote_node ||
 		    remote_node == parent || /* Ignore parent endpoint */
-		    !of_device_is_available(remote_node))
+		    !of_device_is_available(remote_node)) {
+			of_node_put(remote_node);
 			continue;
+		}
 
 		count += meson_probe_remote(pdev, match, remote, remote_node);
 
@@ -408,10 +415,13 @@ static int meson_drv_probe(struct platform_device *pdev)
 
 	for_each_endpoint_of_node(np, ep) {
 		remote = of_graph_get_remote_port_parent(ep);
-		if (!remote || !of_device_is_available(remote))
+		if (!remote || !of_device_is_available(remote)) {
+			of_node_put(remote);
 			continue;
+		}
 
 		count += meson_probe_remote(pdev, &match, np, remote);
+		of_node_put(remote);
 	}
 
 	if (count && !match)
diff --git a/drivers/gpu/drm/meson/meson_dw_hdmi.c b/drivers/gpu/drm/meson/meson_dw_hdmi.c
index d8c5cc34e22e..807111ebfdd9 100644
--- a/drivers/gpu/drm/meson/meson_dw_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_dw_hdmi.c
@@ -696,6 +696,7 @@ static const struct regmap_config meson_dw_hdmi_regmap_config = {
 	.reg_read = meson_dw_hdmi_reg_read,
 	.reg_write = meson_dw_hdmi_reg_write,
 	.max_register = 0x10000,
+	.fast_io = true,
 };
 
 static bool meson_hdmi_connector_is_available(struct device *dev)
diff --git a/drivers/gpu/drm/meson/meson_venc.c b/drivers/gpu/drm/meson/meson_venc.c
index e95e0e7a7fa1..0ba04f6813e6 100644
--- a/drivers/gpu/drm/meson/meson_venc.c
+++ b/drivers/gpu/drm/meson/meson_venc.c
@@ -71,6 +71,7 @@
  */
 
 /* HHI Registers */
+#define HHI_GCLK_MPEG2		0x148 /* 0x52 offset in data sheet */
 #define HHI_VDAC_CNTL0		0x2F4 /* 0xbd offset in data sheet */
 #define HHI_VDAC_CNTL1		0x2F8 /* 0xbe offset in data sheet */
 #define HHI_HDMI_PHY_CNTL0	0x3a0 /* 0xe8 offset in data sheet */
@@ -840,6 +841,7 @@ struct meson_hdmi_venc_vic_mode {
 	{ 5, &meson_hdmi_encp_mode_1080i60 },
 	{ 20, &meson_hdmi_encp_mode_1080i50 },
 	{ 32, &meson_hdmi_encp_mode_1080p24 },
+	{ 33, &meson_hdmi_encp_mode_1080p50 },
 	{ 34, &meson_hdmi_encp_mode_1080p30 },
 	{ 31, &meson_hdmi_encp_mode_1080p50 },
 	{ 16, &meson_hdmi_encp_mode_1080p60 },
@@ -1659,10 +1661,12 @@ unsigned int meson_venci_get_field(struct meson_drm *priv)
 void meson_venc_enable_vsync(struct meson_drm *priv)
 {
 	writel_relaxed(2, priv->io_base + _REG(VENC_INTCTRL));
+	regmap_update_bits(priv->hhi, HHI_GCLK_MPEG2, BIT(25), BIT(25));
 }
 
 void meson_venc_disable_vsync(struct meson_drm *priv)
 {
+	regmap_update_bits(priv->hhi, HHI_GCLK_MPEG2, BIT(25), 0);
 	writel_relaxed(0, priv->io_base + _REG(VENC_INTCTRL));
 }
 
diff --git a/drivers/gpu/drm/meson/meson_viu.c b/drivers/gpu/drm/meson/meson_viu.c
index 0ba87ff95530..e46e05f50bad 100644
--- a/drivers/gpu/drm/meson/meson_viu.c
+++ b/drivers/gpu/drm/meson/meson_viu.c
@@ -184,18 +184,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
 	if (lut_sel == VIU_LUT_OSD_OETF) {
 		writel(0, priv->io_base + _REG(addr_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
 		writel(r_map[OSD_OETF_LUT_SIZE - 1] | (g_map[0] << 16),
 			priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16),
 				priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_OETF_LUT_SIZE / 2); i++)
 			writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
@@ -211,18 +211,18 @@ void meson_viu_set_osd_lut(struct meson_drm *priv, enum viu_lut_sel_e lut_sel,
 	} else if (lut_sel == VIU_LUT_OSD_EOTF) {
 		writel(0, priv->io_base + _REG(addr_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(r_map[i * 2] | (r_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
 		writel(r_map[OSD_EOTF_LUT_SIZE - 1] | (g_map[0] << 16),
 			priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(g_map[i * 2 + 1] | (g_map[i * 2 + 2] << 16),
 				priv->io_base + _REG(data_port));
 
-		for (i = 0; i < 20; i++)
+		for (i = 0; i < (OSD_EOTF_LUT_SIZE / 2); i++)
 			writel(b_map[i * 2] | (b_map[i * 2 + 1] << 16),
 				priv->io_base + _REG(data_port));
 
diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
index e942316b3ba7..ce1b3cc4bf6d 100644
--- a/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
+++ b/drivers/gpu/drm/msm/adreno/a6xx_gmu.c
@@ -927,26 +927,6 @@ static int a6xx_gmu_memory_probe(struct a6xx_gmu *gmu)
 	return ret;
 }
 
-/* Get the list of RPMh voltage levels from cmd-db */
-static int a6xx_gmu_rpmh_arc_cmds(const char *id, void *vals, int size)
-{
-	u32 len = cmd_db_read_aux_data_len(id);
-
-	if (!len)
-		return 0;
-
-	if (WARN_ON(len > size))
-		return -EINVAL;
-
-	cmd_db_read_aux_data(id, vals, len);
-
-	/*
-	 * The data comes back as an array of unsigned shorts so adjust the
-	 * count accordingly
-	 */
-	return len >> 1;
-}
-
 /* Return the 'arc-level' for the given frequency */
 static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq)
 {
@@ -974,11 +954,30 @@ static u32 a6xx_gmu_get_arc_level(struct device *dev, unsigned long freq)
 }
 
 static int a6xx_gmu_rpmh_arc_votes_init(struct device *dev, u32 *votes,
-		unsigned long *freqs, int freqs_count,
-		u16 *pri, int pri_count,
-		u16 *sec, int sec_count)
+		unsigned long *freqs, int freqs_count, const char *id)
 {
 	int i, j;
+	const u16 *pri, *sec;
+	size_t pri_count, sec_count;
+
+	pri = cmd_db_read_aux_data(id, &pri_count);
+	if (IS_ERR(pri))
+		return PTR_ERR(pri);
+	/*
+	 * The data comes back as an array of unsigned shorts so adjust the
+	 * count accordingly
+	 */
+	pri_count >>= 1;
+	if (!pri_count)
+		return -EINVAL;
+
+	sec = cmd_db_read_aux_data("mx.lvl", &sec_count);
+	if (IS_ERR(sec))
+		return PTR_ERR(sec);
+
+	sec_count >>= 1;
+	if (!sec_count)
+		return -EINVAL;
 
 	/* Construct a vote for each frequency */
 	for (i = 0; i < freqs_count; i++) {
@@ -1037,25 +1036,15 @@ static int a6xx_gmu_rpmh_votes_init(struct a6xx_gmu *gmu)
 	struct a6xx_gpu *a6xx_gpu = container_of(gmu, struct a6xx_gpu, gmu);
 	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
 	struct msm_gpu *gpu = &adreno_gpu->base;
-
-	u16 gx[16], cx[16], mx[16];
-	u32 gxcount, cxcount, mxcount;
 	int ret;
 
-	/* Get the list of available voltage levels for each component */
-	gxcount = a6xx_gmu_rpmh_arc_cmds("gfx.lvl", gx, sizeof(gx));
-	cxcount = a6xx_gmu_rpmh_arc_cmds("cx.lvl", cx, sizeof(cx));
-	mxcount = a6xx_gmu_rpmh_arc_cmds("mx.lvl", mx, sizeof(mx));
-
 	/* Build the GX votes */
 	ret = a6xx_gmu_rpmh_arc_votes_init(&gpu->pdev->dev, gmu->gx_arc_votes,
-		gmu->gpu_freqs, gmu->nr_gpu_freqs,
-		gx, gxcount, mx, mxcount);
+		gmu->gpu_freqs, gmu->nr_gpu_freqs, "gfx.lvl");
 
 	/* Build the CX votes */
 	ret |= a6xx_gmu_rpmh_arc_votes_init(gmu->dev, gmu->cx_arc_votes,
-		gmu->gmu_freqs, gmu->nr_gmu_freqs,
-		cx, cxcount, mx, mxcount);
+		gmu->gmu_freqs, gmu->nr_gmu_freqs, "cx.lvl");
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
index a6f0c38a0a95..9be7c355debd 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_crtc.c
@@ -1421,7 +1421,6 @@ struct drm_crtc *dpu_crtc_init(struct drm_device *dev, struct drm_plane *plane,
 				NULL);
 
 	drm_crtc_helper_add(crtc, &dpu_crtc_helper_funcs);
-	plane->crtc = crtc;
 
 	/* save user friendly CRTC name for later */
 	snprintf(dpu_crtc->name, DPU_CRTC_NAME_SIZE, "crtc%u", crtc->base.id);
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
index 0dda4a603685..36158b7d99cd 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_encoder.c
@@ -459,8 +459,6 @@ static void dpu_encoder_destroy(struct drm_encoder *drm_enc)
 
 	drm_encoder_cleanup(drm_enc);
 	mutex_destroy(&dpu_enc->enc_lock);
-
-	kfree(dpu_enc);
 }
 
 void dpu_encoder_helper_split_config(
diff --git a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
index 0aa9b8e1ae70..0874f0a53bf9 100644
--- a/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
+++ b/drivers/gpu/drm/msm/disp/dpu1/dpu_formats.c
@@ -216,7 +216,7 @@ static const struct dpu_format dpu_format_map[] = {
 	INTERLEAVED_RGB_FMT(XBGR8888,
 		COLOR_8BIT, COLOR_8BIT, COLOR_8BIT, COLOR_8BIT,
 		C2_R_Cr, C0_G_Y, C1_B_Cb, C3_ALPHA, 4,
-		true, 4, 0,
+		false, 4, 0,
 		DPU_FETCH_LINEAR, 1),
 
 	INTERLEAVED_RGB_FMT(RGBA8888,
diff --git a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
index e41f278e4e62..aabab6311043 100644
--- a/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
+++ b/drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
@@ -39,6 +39,8 @@
 #define DSI_PIXEL_PLL_CLK		1
 #define NUM_PROVIDED_CLKS		2
 
+#define VCO_REF_CLK_RATE		19200000
+
 struct dsi_pll_regs {
 	u32 pll_prop_gain_rate;
 	u32 pll_lockdet_rate;
@@ -316,7 +318,7 @@ static int dsi_pll_10nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
 	    parent_rate);
 
 	pll_10nm->vco_current_rate = rate;
-	pll_10nm->vco_ref_clk_rate = parent_rate;
+	pll_10nm->vco_ref_clk_rate = VCO_REF_CLK_RATE;
 
 	dsi_pll_setup_config(pll_10nm);
 
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.c b/drivers/gpu/drm/msm/hdmi/hdmi.c
index f0725761b327..e247d6942a49 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.c
@@ -332,6 +332,12 @@ int msm_hdmi_modeset_init(struct hdmi *hdmi,
 		goto fail;
 	}
 
+	ret = msm_hdmi_hpd_enable(hdmi->connector);
+	if (ret < 0) {
+		DRM_DEV_ERROR(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret);
+		goto fail;
+	}
+
 	encoder->bridge = hdmi->bridge;
 
 	priv->bridges[priv->num_bridges++]       = hdmi->bridge;
@@ -571,7 +577,7 @@ static int msm_hdmi_bind(struct device *dev, struct device *master, void *data)
 {
 	struct drm_device *drm = dev_get_drvdata(master);
 	struct msm_drm_private *priv = drm->dev_private;
-	static struct hdmi_platform_config *hdmi_cfg;
+	struct hdmi_platform_config *hdmi_cfg;
 	struct hdmi *hdmi;
 	struct device_node *of_node = dev->of_node;
 	int i, err;
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi.h b/drivers/gpu/drm/msm/hdmi/hdmi.h
index accc9a61611d..5c5df6ab2a57 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi.h
+++ b/drivers/gpu/drm/msm/hdmi/hdmi.h
@@ -245,6 +245,7 @@ void msm_hdmi_bridge_destroy(struct drm_bridge *bridge);
 
 void msm_hdmi_connector_irq(struct drm_connector *connector);
 struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi);
+int msm_hdmi_hpd_enable(struct drm_connector *connector);
 
 /*
  * i2c adapter for ddc:
diff --git a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
index 99f2f1038258..a6eeab2c4dc3 100644
--- a/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
+++ b/drivers/gpu/drm/msm/hdmi/hdmi_connector.c
@@ -167,8 +167,9 @@ static void enable_hpd_clocks(struct hdmi *hdmi, bool enable)
 	}
 }
 
-static int hpd_enable(struct hdmi_connector *hdmi_connector)
+int msm_hdmi_hpd_enable(struct drm_connector *connector)
 {
+	struct hdmi_connector *hdmi_connector = to_hdmi_connector(connector);
 	struct hdmi *hdmi = hdmi_connector->hdmi;
 	const struct hdmi_platform_config *config = hdmi->config;
 	struct device *dev = &hdmi->pdev->dev;
@@ -450,7 +451,6 @@ struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
 {
 	struct drm_connector *connector = NULL;
 	struct hdmi_connector *hdmi_connector;
-	int ret;
 
 	hdmi_connector = kzalloc(sizeof(*hdmi_connector), GFP_KERNEL);
 	if (!hdmi_connector)
@@ -471,12 +471,6 @@ struct drm_connector *msm_hdmi_connector_init(struct hdmi *hdmi)
 	connector->interlace_allowed = 0;
 	connector->doublescan_allowed = 0;
 
-	ret = hpd_enable(hdmi_connector);
-	if (ret) {
-		dev_err(&hdmi->pdev->dev, "failed to enable HPD: %d\n", ret);
-		return ERR_PTR(ret);
-	}
-
 	drm_connector_attach_encoder(connector, hdmi->encoder);
 
 	return connector;
diff --git a/drivers/gpu/drm/msm/msm_atomic.c b/drivers/gpu/drm/msm/msm_atomic.c
index ff291dd0744b..f5b1256e32b6 100644
--- a/drivers/gpu/drm/msm/msm_atomic.c
+++ b/drivers/gpu/drm/msm/msm_atomic.c
@@ -34,7 +34,12 @@ static void msm_atomic_wait_for_commit_done(struct drm_device *dev,
 		if (!new_crtc_state->active)
 			continue;
 
+		if (drm_crtc_vblank_get(crtc))
+			continue;
+
 		kms->funcs->wait_for_crtc_commit_done(kms, crtc);
+
+		drm_crtc_vblank_put(crtc);
 	}
 }
 
diff --git a/drivers/gpu/drm/msm/msm_debugfs.c b/drivers/gpu/drm/msm/msm_debugfs.c
index 42a2cba78998..fb423d309e91 100644
--- a/drivers/gpu/drm/msm/msm_debugfs.c
+++ b/drivers/gpu/drm/msm/msm_debugfs.c
@@ -84,7 +84,7 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
 	if (ret)
-		return ret;
+		goto free_priv;
 
 	pm_runtime_get_sync(&gpu->pdev->dev);
 	show_priv->state = gpu->funcs->gpu_state_get(gpu);
@@ -94,13 +94,20 @@ static int msm_gpu_open(struct inode *inode, struct file *file)
 
 	if (IS_ERR(show_priv->state)) {
 		ret = PTR_ERR(show_priv->state);
-		kfree(show_priv);
-		return ret;
+		goto free_priv;
 	}
 
 	show_priv->dev = dev;
 
-	return single_open(file, msm_gpu_show, show_priv);
+	ret = single_open(file, msm_gpu_show, show_priv);
+	if (ret)
+		goto free_priv;
+
+	return 0;
+
+free_priv:
+	kfree(show_priv);
+	return ret;
 }
 
 static const struct file_operations msm_gpu_fops = {
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index 6265be8f4119..d2cdc7b553fe 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -563,17 +563,18 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 			kthread_run(kthread_worker_fn,
 				&priv->disp_thread[i].worker,
 				"crtc_commit:%d", priv->disp_thread[i].crtc_id);
-		ret = sched_setscheduler(priv->disp_thread[i].thread,
-							SCHED_FIFO, &param);
-		if (ret)
-			pr_warn("display thread priority update failed: %d\n",
-									ret);
-
 		if (IS_ERR(priv->disp_thread[i].thread)) {
 			DRM_DEV_ERROR(dev, "failed to create crtc_commit kthread\n");
 			priv->disp_thread[i].thread = NULL;
+			goto err_msm_uninit;
 		}
 
+		ret = sched_setscheduler(priv->disp_thread[i].thread,
+					 SCHED_FIFO, &param);
+		if (ret)
+			dev_warn(dev, "disp_thread set priority failed: %d\n",
+				 ret);
+
 		/* initialize event thread */
 		priv->event_thread[i].crtc_id = priv->crtcs[i]->base.id;
 		kthread_init_worker(&priv->event_thread[i].worker);
@@ -582,6 +583,11 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 			kthread_run(kthread_worker_fn,
 				&priv->event_thread[i].worker,
 				"crtc_event:%d", priv->event_thread[i].crtc_id);
+		if (IS_ERR(priv->event_thread[i].thread)) {
+			DRM_DEV_ERROR(dev, "failed to create crtc_event kthread\n");
+			priv->event_thread[i].thread = NULL;
+			goto err_msm_uninit;
+		}
 
 		/**
 		 * event thread should also run at same priority as disp_thread
@@ -591,34 +597,10 @@ static int msm_drm_init(struct device *dev, struct drm_driver *drv)
 		 * failure at crtc commit level.
 		 */
 		ret = sched_setscheduler(priv->event_thread[i].thread,
-							SCHED_FIFO, &param);
+					 SCHED_FIFO, &param);
 		if (ret)
-			pr_warn("display event thread priority update failed: %d\n",
-									ret);
-
-		if (IS_ERR(priv->event_thread[i].thread)) {
-			dev_err(dev, "failed to create crtc_event kthread\n");
-			priv->event_thread[i].thread = NULL;
-		}
-
-		if ((!priv->disp_thread[i].thread) ||
-				!priv->event_thread[i].thread) {
-			/* clean up previously created threads if any */
-			for ( ; i >= 0; i--) {
-				if (priv->disp_thread[i].thread) {
-					kthread_stop(
-						priv->disp_thread[i].thread);
-					priv->disp_thread[i].thread = NULL;
-				}
-
-				if (priv->event_thread[i].thread) {
-					kthread_stop(
-						priv->event_thread[i].thread);
-					priv->event_thread[i].thread = NULL;
-				}
-			}
-			goto err_msm_uninit;
-		}
+			dev_warn(dev, "event_thread set priority failed:%d\n",
+				 ret);
 	}
 
 	ret = drm_vblank_init(ddev, priv->num_crtcs);
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 3cbed4acb0f4..12b983fc0b56 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -77,7 +77,7 @@ void msm_gem_submit_free(struct msm_gem_submit *submit)
 static inline unsigned long __must_check
 copy_from_user_inatomic(void *to, const void __user *from, unsigned long n)
 {
-	if (access_ok(VERIFY_READ, from, n))
+	if (access_ok(from, n))
 		return __copy_from_user_inatomic(to, from, n);
 	return -EFAULT;
 }
@@ -321,6 +321,9 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	uint32_t *ptr;
 	int ret = 0;
 
+	if (!nr_relocs)
+		return 0;
+
 	if (offset % 4) {
 		DRM_ERROR("non-aligned cmdstream buffer: %u\n", offset);
 		return -EINVAL;
@@ -415,7 +418,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	struct msm_file_private *ctx = file->driver_priv;
 	struct msm_gem_submit *submit;
 	struct msm_gpu *gpu = priv->gpu;
-	struct dma_fence *in_fence = NULL;
 	struct sync_file *sync_file = NULL;
 	struct msm_gpu_submitqueue *queue;
 	struct msm_ringbuffer *ring;
@@ -453,6 +455,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		args->nr_bos, args->nr_cmds);
 
 	if (args->flags & MSM_SUBMIT_FENCE_FD_IN) {
+		struct dma_fence *in_fence;
+
 		in_fence = sync_file_get_fence(args->fence_fd);
 
 		if (!in_fence)
@@ -462,11 +466,13 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 		 * Wait if the fence is from a foreign context, or if the fence
 		 * array contains any fence from a foreign context.
 		 */
-		if (!dma_fence_match_context(in_fence, ring->fctx->context)) {
+		ret = 0;
+		if (!dma_fence_match_context(in_fence, ring->fctx->context))
 			ret = dma_fence_wait(in_fence, true);
-			if (ret)
-				return ret;
-		}
+
+		dma_fence_put(in_fence);
+		if (ret)
+			return ret;
 	}
 
 	ret = mutex_lock_interruptible(&dev->struct_mutex);
@@ -595,8 +601,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	}
 
 out:
-	if (in_fence)
-		dma_fence_put(in_fence);
 	submit_cleanup(submit);
 	if (ret)
 		msm_gem_submit_free(submit);
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index abefba5b14e7..10babd18e286 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -347,6 +347,10 @@ static void msm_gpu_crashstate_capture(struct msm_gpu *gpu,
 {
 	struct msm_gpu_state *state;
 
+	/* Check if the target supports capturing crash state */
+	if (!gpu->funcs->gpu_state_get)
+		return;
+
 	/* Only save one crash state at a time */
 	if (gpu->crashstate)
 		return;
@@ -439,10 +443,9 @@ static void recover_worker(struct work_struct *work)
 	if (submit) {
 		struct task_struct *task;
 
-		rcu_read_lock();
-		task = pid_task(submit->pid, PIDTYPE_PID);
+		task = get_pid_task(submit->pid, PIDTYPE_PID);
 		if (task) {
-			comm = kstrdup(task->comm, GFP_ATOMIC);
+			comm = kstrdup(task->comm, GFP_KERNEL);
 
 			/*
 			 * So slightly annoying, in other paths like
@@ -455,10 +458,10 @@ static void recover_worker(struct work_struct *work)
 			 * about the submit going away.
 			 */
 			mutex_unlock(&dev->struct_mutex);
-			cmd = kstrdup_quotable_cmdline(task, GFP_ATOMIC);
+			cmd = kstrdup_quotable_cmdline(task, GFP_KERNEL);
+			put_task_struct(task);
 			mutex_lock(&dev->struct_mutex);
 		}
-		rcu_read_unlock();
 
 		if (comm && cmd) {
 			DRM_DEV_ERROR(dev->dev, "%s: offending task: %s (%s)\n",
diff --git a/drivers/gpu/drm/msm/msm_iommu.c b/drivers/gpu/drm/msm/msm_iommu.c
index 9c313cb129ee..4d62790cd425 100644
--- a/drivers/gpu/drm/msm/msm_iommu.c
+++ b/drivers/gpu/drm/msm/msm_iommu.c
@@ -66,7 +66,7 @@ static int msm_iommu_map(struct msm_mmu *mmu, uint64_t iova,
 //	pm_runtime_get_sync(mmu->dev);
 	ret = iommu_map_sg(iommu->domain, iova, sgt->sgl, sgt->nents, prot);
 //	pm_runtime_put_sync(mmu->dev);
-	WARN_ON(ret < 0);
+	WARN_ON(!ret);
 
 	return (ret == len) ? 0 : -EINVAL;
 }
diff --git a/drivers/gpu/drm/msm/msm_rd.c b/drivers/gpu/drm/msm/msm_rd.c
index d990b5f5154c..d21172933d92 100644
--- a/drivers/gpu/drm/msm/msm_rd.c
+++ b/drivers/gpu/drm/msm/msm_rd.c
@@ -321,10 +321,11 @@ static void snapshot_buf(struct msm_rd_state *rd,
 		uint64_t iova, uint32_t size)
 {
 	struct msm_gem_object *obj = submit->bos[idx].obj;
+	unsigned offset = 0;
 	const char *buf;
 
 	if (iova) {
-		buf += iova - submit->bos[idx].iova;
+		offset = iova - submit->bos[idx].iova;
 	} else {
 		iova = submit->bos[idx].iova;
 		size = obj->base.size;
@@ -345,6 +346,8 @@ static void snapshot_buf(struct msm_rd_state *rd,
 	if (IS_ERR(buf))
 		return;
 
+	buf += offset;
+
 	rd_write_section(rd, RD_BUFFER_CONTENTS, buf, size);
 
 	msm_gem_put_vaddr(&obj->base);
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 4b75ad40dd80..432c440223bb 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -4,7 +4,8 @@ config DRM_NOUVEAU
         select FW_LOADER
 	select DRM_KMS_HELPER
 	select DRM_TTM
-	select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT
+	select BACKLIGHT_CLASS_DEVICE if DRM_NOUVEAU_BACKLIGHT
+	select BACKLIGHT_LCD_SUPPORT if DRM_NOUVEAU_BACKLIGHT
 	select ACPI_VIDEO if ACPI && X86 && BACKLIGHT_CLASS_DEVICE && INPUT
 	select X86_PLATFORM_DEVICES if ACPI && X86
 	select ACPI_WMI if ACPI && X86
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 70dce544984e..1727d399833c 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -67,7 +67,7 @@ nv04_display_create(struct drm_device *dev)
 	for (i = 0; i < dcb->entries; i++) {
 		struct dcb_output *dcbent = &dcb->entry[i];
 
-		connector = nouveau_connector_create(dev, dcbent->connector);
+		connector = nouveau_connector_create(dev, dcbent);
 		if (IS_ERR(connector))
 			continue;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/Kbuild b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
index 849b0f45afb8..3d074aa31173 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/Kbuild
+++ b/drivers/gpu/drm/nouveau/dispnv50/Kbuild
@@ -7,6 +7,7 @@ nouveau-y += dispnv50/core827d.o
 nouveau-y += dispnv50/core907d.o
 nouveau-y += dispnv50/core917d.o
 nouveau-y += dispnv50/corec37d.o
+nouveau-y += dispnv50/corec57d.o
 
 nouveau-y += dispnv50/dac507d.o
 nouveau-y += dispnv50/dac907d.o
@@ -23,12 +24,14 @@ nouveau-y += dispnv50/head827d.o
 nouveau-y += dispnv50/head907d.o
 nouveau-y += dispnv50/head917d.o
 nouveau-y += dispnv50/headc37d.o
+nouveau-y += dispnv50/headc57d.o
 
 nouveau-y += dispnv50/wimm.o
 nouveau-y += dispnv50/wimmc37b.o
 
 nouveau-y += dispnv50/wndw.o
 nouveau-y += dispnv50/wndwc37e.o
+nouveau-y += dispnv50/wndwc57e.o
 
 nouveau-y += dispnv50/base.o
 nouveau-y += dispnv50/base507c.o
diff --git a/drivers/gpu/drm/nouveau/dispnv50/atom.h b/drivers/gpu/drm/nouveau/dispnv50/atom.h
index 908feb1fc60f..a194990d2b0d 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/atom.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/atom.h
@@ -54,9 +54,10 @@ struct nv50_head_atom {
 		u64 offset:40;
 		u8 buffer:1;
 		u8 mode:4;
-		u8 size:2;
+		u16 size:11;
 		u8 range:2;
 		u8 output_mode:2;
+		void (*load)(struct drm_color_lut *, int size, void __iomem *);
 	} olut;
 
 	struct {
@@ -169,9 +170,11 @@ struct nv50_wndw_atom {
 			u8  buffer:1;
 			u8  enable:2;
 			u8  mode:4;
-			u8  size:2;
+			u16 size:11;
 			u8  range:2;
 			u8  output_mode:2;
+			void (*load)(struct drm_color_lut *, int size,
+				     void __iomem *);
 		} i;
 	} xlut;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/base907c.c b/drivers/gpu/drm/nouveau/dispnv50/base907c.c
index a562fc94ce59..049ce6da321c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/base907c.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/base907c.c
@@ -80,6 +80,7 @@ base907c_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	asyw->xlut.i.mode = 7;
 	asyw->xlut.i.enable = 2;
+	asyw->xlut.i.load = head907d_olut_load;
 }
 
 const struct nv50_wndw_func
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.c b/drivers/gpu/drm/nouveau/dispnv50/core.c
index f3c49adb1bdb..c25e0ebe3c92 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.c
@@ -42,6 +42,7 @@ nv50_core_new(struct nouveau_drm *drm, struct nv50_core **pcore)
 		int version;
 		int (*new)(struct nouveau_drm *, s32, struct nv50_core **);
 	} cores[] = {
+		{ TU104_DISP_CORE_CHANNEL_DMA, 0, corec57d_new },
 		{ GV100_DISP_CORE_CHANNEL_DMA, 0, corec37d_new },
 		{ GP102_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
 		{ GP100_DISP_CORE_CHANNEL_DMA, 0, core917d_new },
diff --git a/drivers/gpu/drm/nouveau/dispnv50/core.h b/drivers/gpu/drm/nouveau/dispnv50/core.h
index 8470df9dd13d..df8336b593f7 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/core.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/core.h
@@ -46,5 +46,9 @@ extern const struct nv50_outp_func sor907d;
 int core917d_new(struct nouveau_drm *, s32, struct nv50_core **);
 
 int corec37d_new(struct nouveau_drm *, s32, struct nv50_core **);
+int corec37d_ntfy_wait_done(struct nouveau_bo *, u32, struct nvif_device *);
+void corec37d_update(struct nv50_core *, u32 *, bool);
 extern const struct nv50_outp_func sorc37d;
+
+int corec57d_new(struct nouveau_drm *, s32, struct nv50_core **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
index b5c17c948918..7860774b65bc 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/corec37d.c
@@ -24,7 +24,7 @@
 
 #include <nouveau_bo.h>
 
-static void
+void
 corec37d_update(struct nv50_core *core, u32 *interlock, bool ntfy)
 {
 	u32 *push;
@@ -71,7 +71,7 @@ corec37d_ntfy_init(struct nouveau_bo *bo, u32 offset)
 	nouveau_bo_wr32(bo, offset / 4 + 3, 0x00000000);
 }
 
-void
+static void
 corec37d_init(struct nv50_core *core)
 {
 	const u32 windows = 8; /*XXX*/
diff --git a/drivers/gpu/drm/nouveau/dispnv50/corec57d.c b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c
new file mode 100644
index 000000000000..b606d68cda10
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/corec57d.c
@@ -0,0 +1,61 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "core.h"
+#include "head.h"
+
+static void
+corec57d_init(struct nv50_core *core)
+{
+	const u32 windows = 8; /*XXX*/
+	u32 *push, i;
+	if ((push = evo_wait(&core->chan, 2 + 6 * windows + 2))) {
+		evo_mthd(push, 0x0208, 1);
+		evo_data(push, core->chan.sync.handle);
+		for (i = 0; i < windows; i++) {
+			evo_mthd(push, 0x1000 + (i * 0x080), 3);
+			evo_data(push, i >> 1);
+			evo_data(push, 0x0000000f);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x1010 + (i * 0x080), 1);
+			evo_data(push, 0x00117fff);
+		}
+		evo_mthd(push, 0x0200, 1);
+		evo_data(push, 0x00000001);
+		evo_kick(push, &core->chan);
+	}
+}
+
+static const struct nv50_core_func
+corec57d = {
+	.init = corec57d_init,
+	.ntfy_init = corec37d_ntfy_init,
+	.ntfy_wait_done = corec37d_ntfy_wait_done,
+	.update = corec37d_update,
+	.head = &headc57d,
+	.sor = &sorc37d,
+};
+
+int
+corec57d_new(struct nouveau_drm *drm, s32 oclass, struct nv50_core **pcore)
+{
+	return core507d_new_(&corec57d, drm, oclass, pcore);
+}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/curs.c b/drivers/gpu/drm/nouveau/dispnv50/curs.c
index f592087338c4..cb6e4d2b1b45 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/curs.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/curs.c
@@ -31,6 +31,7 @@ nv50_curs_new(struct nouveau_drm *drm, int head, struct nv50_wndw **pwndw)
 		int version;
 		int (*new)(struct nouveau_drm *, int, s32, struct nv50_wndw **);
 	} curses[] = {
+		{ TU104_DISP_CURSOR, 0, cursc37a_new },
 		{ GV100_DISP_CURSOR, 0, cursc37a_new },
 		{ GK104_DISP_CURSOR, 0, curs907a_new },
 		{ GF110_DISP_CURSOR, 0, curs907a_new },
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.c b/drivers/gpu/drm/nouveau/dispnv50/disp.c
index 6cbbae3f438b..134701a837c8 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.c
@@ -198,6 +198,22 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 /******************************************************************************
  * EVO channel helpers
  *****************************************************************************/
+static void
+evo_flush(struct nv50_dmac *dmac)
+{
+	/* Push buffer fetches are not coherent with BAR1, we need to ensure
+	 * writes have been flushed right through to VRAM before writing PUT.
+	 */
+	if (dmac->push.type & NVIF_MEM_VRAM) {
+		struct nvif_device *device = dmac->base.device;
+		nvif_wr32(&device->object, 0x070000, 0x00000001);
+		nvif_msec(device, 2000,
+			if (!(nvif_rd32(&device->object, 0x070000) & 0x00000002))
+				break;
+		);
+	}
+}
+
 u32 *
 evo_wait(struct nv50_dmac *evoc, int nr)
 {
@@ -208,6 +224,7 @@ evo_wait(struct nv50_dmac *evoc, int nr)
 	mutex_lock(&dmac->lock);
 	if (put + nr >= (PAGE_SIZE / 4) - 8) {
 		dmac->ptr[put] = 0x20000000;
+		evo_flush(dmac);
 
 		nvif_wr32(&dmac->base.user, 0x0000, 0x00000000);
 		if (nvif_msec(device, 2000,
@@ -230,17 +247,7 @@ evo_kick(u32 *push, struct nv50_dmac *evoc)
 {
 	struct nv50_dmac *dmac = evoc;
 
-	/* Push buffer fetches are not coherent with BAR1, we need to ensure
-	 * writes have been flushed right through to VRAM before writing PUT.
-	 */
-	if (dmac->push.type & NVIF_MEM_VRAM) {
-		struct nvif_device *device = dmac->base.device;
-		nvif_wr32(&device->object, 0x070000, 0x00000001);
-		nvif_msec(device, 2000,
-			if (!(nvif_rd32(&device->object, 0x070000) & 0x00000002))
-				break;
-		);
-	}
+	evo_flush(dmac);
 
 	nvif_wr32(&dmac->base.user, 0x0000, (push - dmac->ptr) << 2);
 	mutex_unlock(&dmac->lock);
@@ -1255,8 +1262,16 @@ nv50_mstm_fini(struct nv50_mstm *mstm)
 static void
 nv50_mstm_init(struct nv50_mstm *mstm)
 {
-	if (mstm && mstm->mgr.mst_state)
-		drm_dp_mst_topology_mgr_resume(&mstm->mgr);
+	int ret;
+
+	if (!mstm || !mstm->mgr.mst_state)
+		return;
+
+	ret = drm_dp_mst_topology_mgr_resume(&mstm->mgr);
+	if (ret == -1) {
+		drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
+		drm_kms_helper_hotplug_event(mstm->mgr.dev);
+	}
 }
 
 static void
@@ -1264,6 +1279,7 @@ nv50_mstm_del(struct nv50_mstm **pmstm)
 {
 	struct nv50_mstm *mstm = *pmstm;
 	if (mstm) {
+		drm_dp_mst_topology_mgr_destroy(&mstm->mgr);
 		kfree(*pmstm);
 		*pmstm = NULL;
 	}
@@ -2293,7 +2309,7 @@ nv50_display_create(struct drm_device *dev)
 
 	/* create encoder/connector objects based on VBIOS DCB table */
 	for (i = 0, dcbe = &dcb->entry[0]; i < dcb->entries; i++, dcbe++) {
-		connector = nouveau_connector_create(dev, dcbe->connector);
+		connector = nouveau_connector_create(dev, dcbe);
 		if (IS_ERR(connector))
 			continue;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/disp.h b/drivers/gpu/drm/nouveau/dispnv50/disp.h
index e48c5eb35b49..2216c58620c2 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/disp.h
@@ -45,6 +45,8 @@ struct nv50_disp_interlock {
 
 void corec37d_ntfy_init(struct nouveau_bo *, u32);
 
+void head907d_olut_load(struct drm_color_lut *, int size, void __iomem *);
+
 struct nv50_chan {
 	struct nvif_object user;
 	struct nvif_device *device;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.c b/drivers/gpu/drm/nouveau/dispnv50/head.c
index 4f57e5379796..ac97ebce5b35 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.c
@@ -50,9 +50,9 @@ nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 	if (asyh->set.core   ) head->func->core_set(head, asyh);
 	if (asyh->set.olut   ) {
 		asyh->olut.offset = nv50_lut_load(&head->olut,
-						  asyh->olut.mode <= 1,
 						  asyh->olut.buffer,
-						  asyh->state.gamma_lut);
+						  asyh->state.gamma_lut,
+						  asyh->olut.load);
 		head->func->olut_set(head, asyh);
 	}
 	if (asyh->set.curs   ) head->func->curs_set(head, asyh);
@@ -210,7 +210,7 @@ nv50_head_atomic_check_lut(struct nv50_head *head,
 		}
 	}
 
-	if (!olut) {
+	if (!olut && !head->func->olut_identity) {
 		asyh->olut.handle = 0;
 		return 0;
 	}
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head.h b/drivers/gpu/drm/nouveau/dispnv50/head.h
index 37b3248c6dae..d1c002f534d4 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/head.h
@@ -21,6 +21,7 @@ struct nv50_head_func {
 	void (*view)(struct nv50_head *, struct nv50_head_atom *);
 	void (*mode)(struct nv50_head *, struct nv50_head_atom *);
 	void (*olut)(struct nv50_head *, struct nv50_head_atom *);
+	bool olut_identity;
 	void (*olut_set)(struct nv50_head *, struct nv50_head_atom *);
 	void (*olut_clr)(struct nv50_head *);
 	void (*core_calc)(struct nv50_head *, struct nv50_head_atom *);
@@ -75,4 +76,14 @@ int head917d_curs_layout(struct nv50_head *, struct nv50_wndw_atom *,
 			 struct nv50_head_atom *);
 
 extern const struct nv50_head_func headc37d;
+void headc37d_view(struct nv50_head *, struct nv50_head_atom *);
+void headc37d_core_set(struct nv50_head *, struct nv50_head_atom *);
+void headc37d_core_clr(struct nv50_head *);
+int headc37d_curs_format(struct nv50_head *, struct nv50_wndw_atom *,
+			 struct nv50_head_atom *);
+void headc37d_curs_set(struct nv50_head *, struct nv50_head_atom *);
+void headc37d_curs_clr(struct nv50_head *);
+void headc37d_dither(struct nv50_head *, struct nv50_head_atom *);
+
+extern const struct nv50_head_func headc57d;
 #endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head507d.c b/drivers/gpu/drm/nouveau/dispnv50/head507d.c
index 51bc5996fd37..7561be5ca707 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head507d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head507d.c
@@ -254,6 +254,23 @@ head507d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 	}
 }
 
+static void
+head507d_olut_load(struct drm_color_lut *in, int size, void __iomem *mem)
+{
+	for (; size--; in++, mem += 8) {
+		writew(drm_color_lut_extract(in->  red, 11) << 3, mem + 0);
+		writew(drm_color_lut_extract(in->green, 11) << 3, mem + 2);
+		writew(drm_color_lut_extract(in-> blue, 11) << 3, mem + 4);
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(readw(mem - 8), mem + 0);
+	writew(readw(mem - 6), mem + 2);
+	writew(readw(mem - 4), mem + 4);
+}
+
 void
 head507d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
@@ -261,6 +278,8 @@ head507d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
 		asyh->olut.mode = 0;
 	else
 		asyh->olut.mode = 1;
+
+	asyh->olut.load = head507d_olut_load;
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/dispnv50/head907d.c b/drivers/gpu/drm/nouveau/dispnv50/head907d.c
index 633907163eb1..c2d09dd97b1f 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/head907d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/head907d.c
@@ -214,9 +214,27 @@ head907d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 }
 
 void
+head907d_olut_load(struct drm_color_lut *in, int size, void __iomem *mem)
+{
+	for (; size--; in++, mem += 8) {
+		writew(drm_color_lut_extract(in->  red, 14) + 0x6000, mem + 0);
+		writew(drm_color_lut_extract(in->green, 14) + 0x6000, mem + 2);
+		writew(drm_color_lut_extract(in-> blue, 14) + 0x6000, mem + 4);
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(readw(mem - 8), mem + 0);
+	writew(readw(mem - 6), mem + 2);
+	writew(readw(mem - 4), mem + 4);
+}
+
+void
 head907d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
 	asyh->olut.mode = 7;
+	asyh->olut.load = head907d_olut_load;
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
index 989c14083066..ef6a99d95a9c 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/headc37d.c
@@ -65,7 +65,7 @@ headc37d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
 	}
 }
 
-static void
+void
 headc37d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
 	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
@@ -79,7 +79,7 @@ headc37d_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
 	}
 }
 
-static void
+void
 headc37d_curs_clr(struct nv50_head *head)
 {
 	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
@@ -93,7 +93,7 @@ headc37d_curs_clr(struct nv50_head *head)
 	}
 }
 
-static void
+void
 headc37d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
 	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
@@ -112,7 +112,7 @@ headc37d_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 	}
 }
 
-static int
+int
 headc37d_curs_format(struct nv50_head *head, struct nv50_wndw_atom *asyw,
 		     struct nv50_head_atom *asyh)
 {
@@ -155,6 +155,7 @@ headc37d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
 	asyh->olut.size = 0;
 	asyh->olut.range = 0;
 	asyh->olut.output_mode = 1;
+	asyh->olut.load = head907d_olut_load;
 }
 
 static void
@@ -181,7 +182,7 @@ headc37d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
 	}
 }
 
-static void
+void
 headc37d_view(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
 	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
diff --git a/drivers/gpu/drm/nouveau/dispnv50/headc57d.c b/drivers/gpu/drm/nouveau/dispnv50/headc57d.c
new file mode 100644
index 000000000000..32a7f9e85fb0
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/headc57d.c
@@ -0,0 +1,206 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "head.h"
+#include "atom.h"
+#include "core.h"
+
+static void
+headc57d_or(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		/*XXX: This is a dirty hack until OR depth handling is
+		 *     improved later for deep colour etc.
+		 */
+		switch (asyh->or.depth) {
+		case 6: asyh->or.depth = 5; break;
+		case 5: asyh->or.depth = 4; break;
+		case 2: asyh->or.depth = 1; break;
+		case 0:	asyh->or.depth = 4; break;
+		default:
+			WARN_ON(1);
+			break;
+		}
+
+		evo_mthd(push, 0x2004 + (head->base.index * 0x400), 1);
+		evo_data(push, 0xfc000001 |
+			       asyh->or.depth << 4 |
+			       asyh->or.nvsync << 3 |
+			       asyh->or.nhsync << 2);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc57d_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x2000 + (head->base.index * 0x400), 1);
+#if 0
+		evo_data(push, 0x80000000 |
+			       asyh->procamp.sat.sin << 16 |
+			       asyh->procamp.sat.cos << 4);
+#else
+		evo_data(push, 0);
+#endif
+		evo_kick(push, core);
+	}
+}
+
+void
+headc57d_olut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		evo_mthd(push, 0x2288 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
+}
+
+void
+headc57d_olut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		evo_mthd(push, 0x2280 + (head->base.index * 0x400), 4);
+		evo_data(push, asyh->olut.size << 8 |
+			       asyh->olut.mode << 2 |
+			       asyh->olut.output_mode);
+		evo_data(push, 0xffffffff); /* FP_NORM_SCALE. */
+		evo_data(push, asyh->olut.handle);
+		evo_data(push, asyh->olut.offset >> 8);
+		evo_kick(push, core);
+	}
+}
+
+static void
+headc57d_olut_load_8(struct drm_color_lut *in, int size, void __iomem *mem)
+{
+	memset_io(mem, 0x00, 0x20); /* VSS header. */
+	mem += 0x20;
+
+	while (size--) {
+		u16 r = drm_color_lut_extract(in->  red + 0, 16);
+		u16 g = drm_color_lut_extract(in->green + 0, 16);
+		u16 b = drm_color_lut_extract(in-> blue + 0, 16);
+		u16 ri = 0, gi = 0, bi = 0, i;
+
+		if (in++, size) {
+			ri = (drm_color_lut_extract(in->  red, 16) - r) / 4;
+			gi = (drm_color_lut_extract(in->green, 16) - g) / 4;
+			bi = (drm_color_lut_extract(in-> blue, 16) - b) / 4;
+		}
+
+		for (i = 0; i < 4; i++, mem += 8) {
+			writew(r + ri * i, mem + 0);
+			writew(g + gi * i, mem + 2);
+			writew(b + bi * i, mem + 4);
+		}
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(readw(mem - 8), mem + 0);
+	writew(readw(mem - 6), mem + 2);
+	writew(readw(mem - 4), mem + 4);
+}
+
+static void
+headc57d_olut_load(struct drm_color_lut *in, int size, void __iomem *mem)
+{
+	memset_io(mem, 0x00, 0x20); /* VSS header. */
+	mem += 0x20;
+
+	for (; size--; in++, mem += 0x08) {
+		writew(drm_color_lut_extract(in->  red, 16), mem + 0);
+		writew(drm_color_lut_extract(in->green, 16), mem + 2);
+		writew(drm_color_lut_extract(in-> blue, 16), mem + 4);
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(readw(mem - 8), mem + 0);
+	writew(readw(mem - 6), mem + 2);
+	writew(readw(mem - 4), mem + 4);
+}
+
+void
+headc57d_olut(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	asyh->olut.mode = 2; /* DIRECT10 */
+	asyh->olut.size = 4 /* VSS header. */ + 1024 + 1 /* Entries. */;
+	asyh->olut.output_mode = 1; /* INTERPOLATE_ENABLE. */
+	if (asyh->state.gamma_lut &&
+	    asyh->state.gamma_lut->length / sizeof(struct drm_color_lut) == 256)
+		asyh->olut.load = headc57d_olut_load_8;
+	else
+		asyh->olut.load = headc57d_olut_load;
+}
+
+static void
+headc57d_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->core->chan;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 *push;
+	if ((push = evo_wait(core, 12))) {
+		evo_mthd(push, 0x2064 + (head->base.index * 0x400), 5);
+		evo_data(push, (m->v.active  << 16) | m->h.active );
+		evo_data(push, (m->v.synce   << 16) | m->h.synce  );
+		evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
+		evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
+		evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
+		evo_mthd(push, 0x200c + (head->base.index * 0x400), 1);
+		evo_data(push, m->clock * 1000);
+		evo_mthd(push, 0x2028 + (head->base.index * 0x400), 1);
+		evo_data(push, m->clock * 1000);
+		/*XXX: HEAD_USAGE_BOUNDS, doesn't belong here. */
+		evo_mthd(push, 0x2030 + (head->base.index * 0x400), 1);
+		evo_data(push, 0x00001014);
+		evo_kick(push, core);
+	}
+}
+
+const struct nv50_head_func
+headc57d = {
+	.view = headc37d_view,
+	.mode = headc57d_mode,
+	.olut = headc57d_olut,
+	.olut_identity = true,
+	.olut_set = headc57d_olut_set,
+	.olut_clr = headc57d_olut_clr,
+	.curs_layout = head917d_curs_layout,
+	.curs_format = headc37d_curs_format,
+	.curs_set = headc37d_curs_set,
+	.curs_clr = headc37d_curs_clr,
+	.dither = headc37d_dither,
+	.procamp = headc57d_procamp,
+	.or = headc57d_or,
+};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.c b/drivers/gpu/drm/nouveau/dispnv50/lut.c
index a6b96ae2a22f..994def4fd51a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/lut.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/lut.c
@@ -29,45 +29,29 @@
 #include <nvif/class.h>
 
 u32
-nv50_lut_load(struct nv50_lut *lut, bool legacy, int buffer,
-	      struct drm_property_blob *blob)
+nv50_lut_load(struct nv50_lut *lut, int buffer, struct drm_property_blob *blob,
+	      void (*load)(struct drm_color_lut *, int, void __iomem *))
 {
-	struct drm_color_lut *in = (struct drm_color_lut *)blob->data;
+	struct drm_color_lut *in = blob ? blob->data : NULL;
 	void __iomem *mem = lut->mem[buffer].object.map.ptr;
-	const int size = blob->length / sizeof(*in);
-	int bits, shift, i;
-	u16 zero, r, g, b;
-	u32 addr = lut->mem[buffer].addr;
-
-	/* This can't happen.. But it shuts the compiler up. */
-	if (WARN_ON(size != 256))
-		return 0;
+	const u32 addr = lut->mem[buffer].addr;
+	int i;
 
-	if (legacy) {
-		bits = 11;
-		shift = 3;
-		zero = 0x0000;
+	if (!in) {
+		in = kvmalloc_array(1024, sizeof(*in), GFP_KERNEL);
+		if (!WARN_ON(!in)) {
+			for (i = 0; i < 1024; i++) {
+				in[i].red   =
+				in[i].green =
+				in[i].blue  = (i << 16) >> 10;
+			}
+			load(in, 1024, mem);
+			kvfree(in);
+		}
 	} else {
-		bits = 14;
-		shift = 0;
-		zero = 0x6000;
-	}
-
-	for (i = 0; i < size; i++) {
-		r = (drm_color_lut_extract(in[i].  red, bits) + zero) << shift;
-		g = (drm_color_lut_extract(in[i].green, bits) + zero) << shift;
-		b = (drm_color_lut_extract(in[i]. blue, bits) + zero) << shift;
-		writew(r, mem + (i * 0x08) + 0);
-		writew(g, mem + (i * 0x08) + 2);
-		writew(b, mem + (i * 0x08) + 4);
+		load(in, blob->length / sizeof(*in), mem);
 	}
 
-	/* INTERPOLATE modes require a "next" entry to interpolate with,
-	 * so we replicate the last entry to deal with this for now.
-	 */
-	writew(r, mem + (i * 0x08) + 0);
-	writew(g, mem + (i * 0x08) + 2);
-	writew(b, mem + (i * 0x08) + 4);
 	return addr;
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv50/lut.h b/drivers/gpu/drm/nouveau/dispnv50/lut.h
index 6d7b8352e4cb..b3b9040cfe9a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/lut.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/lut.h
@@ -2,6 +2,7 @@
 #define __NV50_KMS_LUT_H__
 #include <nvif/mem.h>
 struct drm_property_blob;
+struct drm_color_lut;
 struct nv50_disp;
 
 struct nv50_lut {
@@ -10,6 +11,6 @@ struct nv50_lut {
 
 int nv50_lut_init(struct nv50_disp *, struct nvif_mmu *, struct nv50_lut *);
 void nv50_lut_fini(struct nv50_lut *);
-u32 nv50_lut_load(struct nv50_lut *, bool legacy, int buffer,
-		  struct drm_property_blob *);
+u32 nv50_lut_load(struct nv50_lut *, int buffer, struct drm_property_blob *,
+		  void (*)(struct drm_color_lut *, int size, void __iomem *));
 #endif
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wimm.c b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
index fc36e0696407..bc9eeaf212ae 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wimm.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wimm.c
@@ -31,6 +31,7 @@ nv50_wimm_init(struct nouveau_drm *drm, struct nv50_wndw *wndw)
 		int version;
 		int (*init)(struct nouveau_drm *, s32, struct nv50_wndw *);
 	} wimms[] = {
+		{ TU104_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
 		{ GV100_DISP_WINDOW_IMM_CHANNEL_DMA, 0, wimmc37b_init },
 		{}
 	};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.c b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
index 2187922e8dc2..ba9eea2ff16b 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.c
@@ -139,10 +139,8 @@ nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 *interlock,
 	if (asyw->set.xlut ) {
 		if (asyw->ilut) {
 			asyw->xlut.i.offset =
-				nv50_lut_load(&wndw->ilut,
-					      asyw->xlut.i.mode <= 1,
-					      asyw->xlut.i.buffer,
-					      asyw->ilut);
+				nv50_lut_load(&wndw->ilut, asyw->xlut.i.buffer,
+					      asyw->ilut, asyw->xlut.i.load);
 		}
 		wndw->func->xlut_set(wndw, asyw);
 	}
@@ -322,6 +320,11 @@ nv50_wndw_atomic_check_lut(struct nv50_wndw *wndw,
 		asyh->wndw.olut &= ~BIT(wndw->id);
 	}
 
+	if (!ilut && wndw->func->ilut_identity) {
+		static struct drm_property_blob dummy = {};
+		ilut = &dummy;
+	}
+
 	/* Recalculate LUT state. */
 	memset(&asyw->xlut, 0x00, sizeof(asyw->xlut));
 	if ((asyw->ilut = wndw->func->ilut ? ilut : NULL)) {
@@ -623,6 +626,7 @@ nv50_wndw_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
 		int (*new)(struct nouveau_drm *, enum drm_plane_type,
 			   int, s32, struct nv50_wndw **);
 	} wndws[] = {
+		{ TU104_DISP_WINDOW_CHANNEL_DMA, 0, wndwc57e_new },
 		{ GV100_DISP_WINDOW_CHANNEL_DMA, 0, wndwc37e_new },
 		{}
 	};
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndw.h b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
index b0b6428034b0..03f3d8dc235a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndw.h
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndw.h
@@ -65,6 +65,7 @@ struct nv50_wndw_func {
 	int (*ntfy_wait_begun)(struct nouveau_bo *, u32 offset,
 			       struct nvif_device *);
 	void (*ilut)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	bool ilut_identity;
 	bool olut_core;
 	void (*xlut_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
 	void (*xlut_clr)(struct nv50_wndw *);
@@ -90,6 +91,23 @@ extern const struct nv50_wimm_func curs507a;
 
 int wndwc37e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
 		 struct nv50_wndw **);
+int wndwc37e_new_(const struct nv50_wndw_func *, struct nouveau_drm *,
+		  enum drm_plane_type type, int index, s32 oclass, u32 heads,
+		  struct nv50_wndw **);
+int wndwc37e_acquire(struct nv50_wndw *, struct nv50_wndw_atom *,
+		     struct nv50_head_atom *);
+void wndwc37e_release(struct nv50_wndw *, struct nv50_wndw_atom *,
+		      struct nv50_head_atom *);
+void wndwc37e_sema_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void wndwc37e_sema_clr(struct nv50_wndw *);
+void wndwc37e_ntfy_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void wndwc37e_ntfy_clr(struct nv50_wndw *);
+void wndwc37e_image_set(struct nv50_wndw *, struct nv50_wndw_atom *);
+void wndwc37e_image_clr(struct nv50_wndw *);
+void wndwc37e_update(struct nv50_wndw *, u32 *);
+
+int wndwc57e_new(struct nouveau_drm *, enum drm_plane_type, int, s32,
+		 struct nv50_wndw **);
 
 int nv50_wndw_new(struct nouveau_drm *, enum drm_plane_type, int index,
 		  struct nv50_wndw **);
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
index 44afb0f069a5..e52a85c83f7a 100644
--- a/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc37e.c
@@ -61,9 +61,10 @@ wndwc37e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	asyw->xlut.i.size = 0;
 	asyw->xlut.i.range = 0;
 	asyw->xlut.i.output_mode = 1;
+	asyw->xlut.i.load = head907d_olut_load;
 }
 
-static void
+void
 wndwc37e_image_clr(struct nv50_wndw *wndw)
 {
 	u32 *push;
@@ -76,7 +77,7 @@ wndwc37e_image_clr(struct nv50_wndw *wndw)
 	}
 }
 
-static void
+void
 wndwc37e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	u32 *push;
@@ -117,7 +118,7 @@ wndwc37e_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	evo_kick(push, &wndw->wndw);
 }
 
-static void
+void
 wndwc37e_ntfy_clr(struct nv50_wndw *wndw)
 {
 	u32 *push;
@@ -128,7 +129,7 @@ wndwc37e_ntfy_clr(struct nv50_wndw *wndw)
 	}
 }
 
-static void
+void
 wndwc37e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	u32 *push;
@@ -140,7 +141,7 @@ wndwc37e_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	}
 }
 
-static void
+void
 wndwc37e_sema_clr(struct nv50_wndw *wndw)
 {
 	u32 *push;
@@ -151,7 +152,7 @@ wndwc37e_sema_clr(struct nv50_wndw *wndw)
 	}
 }
 
-static void
+void
 wndwc37e_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
 	u32 *push;
@@ -165,7 +166,7 @@ wndwc37e_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 	}
 }
 
-static void
+void
 wndwc37e_update(struct nv50_wndw *wndw, u32 *interlock)
 {
 	u32 *push;
@@ -183,13 +184,13 @@ wndwc37e_update(struct nv50_wndw *wndw, u32 *interlock)
 	}
 }
 
-static void
+void
 wndwc37e_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
 		 struct nv50_head_atom *asyh)
 {
 }
 
-static int
+int
 wndwc37e_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
 		 struct nv50_head_atom *asyh)
 {
@@ -236,7 +237,7 @@ wndwc37e = {
 	.update = wndwc37e_update,
 };
 
-static int
+int
 wndwc37e_new_(const struct nv50_wndw_func *func, struct nouveau_drm *drm,
 	      enum drm_plane_type type, int index, s32 oclass, u32 heads,
 	      struct nv50_wndw **pwndw)
diff --git a/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
new file mode 100644
index 000000000000..ba89f1a5fcfa
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/dispnv50/wndwc57e.c
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "wndw.h"
+#include "atom.h"
+
+#include <drm/drm_atomic_helper.h>
+#include <drm/drm_plane_helper.h>
+#include <nouveau_bo.h>
+
+#include <nvif/clc37e.h>
+
+static void
+wndwc57e_ilut_clr(struct nv50_wndw *wndw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 2))) {
+		evo_mthd(push, 0x0444, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static void
+wndwc57e_ilut_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u32 *push;
+	if ((push = evo_wait(&wndw->wndw, 4))) {
+		evo_mthd(push, 0x0440, 3);
+		evo_data(push, asyw->xlut.i.size << 8 |
+			       asyw->xlut.i.mode << 2 |
+			       asyw->xlut.i.output_mode);
+		evo_data(push, asyw->xlut.handle);
+		evo_data(push, asyw->xlut.i.offset >> 8);
+		evo_kick(push, &wndw->wndw);
+	}
+}
+
+static u16
+fixedU0_16_FP16(u16 fixed)
+{
+        int sign = 0, exp = 0, man = 0;
+        if (fixed) {
+                while (--exp && !(fixed & 0x8000))
+                        fixed <<= 1;
+                man = ((fixed << 1) & 0xffc0) >> 6;
+                exp += 15;
+        }
+        return (sign << 15) | (exp << 10) | man;
+}
+
+static void
+wndwc57e_ilut_load(struct drm_color_lut *in, int size, void __iomem *mem)
+{
+	memset_io(mem, 0x00, 0x20); /* VSS header. */
+	mem += 0x20;
+
+	for (; size--; in++, mem += 0x08) {
+		u16 r = fixedU0_16_FP16(drm_color_lut_extract(in->  red, 16));
+		u16 g = fixedU0_16_FP16(drm_color_lut_extract(in->green, 16));
+		u16 b = fixedU0_16_FP16(drm_color_lut_extract(in-> blue, 16));
+		writew(r, mem + 0);
+		writew(g, mem + 2);
+		writew(b, mem + 4);
+	}
+
+	/* INTERPOLATE modes require a "next" entry to interpolate with,
+	 * so we replicate the last entry to deal with this for now.
+	 */
+	writew(readw(mem - 8), mem + 0);
+	writew(readw(mem - 6), mem + 2);
+	writew(readw(mem - 4), mem + 4);
+}
+
+static void
+wndwc57e_ilut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	u16 size = asyw->ilut->length / sizeof(struct drm_color_lut);
+	if (size == 256) {
+		asyw->xlut.i.mode = 1; /* DIRECT8. */
+	} else {
+		asyw->xlut.i.mode = 2; /* DIRECT10. */
+		size = 1024;
+	}
+	asyw->xlut.i.size = 4 /* VSS header. */ + size + 1 /* Entries. */;
+	asyw->xlut.i.output_mode = 0; /* INTERPOLATE_DISABLE. */
+	asyw->xlut.i.load = wndwc57e_ilut_load;
+}
+
+static const struct nv50_wndw_func
+wndwc57e = {
+	.acquire = wndwc37e_acquire,
+	.release = wndwc37e_release,
+	.sema_set = wndwc37e_sema_set,
+	.sema_clr = wndwc37e_sema_clr,
+	.ntfy_set = wndwc37e_ntfy_set,
+	.ntfy_clr = wndwc37e_ntfy_clr,
+	.ntfy_reset = corec37d_ntfy_init,
+	.ntfy_wait_begun = base507c_ntfy_wait_begun,
+	.ilut = wndwc57e_ilut,
+	.ilut_identity = true,
+	.xlut_set = wndwc57e_ilut_set,
+	.xlut_clr = wndwc57e_ilut_clr,
+	.image_set = wndwc37e_image_set,
+	.image_clr = wndwc37e_image_clr,
+	.update = wndwc37e_update,
+};
+
+int
+wndwc57e_new(struct nouveau_drm *drm, enum drm_plane_type type, int index,
+	     s32 oclass, struct nv50_wndw **pwndw)
+{
+	return wndwc37e_new_(&wndwc57e, drm, type, index, oclass,
+			     BIT(index >> 1), pwndw);
+}
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
index 4f5233107f5f..4cbed0329367 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl0080.h
@@ -32,6 +32,7 @@ struct nv_device_info_v0 {
 #define NV_DEVICE_INFO_V0_MAXWELL                                          0x09
 #define NV_DEVICE_INFO_V0_PASCAL                                           0x0a
 #define NV_DEVICE_INFO_V0_VOLTA                                            0x0b
+#define NV_DEVICE_INFO_V0_TURING                                           0x0c
 	__u8  family;
 	__u8  pad06[2];
 	__u64 ram_size;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index fbfcffc5feb2..81401eb970ea 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -4,12 +4,13 @@
 
 struct kepler_channel_gpfifo_a_v0 {
 	__u8  version;
-	__u8  pad01[1];
+	__u8  priv;
 	__u16 chid;
 	__u32 ilength;
 	__u64 ioffset;
 	__u64 runlist;
 	__u64 vmm;
+	__u64 inst;
 };
 
 #define NVA06F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index 6db56bd7d67e..1d82cbf70cf4 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -68,7 +68,8 @@
 #define KEPLER_CHANNEL_GPFIFO_B                       /* cla06f.h */ 0x0000a16f
 #define MAXWELL_CHANNEL_GPFIFO_A                      /* cla06f.h */ 0x0000b06f
 #define PASCAL_CHANNEL_GPFIFO_A                       /* cla06f.h */ 0x0000c06f
-#define VOLTA_CHANNEL_GPFIFO_A                        /* cla06f.h */ 0x0000c36f
+#define VOLTA_CHANNEL_GPFIFO_A                        /* clc36f.h */ 0x0000c36f
+#define TURING_CHANNEL_GPFIFO_A                       /* clc36f.h */ 0x0000c46f
 
 #define NV50_DISP                                     /* cl5070.h */ 0x00005070
 #define G82_DISP                                      /* cl5070.h */ 0x00008270
@@ -83,6 +84,7 @@
 #define GP100_DISP                                    /* cl5070.h */ 0x00009770
 #define GP102_DISP                                    /* cl5070.h */ 0x00009870
 #define GV100_DISP                                    /* cl5070.h */ 0x0000c370
+#define TU104_DISP                                    /* cl5070.h */ 0x0000c570
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -95,6 +97,7 @@
 #define GF110_DISP_CURSOR                             /* cl507a.h */ 0x0000907a
 #define GK104_DISP_CURSOR                             /* cl507a.h */ 0x0000917a
 #define GV100_DISP_CURSOR                             /* cl507a.h */ 0x0000c37a
+#define TU104_DISP_CURSOR                             /* cl507a.h */ 0x0000c57a
 
 #define NV50_DISP_OVERLAY                             /* cl507b.h */ 0x0000507b
 #define G82_DISP_OVERLAY                              /* cl507b.h */ 0x0000827b
@@ -103,6 +106,7 @@
 #define GK104_DISP_OVERLAY                            /* cl507b.h */ 0x0000917b
 
 #define GV100_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c37b
+#define TU104_DISP_WINDOW_IMM_CHANNEL_DMA             /* clc37b.h */ 0x0000c57b
 
 #define NV50_DISP_BASE_CHANNEL_DMA                    /* cl507c.h */ 0x0000507c
 #define G82_DISP_BASE_CHANNEL_DMA                     /* cl507c.h */ 0x0000827c
@@ -125,6 +129,7 @@
 #define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
 #define GP102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 #define GV100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c37d
+#define TU104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000c57d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -134,6 +139,7 @@
 #define GK104_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000917e
 
 #define GV100_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c37e
+#define TU104_DISP_WINDOW_CHANNEL_DMA                 /* clc37e.h */ 0x0000c57e
 
 #define NV50_TESLA                                                   0x00005097
 #define G82_TESLA                                                    0x00008297
@@ -183,6 +189,7 @@
 #define PASCAL_DMA_COPY_A                                            0x0000c0b5
 #define PASCAL_DMA_COPY_B                                            0x0000c1b5
 #define VOLTA_DMA_COPY_A                                             0x0000c3b5
+#define TURING_DMA_COPY_A                                            0x0000c5b5
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/clc36f.h b/drivers/gpu/drm/nouveau/include/nvif/clc36f.h
new file mode 100644
index 000000000000..6b14d7e3f6bb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvif/clc36f.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NVIF_CLC36F_H__
+#define __NVIF_CLC36F_H__
+
+struct volta_channel_gpfifo_a_v0 {
+	__u8  version;
+	__u8  priv;
+	__u16 chid;
+	__u32 ilength;
+	__u64 ioffset;
+	__u64 runlist;
+	__u64 vmm;
+	__u64 inst;
+	__u32 token;
+};
+
+#define NVC36F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NVC36F_V0_NTFY_KILLED                                              0x01
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index d83d834b7452..72e4dc1f0236 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -61,7 +61,11 @@ enum nvkm_devidx {
 	NVKM_ENGINE_NVENC2,
 	NVKM_ENGINE_NVENC_LAST = NVKM_ENGINE_NVENC2,
 
-	NVKM_ENGINE_NVDEC,
+	NVKM_ENGINE_NVDEC0,
+	NVKM_ENGINE_NVDEC1,
+	NVKM_ENGINE_NVDEC2,
+	NVKM_ENGINE_NVDEC_LAST = NVKM_ENGINE_NVDEC2,
+
 	NVKM_ENGINE_PM,
 	NVKM_ENGINE_SEC,
 	NVKM_ENGINE_SEC2,
@@ -114,6 +118,7 @@ struct nvkm_device {
 		GM100    = 0x110,
 		GP100    = 0x130,
 		GV100    = 0x140,
+		TU100    = 0x160,
 	} card_type;
 	u32 chipset;
 	u8  chiprev;
@@ -163,7 +168,7 @@ struct nvkm_device {
 	struct nvkm_engine *msppp;
 	struct nvkm_engine *msvld;
 	struct nvkm_engine *nvenc[3];
-	struct nvkm_nvdec *nvdec;
+	struct nvkm_nvdec *nvdec[3];
 	struct nvkm_pm *pm;
 	struct nvkm_engine *sec;
 	struct nvkm_sec2 *sec2;
@@ -235,7 +240,7 @@ struct nvkm_device_chip {
 	int (*msppp   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*msvld   )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*nvenc[3])(struct nvkm_device *, int idx, struct nvkm_engine **);
-	int (*nvdec   )(struct nvkm_device *, int idx, struct nvkm_nvdec **);
+	int (*nvdec[3])(struct nvkm_device *, int idx, struct nvkm_nvdec **);
 	int (*pm      )(struct nvkm_device *, int idx, struct nvkm_pm **);
 	int (*sec     )(struct nvkm_device *, int idx, struct nvkm_engine **);
 	int (*sec2    )(struct nvkm_device *, int idx, struct nvkm_sec2 **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
index 05f505de0075..f34c80310861 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
@@ -29,6 +29,7 @@ struct nvkm_memory_func {
 	void *(*dtor)(struct nvkm_memory *);
 	enum nvkm_memory_target (*target)(struct nvkm_memory *);
 	u8 (*page)(struct nvkm_memory *);
+	u64 (*bar2)(struct nvkm_memory *);
 	u64 (*addr)(struct nvkm_memory *);
 	u64 (*size)(struct nvkm_memory *);
 	void (*boot)(struct nvkm_memory *, struct nvkm_vmm *);
@@ -56,6 +57,7 @@ void nvkm_memory_tags_put(struct nvkm_memory *, struct nvkm_device *,
 
 #define nvkm_memory_target(p) (p)->func->target(p)
 #define nvkm_memory_page(p) (p)->func->page(p)
+#define nvkm_memory_bar2(p) (p)->func->bar2(p)
 #define nvkm_memory_addr(p) (p)->func->addr(p)
 #define nvkm_memory_size(p) (p)->func->size(p)
 #define nvkm_memory_boot(p,v) (p)->func->boot((p),(v))
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index fc295e1faa19..86abe76023c2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -11,4 +11,5 @@ int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gv100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int tu104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index ef7dc0844d26..5ca86e178bb9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -36,4 +36,5 @@ int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gv100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int tu104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index 7e39fbed2519..3b2b685778eb 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -74,4 +74,5 @@ int gm20b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gp100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gp10b_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 int gv100_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
+int tu104_fifo_new(struct nvkm_device *, int, struct nvkm_fifo **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h
index f6bd94c7e0f7..fd9d713b611c 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bar.h
@@ -16,8 +16,10 @@ struct nvkm_bar {
 };
 
 struct nvkm_vmm *nvkm_bar_bar1_vmm(struct nvkm_device *);
+void nvkm_bar_bar1_reset(struct nvkm_device *);
 void nvkm_bar_bar2_init(struct nvkm_device *);
 void nvkm_bar_bar2_fini(struct nvkm_device *);
+void nvkm_bar_bar2_reset(struct nvkm_device *);
 struct nvkm_vmm *nvkm_bar_bar2_vmm(struct nvkm_device *);
 void nvkm_bar_flush(struct nvkm_bar *);
 
@@ -27,4 +29,5 @@ int gf100_bar_new(struct nvkm_device *, int, struct nvkm_bar **);
 int gk20a_bar_new(struct nvkm_device *, int, struct nvkm_bar **);
 int gm107_bar_new(struct nvkm_device *, int, struct nvkm_bar **);
 int gm20b_bar_new(struct nvkm_device *, int, struct nvkm_bar **);
+int tu104_bar_new(struct nvkm_device *, int, struct nvkm_bar **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h
index 703a5b524b96..425ccc47e3b7 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/M0203.h
@@ -12,11 +12,14 @@ u32 nvbios_M0203Tp(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		   struct nvbios_M0203T *);
 
 struct nvbios_M0203E {
-#define M0203E_TYPE_DDR2  0x0
-#define M0203E_TYPE_DDR3  0x1
-#define M0203E_TYPE_GDDR3 0x2
-#define M0203E_TYPE_GDDR5 0x3
-#define M0203E_TYPE_SKIP  0xf
+#define M0203E_TYPE_DDR2   0x0
+#define M0203E_TYPE_DDR3   0x1
+#define M0203E_TYPE_GDDR3  0x2
+#define M0203E_TYPE_GDDR5  0x3
+#define M0203E_TYPE_HBM2   0x6
+#define M0203E_TYPE_GDDR5X 0x8
+#define M0203E_TYPE_GDDR6  0x9
+#define M0203E_TYPE_SKIP   0xf
 	u8 type;
 	u8 strap;
 	u8 group;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h
index ed9e0a6a0011..8463b421d345 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/conn.h
@@ -20,6 +20,7 @@ enum dcb_connector_type {
 	DCB_CONNECTOR_DMS59_DP0 = 0x64,
 	DCB_CONNECTOR_DMS59_DP1 = 0x65,
 	DCB_CONNECTOR_WFD	= 0x70,
+	DCB_CONNECTOR_USB_C = 0x71,
 	DCB_CONNECTOR_NONE = 0xff
 };
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
index 486e7635c29d..1b71812a790b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/devinit.h
@@ -31,4 +31,5 @@ int gf100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm107_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gm200_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 int gv100_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
+int tu104_devinit_new(struct nvkm_device *, int, struct nvkm_devinit **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
index 5a77498fe6a0..127f48066026 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fault.h
@@ -30,4 +30,5 @@ struct nvkm_fault_data {
 
 int gp100_fault_new(struct nvkm_device *, int, struct nvkm_fault **);
 int gv100_fault_new(struct nvkm_device *, int, struct nvkm_fault **);
+int tu104_fault_new(struct nvkm_device *, int, struct nvkm_fault **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 96ccc624ee81..27298f8b7ead 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -105,7 +105,10 @@ enum nvkm_ram_type {
 	NVKM_RAM_TYPE_GDDR2,
 	NVKM_RAM_TYPE_GDDR3,
 	NVKM_RAM_TYPE_GDDR4,
-	NVKM_RAM_TYPE_GDDR5
+	NVKM_RAM_TYPE_GDDR5,
+	NVKM_RAM_TYPE_GDDR5X,
+	NVKM_RAM_TYPE_GDDR6,
+	NVKM_RAM_TYPE_HBM2,
 };
 
 struct nvkm_ram {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 61c93c86e2e2..b66dedd8abb6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -31,4 +31,5 @@ int gk104_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gk20a_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gp100_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 int gp10b_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
+int tu104_mc_new(struct nvkm_device *, int, struct nvkm_mc **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
index 688595545e21..0a0e064f22e5 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mmu.h
@@ -130,4 +130,5 @@ int gm20b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gp100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gp10b_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 int gv100_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
+int tu104_mmu_new(struct nvkm_device *, int, struct nvkm_mmu **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index e9b0746826ca..3693ebf371b6 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -28,6 +28,18 @@ struct nvkm_timer {
 u64 nvkm_timer_read(struct nvkm_timer *);
 void nvkm_timer_alarm(struct nvkm_timer *, u32 nsec, struct nvkm_alarm *);
 
+struct nvkm_timer_wait {
+	struct nvkm_timer *tmr;
+	u64 limit;
+	u64 time0;
+	u64 time1;
+	int reads;
+};
+
+void nvkm_timer_wait_init(struct nvkm_device *, u64 nsec,
+			  struct nvkm_timer_wait *);
+s64 nvkm_timer_wait_test(struct nvkm_timer_wait *);
+
 /* Delay based on GPU time (ie. PTIMER).
  *
  * Will return -ETIMEDOUT unless the loop was terminated with 'break',
@@ -38,21 +50,17 @@ void nvkm_timer_alarm(struct nvkm_timer *, u32 nsec, struct nvkm_alarm *);
  */
 #define NVKM_DELAY _warn = false;
 #define nvkm_nsec(d,n,cond...) ({                                              \
-	struct nvkm_device *_device = (d);                                     \
-	struct nvkm_timer *_tmr = _device->timer;                              \
-	u64 _nsecs = (n), _time0 = nvkm_timer_read(_tmr);                      \
-	s64 _taken = 0;                                                        \
+	struct nvkm_timer_wait _wait;                                          \
 	bool _warn = true;                                                     \
+	s64 _taken = 0;                                                        \
                                                                                \
+	nvkm_timer_wait_init((d), (n), &_wait);                                \
 	do {                                                                   \
 		cond                                                           \
-	} while (_taken = nvkm_timer_read(_tmr) - _time0, _taken < _nsecs);    \
+	} while ((_taken = nvkm_timer_wait_test(&_wait)) >= 0);                \
                                                                                \
-	if (_taken >= _nsecs) {                                                \
-		if (_warn)                                                     \
-			dev_WARN(_device->dev, "timeout\n");                   \
-		_taken = -ETIMEDOUT;                                           \
-	}                                                                      \
+	if (_warn && _taken < 0)                                               \
+		dev_WARN(_wait.tmr->subdev.device->dev, "timeout\n");          \
 	_taken;                                                                \
 })
 #define nvkm_usec(d,u,cond...) nvkm_nsec((d), (u) * 1000, ##cond)
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index e67a471331b5..b06cdac8f3a2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -306,7 +306,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 
 	/* create channel object and initialise dma and fence management */
 	ret = nouveau_channel_new(drm, device, init->fb_ctxdma_handle,
-				  init->tt_ctxdma_handle, &chan->chan);
+				  init->tt_ctxdma_handle, false, &chan->chan);
 	if (ret)
 		goto done;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index 5f5be6368aed..c7a94c94dbf3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -253,6 +253,9 @@ nouveau_backlight_init(struct drm_connector *connector)
 	case NV_DEVICE_INFO_V0_FERMI:
 	case NV_DEVICE_INFO_V0_KEPLER:
 	case NV_DEVICE_INFO_V0_MAXWELL:
+	case NV_DEVICE_INFO_V0_PASCAL:
+	case NV_DEVICE_INFO_V0_VOLTA:
+	case NV_DEVICE_INFO_V0_TURING:
 		ret = nv50_backlight_init(nv_encoder, &props, &ops);
 		break;
 	default:
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 7214022dfb91..73eff52036d2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -1141,6 +1141,8 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 			    struct ttm_mem_reg *, struct ttm_mem_reg *);
 		int (*init)(struct nouveau_channel *, u32 handle);
 	} _methods[] = {
+		{  "COPY", 4, 0xc5b5, nve0_bo_move_copy, nve0_bo_move_init },
+		{  "GRCE", 0, 0xc5b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc3b5, nve0_bo_move_copy, nve0_bo_move_init },
 		{  "GRCE", 0, 0xc3b5, nve0_bo_move_copy, nvc0_bo_move_init },
 		{  "COPY", 4, 0xc1b5, nve0_bo_move_copy, nve0_bo_move_init },
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index 92d3115f96b5..668afbc29c3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -29,6 +29,7 @@
 #include <nvif/cl506f.h>
 #include <nvif/cl906f.h>
 #include <nvif/cla06f.h>
+#include <nvif/clc36f.h>
 #include <nvif/ioctl.h>
 
 /*XXX*/
@@ -217,10 +218,11 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 static int
 nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
-		    u64 runlist, struct nouveau_channel **pchan)
+		    u64 runlist, bool priv, struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
-	static const u16 oclasses[] = { VOLTA_CHANNEL_GPFIFO_A,
+	static const u16 oclasses[] = { TURING_CHANNEL_GPFIFO_A,
+					VOLTA_CHANNEL_GPFIFO_A,
 					PASCAL_CHANNEL_GPFIFO_A,
 					MAXWELL_CHANNEL_GPFIFO_A,
 					KEPLER_CHANNEL_GPFIFO_B,
@@ -234,6 +236,7 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		struct nv50_channel_gpfifo_v0 nv50;
 		struct fermi_channel_gpfifo_v0 fermi;
 		struct kepler_channel_gpfifo_a_v0 kepler;
+		struct volta_channel_gpfifo_a_v0 volta;
 	} args;
 	struct nouveau_channel *chan;
 	u32 size;
@@ -247,12 +250,22 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 
 	/* create channel object */
 	do {
+		if (oclass[0] >= VOLTA_CHANNEL_GPFIFO_A) {
+			args.volta.version = 0;
+			args.volta.ilength = 0x02000;
+			args.volta.ioffset = 0x10000 + chan->push.addr;
+			args.volta.runlist = runlist;
+			args.volta.vmm = nvif_handle(&cli->vmm.vmm.object);
+			args.volta.priv = priv;
+			size = sizeof(args.volta);
+		} else
 		if (oclass[0] >= KEPLER_CHANNEL_GPFIFO_A) {
 			args.kepler.version = 0;
 			args.kepler.ilength = 0x02000;
 			args.kepler.ioffset = 0x10000 + chan->push.addr;
 			args.kepler.runlist = runlist;
 			args.kepler.vmm = nvif_handle(&cli->vmm.vmm.object);
+			args.kepler.priv = priv;
 			size = sizeof(args.kepler);
 		} else
 		if (oclass[0] >= FERMI_CHANNEL_GPFIFO) {
@@ -273,13 +286,20 @@ nouveau_channel_ind(struct nouveau_drm *drm, struct nvif_device *device,
 		ret = nvif_object_init(&device->object, 0, *oclass++,
 				       &args, size, &chan->user);
 		if (ret == 0) {
-			if (chan->user.oclass >= KEPLER_CHANNEL_GPFIFO_A)
+			if (chan->user.oclass >= VOLTA_CHANNEL_GPFIFO_A) {
+				chan->chid = args.volta.chid;
+				chan->inst = args.volta.inst;
+				chan->token = args.volta.token;
+			} else
+			if (chan->user.oclass >= KEPLER_CHANNEL_GPFIFO_A) {
 				chan->chid = args.kepler.chid;
-			else
-			if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO)
+				chan->inst = args.kepler.inst;
+			} else
+			if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
 				chan->chid = args.fermi.chid;
-			else
+			} else {
 				chan->chid = args.nv50.chid;
+			}
 			return ret;
 		}
 	} while (*oclass);
@@ -448,7 +468,8 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 
 int
 nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
-		    u32 arg0, u32 arg1, struct nouveau_channel **pchan)
+		    u32 arg0, u32 arg1, bool priv,
+		    struct nouveau_channel **pchan)
 {
 	struct nouveau_cli *cli = (void *)device->object.client;
 	bool super;
@@ -458,7 +479,7 @@ nouveau_channel_new(struct nouveau_drm *drm, struct nvif_device *device,
 	super = cli->base.super;
 	cli->base.super = true;
 
-	ret = nouveau_channel_ind(drm, device, arg0, pchan);
+	ret = nouveau_channel_ind(drm, device, arg0, priv, pchan);
 	if (ret) {
 		NV_PRINTK(dbg, cli, "ib channel create, %d\n", ret);
 		ret = nouveau_channel_dma(drm, device, pchan);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 64454c2ebd90..28418f4e5748 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -10,6 +10,8 @@ struct nouveau_channel {
 	struct nouveau_drm *drm;
 
 	int chid;
+	u64 inst;
+	u32 token;
 
 	struct nvif_object vram;
 	struct nvif_object gart;
@@ -48,7 +50,8 @@ struct nouveau_channel {
 int nouveau_channels_init(struct nouveau_drm *);
 
 int  nouveau_channel_new(struct nouveau_drm *, struct nvif_device *,
-			 u32 arg0, u32 arg1, struct nouveau_channel **);
+			 u32 arg0, u32 arg1, bool priv,
+			 struct nouveau_channel **);
 void nouveau_channel_del(struct nouveau_channel **);
 int  nouveau_channel_idle(struct nouveau_channel *);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index fd80661dff92..3f463c91314a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -403,6 +403,7 @@ nouveau_connector_destroy(struct drm_connector *connector)
 	if (nv_connector->aux.transfer) {
 		drm_dp_cec_unregister_connector(&nv_connector->aux);
 		drm_dp_aux_unregister(&nv_connector->aux);
+		kfree(nv_connector->aux.name);
 	}
 	kfree(connector);
 }
@@ -1218,7 +1219,8 @@ drm_conntype_from_dcb(enum dcb_connector_type dcb)
 	case DCB_CONNECTOR_LVDS_SPWG: return DRM_MODE_CONNECTOR_LVDS;
 	case DCB_CONNECTOR_DMS59_DP0:
 	case DCB_CONNECTOR_DMS59_DP1:
-	case DCB_CONNECTOR_DP       : return DRM_MODE_CONNECTOR_DisplayPort;
+	case DCB_CONNECTOR_DP       :
+	case DCB_CONNECTOR_USB_C    : return DRM_MODE_CONNECTOR_DisplayPort;
 	case DCB_CONNECTOR_eDP      : return DRM_MODE_CONNECTOR_eDP;
 	case DCB_CONNECTOR_HDMI_0   :
 	case DCB_CONNECTOR_HDMI_1   :
@@ -1232,7 +1234,8 @@ drm_conntype_from_dcb(enum dcb_connector_type dcb)
 }
 
 struct drm_connector *
-nouveau_connector_create(struct drm_device *dev, int index)
+nouveau_connector_create(struct drm_device *dev,
+			 const struct dcb_output *dcbe)
 {
 	const struct drm_connector_funcs *funcs = &nouveau_connector_funcs;
 	struct nouveau_drm *drm = nouveau_drm(dev);
@@ -1240,6 +1243,8 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	struct nouveau_connector *nv_connector = NULL;
 	struct drm_connector *connector;
 	struct drm_connector_list_iter conn_iter;
+	char aux_name[48] = {0};
+	int index = dcbe->connector;
 	int type, ret = 0;
 	bool dummy;
 
@@ -1342,6 +1347,9 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	case DRM_MODE_CONNECTOR_eDP:
 		nv_connector->aux.dev = dev->dev;
 		nv_connector->aux.transfer = nouveau_connector_aux_xfer;
+		snprintf(aux_name, sizeof(aux_name), "sor-%04x-%04x",
+			 dcbe->hasht, dcbe->hashm);
+		nv_connector->aux.name = kstrdup(aux_name, GFP_KERNEL);
 		ret = drm_dp_aux_register(&nv_connector->aux);
 		if (ret) {
 			NV_ERROR(drm, "failed to register aux channel\n");
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index f57ef35b1e5e..f43a8d63aef8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -38,6 +38,7 @@
 #include "nouveau_encoder.h"
 
 struct nvkm_i2c_port;
+struct dcb_output;
 
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
 struct nouveau_backlight;
@@ -113,7 +114,7 @@ nouveau_crtc_connector_get(struct nouveau_crtc *nv_crtc)
 }
 
 struct drm_connector *
-nouveau_connector_create(struct drm_device *, int index);
+nouveau_connector_create(struct drm_device *, const struct dcb_output *);
 
 extern int nouveau_tv_disable;
 extern int nouveau_ignorelid;
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 9109b69cd052..88a52f6b39fe 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -47,6 +47,26 @@ nouveau_debugfs_vbios_image(struct seq_file *m, void *data)
 }
 
 static int
+nouveau_debugfs_strap_peek(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = m->private;
+	struct nouveau_drm *drm = nouveau_drm(node->minor->dev);
+	int ret;
+
+	ret = pm_runtime_get_sync(drm->dev->dev);
+	if (ret < 0 && ret != -EACCES)
+		return ret;
+
+	seq_printf(m, "0x%08x\n",
+		   nvif_rd32(&drm->client.device.object, 0x101000));
+
+	pm_runtime_mark_last_busy(drm->dev->dev);
+	pm_runtime_put_autosuspend(drm->dev->dev);
+
+	return 0;
+}
+
+static int
 nouveau_debugfs_pstate_get(struct seq_file *m, void *data)
 {
 	struct drm_device *drm = m->private;
@@ -185,7 +205,8 @@ static const struct file_operations nouveau_pstate_fops = {
 };
 
 static struct drm_info_list nouveau_debugfs_list[] = {
-	{ "vbios.rom", nouveau_debugfs_vbios_image, 0, NULL },
+	{ "vbios.rom",  nouveau_debugfs_vbios_image, 0, NULL },
+	{ "strap_peek", nouveau_debugfs_strap_peek, 0, NULL },
 };
 #define NOUVEAU_DEBUGFS_ENTRIES ARRAY_SIZE(nouveau_debugfs_list)
 
@@ -199,8 +220,9 @@ static const struct nouveau_debugfs_files {
 int
 nouveau_drm_debugfs_init(struct drm_minor *minor)
 {
+	struct nouveau_drm *drm = nouveau_drm(minor->dev);
 	struct dentry *dentry;
-	int i;
+	int i, ret;
 
 	for (i = 0; i < ARRAY_SIZE(nouveau_debugfs_files); i++) {
 		dentry = debugfs_create_file(nouveau_debugfs_files[i].name,
@@ -211,9 +233,23 @@ nouveau_drm_debugfs_init(struct drm_minor *minor)
 			return -ENOMEM;
 	}
 
-	return drm_debugfs_create_files(nouveau_debugfs_list,
-					NOUVEAU_DEBUGFS_ENTRIES,
-					minor->debugfs_root, minor);
+	ret = drm_debugfs_create_files(nouveau_debugfs_list,
+				       NOUVEAU_DEBUGFS_ENTRIES,
+				       minor->debugfs_root, minor);
+	if (ret)
+		return ret;
+
+	/* Set the size of the vbios since we know it, and it's confusing to
+	 * userspace if it wants to seek() but the file has a length of 0
+	 */
+	dentry = debugfs_lookup("vbios.rom", minor->debugfs_root);
+	if (!dentry)
+		return 0;
+
+	d_inode(dentry)->i_size = drm->vbios.length;
+	dput(dentry);
+
+	return 0;
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 945afd34138e..078f65d849ce 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -101,7 +101,7 @@ nv50_dma_push(struct nouveau_channel *chan, u64 offset, int length)
 
 	nvif_wr32(&chan->user, 0x8c, chan->dma.ib_put);
 	if (user->func && user->func->doorbell)
-		user->func->doorbell(user, chan->chid);
+		user->func->doorbell(user, chan->token);
 	chan->dma.ib_free--;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 2b2baf6e0e0d..f900e94592f8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -353,6 +353,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		case MAXWELL_CHANNEL_GPFIFO_A:
 		case PASCAL_CHANNEL_GPFIFO_A:
 		case VOLTA_CHANNEL_GPFIFO_A:
+		case TURING_CHANNEL_GPFIFO_A:
 			ret = nvc0_fence_create(drm);
 			break;
 		default:
@@ -370,7 +371,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
 		ret = nouveau_channel_new(drm, &drm->client.device,
 					  nvif_fifo_runlist_ce(device), 0,
-					  &drm->cechan);
+					  true, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 
@@ -381,7 +382,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	    device->info.chipset != 0xaa &&
 	    device->info.chipset != 0xac) {
 		ret = nouveau_channel_new(drm, &drm->client.device,
-					  NvDmaFB, NvDmaTT, &drm->cechan);
+					  NvDmaFB, NvDmaTT, false,
+					  &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
 
@@ -393,7 +395,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	ret = nouveau_channel_new(drm, &drm->client.device,
-				  arg0, arg1, &drm->channel);
+				  arg0, arg1, false, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -1171,10 +1173,16 @@ nouveau_platform_device_create(const struct nvkm_device_tegra_func *func,
 		goto err_free;
 	}
 
+	err = nouveau_drm_device_init(drm);
+	if (err)
+		goto err_put;
+
 	platform_set_drvdata(pdev, drm);
 
 	return drm;
 
+err_put:
+	drm_dev_put(drm);
 err_free:
 	nvkm_device_del(pdevice);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_vmm.h b/drivers/gpu/drm/nouveau/nouveau_vmm.h
index 7e3b118cf7c4..ede872f6f668 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vmm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vmm.h
@@ -25,7 +25,6 @@ void nouveau_vma_unmap(struct nouveau_vma *);
 struct nouveau_vmm {
 	struct nouveau_cli *cli;
 	struct nvif_vmm vmm;
-	struct nvkm_vm *vm;
 };
 
 int nouveau_vmm_init(struct nouveau_cli *, s32 oclass, struct nouveau_vmm *);
diff --git a/drivers/gpu/drm/nouveau/nvif/disp.c b/drivers/gpu/drm/nouveau/nvif/disp.c
index 18c7d064f75c..ef97dd223a32 100644
--- a/drivers/gpu/drm/nouveau/nvif/disp.c
+++ b/drivers/gpu/drm/nouveau/nvif/disp.c
@@ -34,6 +34,7 @@ int
 nvif_disp_ctor(struct nvif_device *device, s32 oclass, struct nvif_disp *disp)
 {
 	static const struct nvif_mclass disps[] = {
+		{ TU104_DISP, -1 },
 		{ GV100_DISP, -1 },
 		{ GP102_DISP, -1 },
 		{ GP100_DISP, -1 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
index 03f676c18aad..c61b467cf45e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/subdev.c
@@ -79,7 +79,9 @@ nvkm_subdev_name[NVKM_SUBDEV_NR] = {
 	[NVKM_ENGINE_NVENC0  ] = "nvenc0",
 	[NVKM_ENGINE_NVENC1  ] = "nvenc1",
 	[NVKM_ENGINE_NVENC2  ] = "nvenc2",
-	[NVKM_ENGINE_NVDEC   ] = "nvdec",
+	[NVKM_ENGINE_NVDEC0  ] = "nvdec0",
+	[NVKM_ENGINE_NVDEC1  ] = "nvdec1",
+	[NVKM_ENGINE_NVDEC2  ] = "nvdec2",
 	[NVKM_ENGINE_PM      ] = "pm",
 	[NVKM_ENGINE_SEC     ] = "sec",
 	[NVKM_ENGINE_SEC2    ] = "sec2",
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index 80d784441904..177a23301d6a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -6,3 +6,4 @@ nvkm-y += nvkm/engine/ce/gm200.o
 nvkm-y += nvkm/engine/ce/gp100.o
 nvkm-y += nvkm/engine/ce/gp102.o
 nvkm-y += nvkm/engine/ce/gv100.o
+nvkm-y += nvkm/engine/ce/tu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c
new file mode 100644
index 000000000000..3c25043bbb33
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/tu104.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <nvif/class.h>
+
+static const struct nvkm_engine_func
+tu104_ce = {
+	.intr = gp100_ce_intr,
+	.sclass = {
+		{ -1, -1, TURING_DMA_COPY_A },
+		{}
+	}
+};
+
+int
+tu104_ce_new(struct nvkm_device *device, int index,
+	     struct nvkm_engine **pengine)
+{
+	return nvkm_engine_new_(&tu104_ce, device, index, true, pengine);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index e294013426ce..d9edb5785813 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -2221,7 +2221,7 @@ nv132_chipset = {
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 	.gr = gp102_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
 };
@@ -2257,7 +2257,7 @@ nv134_chipset = {
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 	.gr = gp104_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
 };
@@ -2293,7 +2293,7 @@ nv136_chipset = {
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 	.gr = gp104_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
 };
@@ -2329,7 +2329,7 @@ nv137_chipset = {
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 	.gr = gp107_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
 };
@@ -2365,7 +2365,7 @@ nv138_chipset = {
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 	.gr = gp107_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 	.sw = gf100_sw_new,
 };
@@ -2430,10 +2430,106 @@ nv140_chipset = {
 	.dma = gv100_dma_new,
 	.fifo = gv100_fifo_new,
 	.gr = gv100_gr_new,
-	.nvdec = gp102_nvdec_new,
+	.nvdec[0] = gp102_nvdec_new,
 	.sec2 = gp102_sec2_new,
 };
 
+static const struct nvkm_device_chip
+nv162_chipset = {
+	.name = "TU102",
+	.bar = tu104_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = tu104_devinit_new,
+	.fault = tu104_fault_new,
+	.fb = gv100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp102_ltc_new,
+	.mc = tu104_mc_new,
+	.mmu = tu104_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.therm = gp100_therm_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = tu104_ce_new,
+	.ce[1] = tu104_ce_new,
+	.ce[2] = tu104_ce_new,
+	.ce[3] = tu104_ce_new,
+	.ce[4] = tu104_ce_new,
+	.disp = tu104_disp_new,
+	.dma = gv100_dma_new,
+	.fifo = tu104_fifo_new,
+};
+
+static const struct nvkm_device_chip
+nv164_chipset = {
+	.name = "TU104",
+	.bar = tu104_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = tu104_devinit_new,
+	.fault = tu104_fault_new,
+	.fb = gv100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp102_ltc_new,
+	.mc = tu104_mc_new,
+	.mmu = tu104_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.therm = gp100_therm_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = tu104_ce_new,
+	.ce[1] = tu104_ce_new,
+	.ce[2] = tu104_ce_new,
+	.ce[3] = tu104_ce_new,
+	.ce[4] = tu104_ce_new,
+	.disp = tu104_disp_new,
+	.dma = gv100_dma_new,
+	.fifo = tu104_fifo_new,
+};
+
+static const struct nvkm_device_chip
+nv166_chipset = {
+	.name = "TU106",
+	.bar = tu104_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = tu104_devinit_new,
+	.fault = tu104_fault_new,
+	.fb = gv100_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp102_ltc_new,
+	.mc = tu104_mc_new,
+	.mmu = tu104_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.therm = gp100_therm_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = tu104_ce_new,
+	.ce[1] = tu104_ce_new,
+	.ce[2] = tu104_ce_new,
+	.ce[3] = tu104_ce_new,
+	.ce[4] = tu104_ce_new,
+	.disp = tu104_disp_new,
+	.dma = gv100_dma_new,
+	.fifo = tu104_fifo_new,
+};
+
 static int
 nvkm_device_event_ctor(struct nvkm_object *object, void *data, u32 size,
 		       struct nvkm_notify *notify)
@@ -2529,7 +2625,9 @@ nvkm_device_engine(struct nvkm_device *device, int index)
 	_(NVENC0 , device->nvenc[0],  device->nvenc[0]);
 	_(NVENC1 , device->nvenc[1],  device->nvenc[1]);
 	_(NVENC2 , device->nvenc[2],  device->nvenc[2]);
-	_(NVDEC  , device->nvdec   , &device->nvdec->engine);
+	_(NVDEC0 , device->nvdec[0], &device->nvdec[0]->engine);
+	_(NVDEC1 , device->nvdec[1], &device->nvdec[1]->engine);
+	_(NVDEC2 , device->nvdec[2], &device->nvdec[2]->engine);
 	_(PM     , device->pm      , &device->pm->engine);
 	_(SEC    , device->sec     ,  device->sec);
 	_(SEC2   , device->sec2    , &device->sec2->engine);
@@ -2791,6 +2889,7 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 			case 0x120: device->card_type = GM100; break;
 			case 0x130: device->card_type = GP100; break;
 			case 0x140: device->card_type = GV100; break;
+			case 0x160: device->card_type = TU100; break;
 			default:
 				break;
 			}
@@ -2883,6 +2982,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x138: device->chip = &nv138_chipset; break;
 		case 0x13b: device->chip = &nv13b_chipset; break;
 		case 0x140: device->chip = &nv140_chipset; break;
+		case 0x162: device->chip = &nv162_chipset; break;
+		case 0x164: device->chip = &nv164_chipset; break;
+		case 0x166: device->chip = &nv166_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
@@ -2988,7 +3090,9 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		_(NVKM_ENGINE_NVENC0  , nvenc[0]);
 		_(NVKM_ENGINE_NVENC1  , nvenc[1]);
 		_(NVKM_ENGINE_NVENC2  , nvenc[2]);
-		_(NVKM_ENGINE_NVDEC   ,    nvdec);
+		_(NVKM_ENGINE_NVDEC0  , nvdec[0]);
+		_(NVKM_ENGINE_NVDEC1  , nvdec[1]);
+		_(NVKM_ENGINE_NVDEC2  , nvdec[2]);
 		_(NVKM_ENGINE_PM      ,       pm);
 		_(NVKM_ENGINE_SEC     ,      sec);
 		_(NVKM_ENGINE_SEC2    ,     sec2);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index dde6bbafa709..092ddc4ffefa 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -91,7 +91,7 @@ nvkm_udevice_info_v1(struct nvkm_device *device,
 	case ENGINE_A(MSENC ); break;
 	case ENGINE_A(VIC   ); break;
 	case ENGINE_A(SEC2  ); break;
-	case ENGINE_A(NVDEC ); break;
+	case ENGINE_B(NVDEC ); break;
 	case ENGINE_B(NVENC ); break;
 	default:
 		args->mthd = NV_DEVICE_INFO_INVALID;
@@ -175,6 +175,7 @@ nvkm_udevice_info(struct nvkm_udevice *udev, void *data, u32 size)
 	case GM100: args->v0.family = NV_DEVICE_INFO_V0_MAXWELL; break;
 	case GP100: args->v0.family = NV_DEVICE_INFO_V0_PASCAL; break;
 	case GV100: args->v0.family = NV_DEVICE_INFO_V0_VOLTA; break;
+	case TU100: args->v0.family = NV_DEVICE_INFO_V0_TURING; break;
 	default:
 		args->v0.family = 0;
 		break;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 8089ac9a12e2..c6a257ba4347 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -15,6 +15,7 @@ nvkm-y += nvkm/engine/disp/gm200.o
 nvkm-y += nvkm/engine/disp/gp100.o
 nvkm-y += nvkm/engine/disp/gp102.o
 nvkm-y += nvkm/engine/disp/gv100.o
+nvkm-y += nvkm/engine/disp/tu104.o
 nvkm-y += nvkm/engine/disp/vga.o
 
 nvkm-y += nvkm/engine/disp/head.o
@@ -38,6 +39,7 @@ nvkm-y += nvkm/engine/disp/sorgk104.o
 nvkm-y += nvkm/engine/disp/sorgm107.o
 nvkm-y += nvkm/engine/disp/sorgm200.o
 nvkm-y += nvkm/engine/disp/sorgv100.o
+nvkm-y += nvkm/engine/disp/sortu104.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/dp.o
@@ -69,6 +71,7 @@ nvkm-y += nvkm/engine/disp/rootgm200.o
 nvkm-y += nvkm/engine/disp/rootgp100.o
 nvkm-y += nvkm/engine/disp/rootgp102.o
 nvkm-y += nvkm/engine/disp/rootgv100.o
+nvkm-y += nvkm/engine/disp/roottu104.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
index d0a7e3456da1..47be0ba4aebe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gv100.c
@@ -28,7 +28,7 @@
 #include <core/gpuobj.h>
 #include <subdev/timer.h>
 
-static int
+int
 gv100_disp_wndw_cnt(struct nvkm_disp *disp, unsigned long *pmask)
 {
 	struct nvkm_device *device = disp->engine.subdev.device;
@@ -36,7 +36,7 @@ gv100_disp_wndw_cnt(struct nvkm_disp *disp, unsigned long *pmask)
 	return (nvkm_rd32(device, 0x610074) & 0x03f00000) >> 20;
 }
 
-static void
+void
 gv100_disp_super(struct work_struct *work)
 {
 	struct nv50_disp *disp =
@@ -257,7 +257,7 @@ gv100_disp_intr_head_timing(struct nv50_disp *disp, int head)
 	}
 }
 
-static void
+void
 gv100_disp_intr(struct nv50_disp *disp)
 {
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
@@ -297,7 +297,7 @@ gv100_disp_intr(struct nv50_disp *disp)
 		nvkm_warn(subdev, "intr %08x\n", stat);
 }
 
-static void
+void
 gv100_disp_fini(struct nv50_disp *disp)
 {
 	struct nvkm_device *device = disp->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
index 0f0c86c32ec3..790e42f460fd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ior.h
@@ -144,6 +144,11 @@ void gm200_sor_route_set(struct nvkm_outp *, struct nvkm_ior *);
 int gm200_sor_route_get(struct nvkm_outp *, int *);
 void gm200_sor_dp_drive(struct nvkm_ior *, int, int, int, int, int);
 
+void gv100_sor_state(struct nvkm_ior *, struct nvkm_ior_state *);
+void gv100_sor_dp_audio(struct nvkm_ior *, int, bool);
+void gv100_sor_dp_audio_sym(struct nvkm_ior *, int, u16, u32);
+void gv100_sor_dp_watermark(struct nvkm_ior *, int, u8);
+
 void g84_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
 void gt215_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
 void gf119_hdmi_ctrl(struct nvkm_ior *, int, bool, u8, u8, u8 *, u8 , u8 *, u8);
@@ -195,4 +200,6 @@ int gm200_sor_new(struct nvkm_disp *, int);
 
 int gv100_sor_cnt(struct nvkm_disp *, unsigned long *);
 int gv100_sor_new(struct nvkm_disp *, int);
+
+int tu104_sor_new(struct nvkm_disp *, int);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
index 8580382ab248..c36a8a7cafa1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.h
@@ -78,6 +78,11 @@ void gf119_disp_intr(struct nv50_disp *);
 void gf119_disp_super(struct work_struct *);
 void gf119_disp_intr_error(struct nv50_disp *, int);
 
+void gv100_disp_fini(struct nv50_disp *);
+void gv100_disp_intr(struct nv50_disp *);
+void gv100_disp_super(struct work_struct *);
+int gv100_disp_wndw_cnt(struct nvkm_disp *, unsigned long *);
+
 void nv50_disp_dptmds_war_2(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_dptmds_war_3(struct nv50_disp *, struct dcb_output *);
 void nv50_disp_update_sppll1(struct nv50_disp *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index 6ca4f9184b51..97de928cbde1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -37,4 +37,5 @@ extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp102_disp_root_oclass;
 extern const struct nvkm_disp_oclass gv100_disp_root_oclass;
+extern const struct nvkm_disp_oclass tu104_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c
new file mode 100644
index 000000000000..ad438c62f66c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/roottu104.c
@@ -0,0 +1,52 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "rootnv50.h"
+#include "channv50.h"
+
+#include <nvif/class.h>
+
+static const struct nv50_disp_root_func
+tu104_disp_root = {
+	.user = {
+		{{0,0,TU104_DISP_CURSOR                }, gv100_disp_curs_new },
+		{{0,0,TU104_DISP_WINDOW_IMM_CHANNEL_DMA}, gv100_disp_wimm_new },
+		{{0,0,TU104_DISP_CORE_CHANNEL_DMA      }, gv100_disp_core_new },
+		{{0,0,TU104_DISP_WINDOW_CHANNEL_DMA    }, gv100_disp_wndw_new },
+		{}
+	},
+};
+
+static int
+tu104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+		    void *data, u32 size, struct nvkm_object **pobject)
+{
+	return nv50_disp_root_new_(&tu104_disp_root, disp, oclass,
+				   data, size, pobject);
+}
+
+const struct nvkm_disp_oclass
+tu104_disp_root_oclass = {
+	.base.oclass = TU104_DISP,
+	.base.minver = -1,
+	.base.maxver = -1,
+	.ctor = tu104_disp_root_new,
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
index 8ba881a729ee..b0597ff9a714 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgv100.c
@@ -23,7 +23,7 @@
 
 #include <subdev/timer.h>
 
-static void
+void
 gv100_sor_dp_watermark(struct nvkm_ior *sor, int head, u8 watermark)
 {
 	struct nvkm_device *device = sor->disp->engine.subdev.device;
@@ -31,7 +31,7 @@ gv100_sor_dp_watermark(struct nvkm_ior *sor, int head, u8 watermark)
 	nvkm_mask(device, 0x616550 + hoff, 0x0c00003f, 0x08000000 | watermark);
 }
 
-static void
+void
 gv100_sor_dp_audio_sym(struct nvkm_ior *sor, int head, u16 h, u32 v)
 {
 	struct nvkm_device *device = sor->disp->engine.subdev.device;
@@ -40,7 +40,7 @@ gv100_sor_dp_audio_sym(struct nvkm_ior *sor, int head, u16 h, u32 v)
 	nvkm_mask(device, 0x61656c + hoff, 0x00ffffff, v);
 }
 
-static void
+void
 gv100_sor_dp_audio(struct nvkm_ior *sor, int head, bool enable)
 {
 	struct nvkm_device *device = sor->disp->engine.subdev.device;
@@ -54,7 +54,7 @@ gv100_sor_dp_audio(struct nvkm_ior *sor, int head, bool enable)
 	);
 }
 
-static void
+void
 gv100_sor_state(struct nvkm_ior *sor, struct nvkm_ior_state *state)
 {
 	struct nvkm_device *device = sor->disp->engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c
new file mode 100644
index 000000000000..df026a525ef1
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sortu104.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "ior.h"
+
+#include <subdev/timer.h>
+
+static void
+tu104_sor_dp_vcpi(struct nvkm_ior *sor, int head,
+		  u8 slot, u8 slot_nr, u16 pbn, u16 aligned)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 hoff = head * 0x800;
+
+	nvkm_mask(device, 0x61657c + hoff, 0xffffffff, (aligned << 16) | pbn);
+	nvkm_mask(device, 0x616578 + hoff, 0x00003f3f, (slot_nr << 8) | slot);
+}
+
+static int
+tu104_sor_dp_links(struct nvkm_ior *sor, struct nvkm_i2c_aux *aux)
+{
+	struct nvkm_device *device = sor->disp->engine.subdev.device;
+	const u32 soff = nv50_ior_base(sor);
+	const u32 loff = nv50_sor_link(sor);
+	u32 dpctrl = 0x00000000;
+	u32 clksor = 0x00000000;
+
+	clksor |= sor->dp.bw << 18;
+	dpctrl |= ((1 << sor->dp.nr) - 1) << 16;
+	if (sor->dp.mst)
+		dpctrl |= 0x40000000;
+	if (sor->dp.ef)
+		dpctrl |= 0x00004000;
+
+	nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor);
+
+	/*XXX*/
+	nvkm_msec(device, 40, NVKM_DELAY);
+	nvkm_mask(device, 0x612300 + soff, 0x00030000, 0x00010000);
+	nvkm_mask(device, 0x61c10c + loff, 0x00000003, 0x00000001);
+
+	nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl);
+	return 0;
+}
+
+static const struct nvkm_ior_func
+tu104_sor = {
+	.route = {
+		.get = gm200_sor_route_get,
+		.set = gm200_sor_route_set,
+	},
+	.state = gv100_sor_state,
+	.power = nv50_sor_power,
+	.clock = gf119_sor_clock,
+	.hdmi = {
+		.ctrl = gv100_hdmi_ctrl,
+	},
+	.dp = {
+		.lanes = { 0, 1, 2, 3 },
+		.links = tu104_sor_dp_links,
+		.power = g94_sor_dp_power,
+		.pattern = gm107_sor_dp_pattern,
+		.drive = gm200_sor_dp_drive,
+		.vcpi = tu104_sor_dp_vcpi,
+		.audio = gv100_sor_dp_audio,
+		.audio_sym = gv100_sor_dp_audio_sym,
+		.watermark = gv100_sor_dp_watermark,
+	},
+	.hda = {
+		.hpd = gf119_hda_hpd,
+		.eld = gf119_hda_eld,
+	},
+};
+
+int
+tu104_sor_new(struct nvkm_disp *disp, int id)
+{
+	return nvkm_ior_new_(&tu104_sor, disp, SOR, id);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c
new file mode 100644
index 000000000000..13fa21459d38
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/tu104.c
@@ -0,0 +1,152 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+#include "head.h"
+#include "ior.h"
+#include "channv50.h"
+#include "rootnv50.h"
+
+#include <core/gpuobj.h>
+#include <subdev/timer.h>
+
+static int
+tu104_disp_init(struct nv50_disp *disp)
+{
+	struct nvkm_device *device = disp->base.engine.subdev.device;
+	struct nvkm_head *head;
+	int i, j;
+	u32 tmp;
+
+	/* Claim ownership of display. */
+	if (nvkm_rd32(device, 0x6254e8) & 0x00000002) {
+		nvkm_mask(device, 0x6254e8, 0x00000001, 0x00000000);
+		if (nvkm_msec(device, 2000,
+			if (!(nvkm_rd32(device, 0x6254e8) & 0x00000002))
+				break;
+		) < 0)
+			return -EBUSY;
+	}
+
+	/* Lock pin capabilities. */
+	tmp = 0x00000021; /*XXX*/
+	nvkm_wr32(device, 0x640008, tmp);
+
+	/* SOR capabilities. */
+	for (i = 0; i < disp->sor.nr; i++) {
+		tmp = nvkm_rd32(device, 0x61c000 + (i * 0x800));
+		nvkm_mask(device, 0x640000, 0x00000100 << i, 0x00000100 << i);
+		nvkm_wr32(device, 0x640144 + (i * 0x08), tmp);
+	}
+
+	/* Head capabilities. */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const int id = head->id;
+
+		/* RG. */
+		tmp = nvkm_rd32(device, 0x616300 + (id * 0x800));
+		nvkm_wr32(device, 0x640048 + (id * 0x020), tmp);
+
+		/* POSTCOMP. */
+		for (j = 0; j < 5 * 4; j += 4) {
+			tmp = nvkm_rd32(device, 0x616140 + (id * 0x800) + j);
+			nvkm_wr32(device, 0x640680 + (id * 0x20) + j, tmp);
+		}
+	}
+
+	/* Window capabilities. */
+	for (i = 0; i < disp->wndw.nr; i++) {
+		nvkm_mask(device, 0x640004, 1 << i, 1 << i);
+		for (j = 0; j < 6 * 4; j += 4) {
+			tmp = nvkm_rd32(device, 0x630100 + (i * 0x800) + j);
+			nvkm_mask(device, 0x640780 + (i * 0x20) + j, 0xffffffff, tmp);
+		}
+		nvkm_mask(device, 0x64000c, 0x00000100, 0x00000100);
+	}
+
+	/* IHUB capabilities. */
+	for (i = 0; i < 3; i++) {
+		tmp = nvkm_rd32(device, 0x62e000 + (i * 0x04));
+		nvkm_wr32(device, 0x640010 + (i * 0x04), tmp);
+	}
+
+	nvkm_mask(device, 0x610078, 0x00000001, 0x00000001);
+
+	/* Setup instance memory. */
+	switch (nvkm_memory_target(disp->inst->memory)) {
+	case NVKM_MEM_TARGET_VRAM: tmp = 0x00000001; break;
+	case NVKM_MEM_TARGET_NCOH: tmp = 0x00000002; break;
+	case NVKM_MEM_TARGET_HOST: tmp = 0x00000003; break;
+	default:
+		break;
+	}
+	nvkm_wr32(device, 0x610010, 0x00000008 | tmp);
+	nvkm_wr32(device, 0x610014, disp->inst->addr >> 16);
+
+	/* CTRL_DISP: AWAKEN, ERROR, SUPERVISOR[1-3]. */
+	nvkm_wr32(device, 0x611cf0, 0x00000187); /* MSK. */
+	nvkm_wr32(device, 0x611db0, 0x00000187); /* EN. */
+
+	/* EXC_OTHER: CURSn, CORE. */
+	nvkm_wr32(device, 0x611cec, disp->head.mask << 16 |
+				    0x00000001); /* MSK. */
+	nvkm_wr32(device, 0x611dac, 0x00000000); /* EN. */
+
+	/* EXC_WINIM. */
+	nvkm_wr32(device, 0x611ce8, disp->wndw.mask); /* MSK. */
+	nvkm_wr32(device, 0x611da8, 0x00000000); /* EN. */
+
+	/* EXC_WIN. */
+	nvkm_wr32(device, 0x611ce4, disp->wndw.mask); /* MSK. */
+	nvkm_wr32(device, 0x611da4, 0x00000000); /* EN. */
+
+	/* HEAD_TIMING(n): VBLANK. */
+	list_for_each_entry(head, &disp->base.head, head) {
+		const u32 hoff = head->id * 4;
+		nvkm_wr32(device, 0x611cc0 + hoff, 0x00000004); /* MSK. */
+		nvkm_wr32(device, 0x611d80 + hoff, 0x00000000); /* EN. */
+	}
+
+	/* OR. */
+	nvkm_wr32(device, 0x611cf4, 0x00000000); /* MSK. */
+	nvkm_wr32(device, 0x611db4, 0x00000000); /* EN. */
+	return 0;
+}
+
+static const struct nv50_disp_func
+tu104_disp = {
+	.init = tu104_disp_init,
+	.fini = gv100_disp_fini,
+	.intr = gv100_disp_intr,
+	.uevent = &gv100_disp_chan_uevent,
+	.super = gv100_disp_super,
+	.root = &tu104_disp_root_oclass,
+	.wndw = { .cnt = gv100_disp_wndw_cnt },
+	.head = { .cnt = gv100_head_cnt, .new = gv100_head_new },
+	.sor = { .cnt = gv100_sor_cnt, .new = tu104_sor_new },
+	.ramht_size = 0x2000,
+};
+
+int
+tu104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+{
+	return nv50_disp_new_(&tu104_disp, device, index, pdisp);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c
index 98911805aabf..5d3b641dbb14 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/wndwgv100.c
@@ -118,7 +118,7 @@ gv100_disp_wndw_mthd_base = {
 
 const struct nv50_disp_chan_mthd
 gv100_disp_wndw_mthd = {
-	.name = "Base",
+	.name = "Window",
 	.addr = 0x001000,
 	.prev = 0x000800,
 	.data = {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
index 816ccaedfc73..8675613e142b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/falcon.c
@@ -22,6 +22,7 @@
 #include <engine/falcon.h>
 
 #include <core/gpuobj.h>
+#include <subdev/mc.h>
 #include <subdev/timer.h>
 #include <engine/fifo.h>
 
@@ -107,8 +108,10 @@ nvkm_falcon_fini(struct nvkm_engine *engine, bool suspend)
 		}
 	}
 
-	nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000);
-	nvkm_wr32(device, base + 0x014, 0xffffffff);
+	if (nvkm_mc_enabled(device, engine->subdev.index)) {
+		nvkm_mask(device, base + 0x048, 0x00000003, 0x00000000);
+		nvkm_wr32(device, base + 0x014, 0xffffffff);
+	}
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
index f00408577a6a..87d8e054e40a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/Kbuild
@@ -16,6 +16,7 @@ nvkm-y += nvkm/engine/fifo/gm20b.o
 nvkm-y += nvkm/engine/fifo/gp100.o
 nvkm-y += nvkm/engine/fifo/gp10b.o
 nvkm-y += nvkm/engine/fifo/gv100.o
+nvkm-y += nvkm/engine/fifo/tu104.o
 
 nvkm-y += nvkm/engine/fifo/chan.o
 nvkm-y += nvkm/engine/fifo/channv50.o
@@ -33,5 +34,7 @@ nvkm-y += nvkm/engine/fifo/gpfifog84.o
 nvkm-y += nvkm/engine/fifo/gpfifogf100.o
 nvkm-y += nvkm/engine/fifo/gpfifogk104.o
 nvkm-y += nvkm/engine/fifo/gpfifogv100.o
+nvkm-y += nvkm/engine/fifo/gpfifotu104.o
 
 nvkm-y += nvkm/engine/fifo/usergv100.o
+nvkm-y += nvkm/engine/fifo/usertu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index 3ffef236189e..2c7c5afc1ea5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -17,6 +17,7 @@ struct nvkm_fifo_chan_func {
 			    bool suspend);
 	int  (*object_ctor)(struct nvkm_fifo_chan *, struct nvkm_object *);
 	void (*object_dtor)(struct nvkm_fifo_chan *, int);
+	u32 (*submit_token)(struct nvkm_fifo_chan *);
 };
 
 int nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *, struct nvkm_fifo *,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
index 8e28ba6b2307..a14545d871d8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/changk104.h
@@ -14,6 +14,8 @@ struct gk104_fifo_chan {
 	struct list_head head;
 	bool killed;
 
+	struct nvkm_memory *mthd;
+
 	struct {
 		struct nvkm_gpuobj *inst;
 		struct nvkm_vma *vma;
@@ -36,4 +38,15 @@ int gk104_fifo_gpfifo_kick_locked(struct gk104_fifo_chan *);
 
 int gv100_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
 			  void *data, u32 size, struct nvkm_object **);
+int gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *,
+			   struct gk104_fifo *, u64 *, u16 *, u64, u64, u64,
+			   u64 *, bool, u32 *, const struct nvkm_oclass *,
+			   struct nvkm_object **);
+int gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *,
+				  struct nvkm_engine *);
+int gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *,
+				  struct nvkm_engine *, bool);
+
+int tu104_fifo_gpfifo_new(struct gk104_fifo *, const struct nvkm_oclass *,
+			  void *data, u32 size, struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index f69576868164..10a2e7039a75 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -346,10 +346,10 @@ gf100_fifo_intr_fault(struct gf100_fifo *fifo, int unit)
 	if (eu && eu->data2) {
 		switch (eu->data2) {
 		case NVKM_SUBDEV_BAR:
-			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
+			nvkm_bar_bar1_reset(device);
 			break;
 		case NVKM_SUBDEV_INSTMEM:
-			nvkm_mask(device, 0x001714, 0x00000000, 0x00000000);
+			nvkm_bar_bar2_reset(device);
 			break;
 		case NVKM_ENGINE_IFB:
 			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index afccf9721cf0..1053fe796466 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -149,16 +149,41 @@ gk104_fifo_uevent_init(struct nvkm_fifo *fifo)
 }
 
 void
-gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
+gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl,
+			  struct nvkm_memory *mem, int nr)
+{
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	int target;
+
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
+				    (target << 28));
+	nvkm_wr32(device, 0x002274, (runl << 20) | nr);
+
+	if (nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000))
+			break;
+	) < 0)
+		nvkm_error(subdev, "runlist %d update timeout\n", runl);
+}
+
+void
+gk104_fifo_runlist_update(struct gk104_fifo *fifo, int runl)
 {
 	const struct gk104_fifo_runlist_func *func = fifo->func->runlist;
 	struct gk104_fifo_chan *chan;
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
-	struct nvkm_device *device = subdev->device;
 	struct nvkm_memory *mem;
 	struct nvkm_fifo_cgrp *cgrp;
 	int nr = 0;
-	int target;
 
 	mutex_lock(&subdev->mutex);
 	mem = fifo->runlist[runl].mem[fifo->runlist[runl].next];
@@ -177,24 +202,7 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	switch (nvkm_memory_target(mem)) {
-	case NVKM_MEM_TARGET_VRAM: target = 0; break;
-	case NVKM_MEM_TARGET_NCOH: target = 3; break;
-	default:
-		WARN_ON(1);
-		goto unlock;
-	}
-
-	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
-				    (target << 28));
-	nvkm_wr32(device, 0x002274, (runl << 20) | nr);
-
-	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x002284 + (runl * 0x08)) & 0x00100000))
-			break;
-	) < 0)
-		nvkm_error(subdev, "runlist %d update timeout\n", runl);
-unlock:
+	func->commit(fifo, runl, mem, nr);
 	mutex_unlock(&subdev->mutex);
 }
 
@@ -238,6 +246,29 @@ const struct gk104_fifo_runlist_func
 gk104_fifo_runlist = {
 	.size = 8,
 	.chan = gk104_fifo_runlist_chan,
+	.commit = gk104_fifo_runlist_commit,
+};
+
+void
+gk104_fifo_pbdma_init(struct gk104_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
+}
+
+int
+gk104_fifo_pbdma_nr(struct gk104_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	/* Determine number of PBDMAs by checking valid enable bits. */
+	nvkm_wr32(device, 0x000204, 0xffffffff);
+	return hweight32(nvkm_rd32(device, 0x000204));
+}
+
+const struct gk104_fifo_pbdma_func
+gk104_fifo_pbdma = {
+	.nr = gk104_fifo_pbdma_nr,
+	.init = gk104_fifo_pbdma_init,
 };
 
 static void
@@ -267,7 +298,7 @@ gk104_fifo_recover_work(struct work_struct *w)
 	}
 
 	for (todo = runm; runl = __ffs(todo), todo; todo &= ~BIT(runl))
-		gk104_fifo_runlist_commit(fifo, runl);
+		gk104_fifo_runlist_update(fifo, runl);
 
 	nvkm_wr32(device, 0x00262c, runm);
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
@@ -456,10 +487,10 @@ gk104_fifo_fault(struct nvkm_fifo *base, struct nvkm_fault_data *info)
 	if (ee && ee->data2) {
 		switch (ee->data2) {
 		case NVKM_SUBDEV_BAR:
-			nvkm_mask(device, 0x001704, 0x00000000, 0x00000000);
+			nvkm_bar_bar1_reset(device);
 			break;
 		case NVKM_SUBDEV_INSTMEM:
-			nvkm_mask(device, 0x001714, 0x00000000, 0x00000000);
+			nvkm_bar_bar2_reset(device);
 			break;
 		case NVKM_ENGINE_IFB:
 			nvkm_mask(device, 0x001718, 0x00000000, 0x00000000);
@@ -904,9 +935,7 @@ gk104_fifo_oneinit(struct nvkm_fifo *base)
 	enum nvkm_devidx engidx;
 	u32 *map;
 
-	/* Determine number of PBDMAs by checking valid enable bits. */
-	nvkm_wr32(device, 0x000204, 0xffffffff);
-	fifo->pbdma_nr = hweight32(nvkm_rd32(device, 0x000204));
+	fifo->pbdma_nr = fifo->func->pbdma->nr(fifo);
 	nvkm_debug(subdev, "%d PBDMA(s)\n", fifo->pbdma_nr);
 
 	/* Read PBDMA->runlist(s) mapping from HW. */
@@ -978,7 +1007,7 @@ gk104_fifo_init(struct nvkm_fifo *base)
 	int i;
 
 	/* Enable PBDMAs. */
-	nvkm_wr32(device, 0x000204, (1 << fifo->pbdma_nr) - 1);
+	fifo->func->pbdma->init(fifo);
 
 	/* PBDMA[n] */
 	for (i = 0; i < fifo->pbdma_nr; i++) {
@@ -995,8 +1024,8 @@ gk104_fifo_init(struct nvkm_fifo *base)
 
 	nvkm_wr32(device, 0x002254, 0x10000000 | fifo->user.bar->addr >> 12);
 
-	if (fifo->func->init_pbdma_timeout)
-		fifo->func->init_pbdma_timeout(fifo);
+	if (fifo->func->pbdma->init_timeout)
+		fifo->func->pbdma->init_timeout(fifo);
 
 	nvkm_wr32(device, 0x002100, 0xffffffff);
 	nvkm_wr32(device, 0x002140, 0x7fffffff);
@@ -1175,6 +1204,7 @@ gk104_fifo_fault_gpcclient[] = {
 
 static const struct gk104_fifo_func
 gk104_fifo = {
+	.pbdma = &gk104_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
index d295b81e18d6..d4e565658f46 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.h
@@ -45,7 +45,11 @@ struct gk104_fifo {
 };
 
 struct gk104_fifo_func {
-	void (*init_pbdma_timeout)(struct gk104_fifo *);
+	const struct gk104_fifo_pbdma_func {
+		int (*nr)(struct gk104_fifo *);
+		void (*init)(struct gk104_fifo *);
+		void (*init_timeout)(struct gk104_fifo *);
+	} *pbdma;
 
 	struct {
 		const struct nvkm_enum *access;
@@ -61,6 +65,8 @@ struct gk104_fifo_func {
 			     struct nvkm_memory *, u32 offset);
 		void (*chan)(struct gk104_fifo_chan *,
 			     struct nvkm_memory *, u32 offset);
+		void (*commit)(struct gk104_fifo *, int runl,
+			       struct nvkm_memory *, int entries);
 	} *runlist;
 
 	struct gk104_fifo_user_user {
@@ -81,8 +87,11 @@ int gk104_fifo_new_(const struct gk104_fifo_func *, struct nvkm_device *,
 		    int index, int nr, struct nvkm_fifo **);
 void gk104_fifo_runlist_insert(struct gk104_fifo *, struct gk104_fifo_chan *);
 void gk104_fifo_runlist_remove(struct gk104_fifo *, struct gk104_fifo_chan *);
-void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl);
+void gk104_fifo_runlist_update(struct gk104_fifo *, int runl);
 
+extern const struct gk104_fifo_pbdma_func gk104_fifo_pbdma;
+int gk104_fifo_pbdma_nr(struct gk104_fifo *);
+void gk104_fifo_pbdma_init(struct gk104_fifo *);
 extern const struct nvkm_enum gk104_fifo_fault_access[];
 extern const struct nvkm_enum gk104_fifo_fault_engine[];
 extern const struct nvkm_enum gk104_fifo_fault_reason[];
@@ -91,15 +100,30 @@ extern const struct nvkm_enum gk104_fifo_fault_gpcclient[];
 extern const struct gk104_fifo_runlist_func gk104_fifo_runlist;
 void gk104_fifo_runlist_chan(struct gk104_fifo_chan *,
 			     struct nvkm_memory *, u32);
+void gk104_fifo_runlist_commit(struct gk104_fifo *, int runl,
+			       struct nvkm_memory *, int);
 
 extern const struct gk104_fifo_runlist_func gk110_fifo_runlist;
 void gk110_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *,
 			     struct nvkm_memory *, u32);
 
-void gk208_fifo_init_pbdma_timeout(struct gk104_fifo *);
+extern const struct gk104_fifo_pbdma_func gk208_fifo_pbdma;
+void gk208_fifo_pbdma_init_timeout(struct gk104_fifo *);
 
 extern const struct nvkm_enum gm107_fifo_fault_engine[];
 extern const struct gk104_fifo_runlist_func gm107_fifo_runlist;
 
+extern const struct gk104_fifo_pbdma_func gm200_fifo_pbdma;
+int gm200_fifo_pbdma_nr(struct gk104_fifo *);
+
 extern const struct nvkm_enum gp100_fifo_fault_engine[];
+
+extern const struct nvkm_enum gv100_fifo_fault_access[];
+extern const struct nvkm_enum gv100_fifo_fault_reason[];
+extern const struct nvkm_enum gv100_fifo_fault_hubclient[];
+extern const struct nvkm_enum gv100_fifo_fault_gpcclient[];
+void gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *,
+			     struct nvkm_memory *, u32);
+void gv100_fifo_runlist_chan(struct gk104_fifo_chan *,
+			     struct nvkm_memory *, u32);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
index ac7655a130fb..8adfa6b182cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk110.c
@@ -43,10 +43,12 @@ gk110_fifo_runlist = {
 	.size = 8,
 	.cgrp = gk110_fifo_runlist_cgrp,
 	.chan = gk104_fifo_runlist_chan,
+	.commit = gk104_fifo_runlist_commit,
 };
 
 static const struct gk104_fifo_func
 gk110_fifo = {
+	.pbdma = &gk104_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
index 5ea7e452cc66..9553fb4af601 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk208.c
@@ -27,7 +27,7 @@
 #include <nvif/class.h>
 
 void
-gk208_fifo_init_pbdma_timeout(struct gk104_fifo *fifo)
+gk208_fifo_pbdma_init_timeout(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	int i;
@@ -36,9 +36,16 @@ gk208_fifo_init_pbdma_timeout(struct gk104_fifo *fifo)
 		nvkm_wr32(device, 0x04012c + (i * 0x2000), 0x0000ffff);
 }
 
+const struct gk104_fifo_pbdma_func
+gk208_fifo_pbdma = {
+	.nr = gk104_fifo_pbdma_nr,
+	.init = gk104_fifo_pbdma_init,
+	.init_timeout = gk208_fifo_pbdma_init_timeout,
+};
+
 static const struct gk104_fifo_func
 gk208_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gk208_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
index 535a0eb67a5f..a4c6ac3cd6c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk20a.c
@@ -26,7 +26,7 @@
 
 static const struct gk104_fifo_func
 gk20a_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gk208_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gk104_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
index 79ae19b1db67..acf230764cb0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
@@ -41,6 +41,7 @@ gm107_fifo_runlist = {
 	.size = 8,
 	.cgrp = gk110_fifo_runlist_cgrp,
 	.chan = gm107_fifo_runlist_chan,
+	.commit = gk104_fifo_runlist_commit,
 };
 
 const struct nvkm_enum
@@ -68,7 +69,7 @@ gm107_fifo_fault_engine[] = {
 
 static const struct gk104_fifo_func
 gm107_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gk208_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
index 49565faa854d..b96c1c5d6577 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm200.c
@@ -26,9 +26,23 @@
 
 #include <nvif/class.h>
 
+int
+gm200_fifo_pbdma_nr(struct gk104_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	return nvkm_rd32(device, 0x002004) & 0x000000ff;
+}
+
+const struct gk104_fifo_pbdma_func
+gm200_fifo_pbdma = {
+	.nr = gm200_fifo_pbdma_nr,
+	.init = gk104_fifo_pbdma_init,
+	.init_timeout = gk208_fifo_pbdma_init_timeout,
+};
+
 static const struct gk104_fifo_func
 gm200_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gm200_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
index 46736513bd11..a49539b9e4ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm20b.c
@@ -26,7 +26,7 @@
 
 static const struct gk104_fifo_func
 gm20b_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gm200_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gm107_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
index e2f8f9087d7c..54377e0f6a88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -52,7 +52,7 @@ gp100_fifo_fault_engine[] = {
 
 static const struct gk104_fifo_func
 gp100_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gm200_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gp100_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
index 7733bf7c6545..778ba7e46fb3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp10b.c
@@ -26,7 +26,7 @@
 
 static const struct gk104_fifo_func
 gp10b_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gm200_fifo_pbdma,
 	.fault.access = gk104_fifo_fault_access,
 	.fault.engine = gp100_fifo_fault_engine,
 	.fault.reason = gk104_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index 118b37aea318..728a1edbf98c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -85,7 +85,7 @@ gk104_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 	case NVKM_ENGINE_MSVLD : return 0x0270;
 	case NVKM_ENGINE_VIC   : return 0x0280;
 	case NVKM_ENGINE_MSENC : return 0x0290;
-	case NVKM_ENGINE_NVDEC : return 0x02100270;
+	case NVKM_ENGINE_NVDEC0: return 0x02100270;
 	case NVKM_ENGINE_NVENC0: return 0x02100290;
 	case NVKM_ENGINE_NVENC1: return 0x0210;
 	default:
@@ -192,7 +192,7 @@ gk104_fifo_gpfifo_fini(struct nvkm_fifo_chan *base)
 		gk104_fifo_runlist_remove(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000800, 0x00000800);
 		gk104_fifo_gpfifo_kick(chan);
-		gk104_fifo_runlist_commit(fifo, chan->runl);
+		gk104_fifo_runlist_update(fifo, chan->runl);
 	}
 
 	nvkm_wr32(device, 0x800000 + coff, 0x00000000);
@@ -213,7 +213,7 @@ gk104_fifo_gpfifo_init(struct nvkm_fifo_chan *base)
 	if (list_empty(&chan->head) && !chan->killed) {
 		gk104_fifo_runlist_insert(fifo, chan);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
-		gk104_fifo_runlist_commit(fifo, chan->runl);
+		gk104_fifo_runlist_update(fifo, chan->runl);
 		nvkm_mask(device, 0x800004 + coff, 0x00000400, 0x00000400);
 	}
 }
@@ -222,6 +222,7 @@ void *
 gk104_fifo_gpfifo_dtor(struct nvkm_fifo_chan *base)
 {
 	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	nvkm_memory_unref(&chan->mthd);
 	kfree(chan->cgrp);
 	return chan;
 }
@@ -240,7 +241,7 @@ gk104_fifo_gpfifo_func = {
 
 static int
 gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
-		       u64 vmm, u64 ioffset, u64 ilength,
+		       u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv,
 		       const struct nvkm_oclass *oclass,
 		       struct nvkm_object **pobject)
 {
@@ -279,6 +280,7 @@ gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 		return ret;
 
 	*chid = chan->base.chid;
+	*inst = chan->base.inst->addr;
 
 	/* Hack to support GPUs where even individual channels should be
 	 * part of a channel group.
@@ -315,6 +317,7 @@ gk104_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 	nvkm_wo32(chan->base.inst, 0x94, 0x30000001);
 	nvkm_wo32(chan->base.inst, 0x9c, 0x00000100);
 	nvkm_wo32(chan->base.inst, 0xac, 0x0000001f);
+	nvkm_wo32(chan->base.inst, 0xe4, priv ? 0x00000020 : 0x00000000);
 	nvkm_wo32(chan->base.inst, 0xe8, chan->base.chid);
 	nvkm_wo32(chan->base.inst, 0xb8, 0xf8000000);
 	nvkm_wo32(chan->base.inst, 0xf8, 0x10003080); /* 0x002310 */
@@ -337,15 +340,19 @@ gk104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
 				   "ioffset %016llx ilength %08x "
-				   "runlist %016llx\n",
+				   "runlist %016llx priv %d\n",
 			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.runlist);
+			   args->v0.ilength, args->v0.runlist, args->v0.priv);
+		if (args->v0.priv && !oclass->client->super)
+			return -EINVAL;
 		return gk104_fifo_gpfifo_new_(fifo,
 					      &args->v0.runlist,
 					      &args->v0.chid,
 					       args->v0.vmm,
 					       args->v0.ioffset,
 					       args->v0.ilength,
+					      &args->v0.inst,
+					       args->v0.priv,
 					      oclass, pobject);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
index 9598853ced56..a7462cf59d65 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogv100.c
@@ -25,9 +25,15 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 
-#include <nvif/cla06f.h>
+#include <nvif/clc36f.h>
 #include <nvif/unpack.h>
 
+static u32
+gv100_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *chan)
+{
+	return chan->chid;
+}
+
 static int
 gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid)
 {
@@ -56,7 +62,7 @@ gv100_fifo_gpfifo_engine_valid(struct gk104_fifo_chan *chan, bool ce, bool valid
 	return ret;
 }
 
-static int
+int
 gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine, bool suspend)
 {
@@ -79,7 +85,7 @@ gv100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 	return ret;
 }
 
-static int
+int
 gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
 			      struct nvkm_engine *engine)
 {
@@ -100,8 +106,8 @@ gv100_fifo_gpfifo_engine_init(struct nvkm_fifo_chan *base,
 	return gv100_fifo_gpfifo_engine_valid(chan, false, true);
 }
 
-const struct nvkm_fifo_chan_func
-gv100_fifo_gpfifo_func = {
+static const struct nvkm_fifo_chan_func
+gv100_fifo_gpfifo = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
 	.fini = gk104_fifo_gpfifo_fini,
@@ -110,19 +116,23 @@ gv100_fifo_gpfifo_func = {
 	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
 	.engine_init = gv100_fifo_gpfifo_engine_init,
 	.engine_fini = gv100_fifo_gpfifo_engine_fini,
+	.submit_token = gv100_fifo_gpfifo_submit_token,
 };
 
-static int
-gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
-		       u64 vmm, u64 ioffset, u64 ilength,
-		       const struct nvkm_oclass *oclass,
+int
+gv100_fifo_gpfifo_new_(const struct nvkm_fifo_chan_func *func,
+		       struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
+		       u64 vmm, u64 ioffset, u64 ilength, u64 *inst, bool priv,
+		       u32 *token, const struct nvkm_oclass *oclass,
 		       struct nvkm_object **pobject)
 {
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
 	struct gk104_fifo_chan *chan;
 	int runlist = ffs(*runlists) -1, ret, i;
 	unsigned long engm;
 	u64 subdevs = 0;
-	u64 usermem;
+	u64 usermem, mthd;
+	u32 size;
 
 	if (!vmm || runlist < 0 || runlist >= fifo->runlist_nr)
 		return -EINVAL;
@@ -142,14 +152,15 @@ gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 	chan->runl = runlist;
 	INIT_LIST_HEAD(&chan->head);
 
-	ret = nvkm_fifo_chan_ctor(&gv100_fifo_gpfifo_func, &fifo->base,
-				  0x1000, 0x1000, true, vmm, 0, subdevs,
-				  1, fifo->user.bar->addr, 0x200,
+	ret = nvkm_fifo_chan_ctor(func, &fifo->base, 0x1000, 0x1000, true, vmm,
+				  0, subdevs, 1, fifo->user.bar->addr, 0x200,
 				  oclass, &chan->base);
 	if (ret)
 		return ret;
 
 	*chid = chan->base.chid;
+	*inst = chan->base.inst->addr;
+	*token = chan->base.func->submit_token(&chan->base);
 
 	/* Hack to support GPUs where even individual channels should be
 	 * part of a channel group.
@@ -173,6 +184,20 @@ gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 	nvkm_done(fifo->user.mem);
 	usermem = nvkm_memory_addr(fifo->user.mem) + usermem;
 
+	/* Allocate fault method buffer (magics come from nvgpu). */
+	size = nvkm_rd32(device, 0x104028); /* NV_PCE_PCE_MAP */
+	size = 27 * 5 * (((9 + 1 + 3) * hweight32(size)) + 2);
+	size = roundup(size, PAGE_SIZE);
+
+	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, size, 0x1000, true,
+			      &chan->mthd);
+	if (ret)
+		return ret;
+
+	mthd = nvkm_memory_bar2(chan->mthd);
+	if (mthd == ~0ULL)
+		return -EFAULT;
+
 	/* RAMFC */
 	nvkm_kmap(chan->base.inst);
 	nvkm_wo32(chan->base.inst, 0x008, lower_32_bits(usermem));
@@ -184,13 +209,13 @@ gv100_fifo_gpfifo_new_(struct gk104_fifo *fifo, u64 *runlists, u16 *chid,
 					  (ilength << 16));
 	nvkm_wo32(chan->base.inst, 0x084, 0x20400000);
 	nvkm_wo32(chan->base.inst, 0x094, 0x30000001);
-	nvkm_wo32(chan->base.inst, 0x0e4, 0x00000020);
+	nvkm_wo32(chan->base.inst, 0x0e4, priv ? 0x00000020 : 0x00000000);
 	nvkm_wo32(chan->base.inst, 0x0e8, chan->base.chid);
-	nvkm_wo32(chan->base.inst, 0x0f4, 0x00001100);
+	nvkm_wo32(chan->base.inst, 0x0f4, 0x00001000);
 	nvkm_wo32(chan->base.inst, 0x0f8, 0x10003080);
 	nvkm_mo32(chan->base.inst, 0x218, 0x00000000, 0x00000000);
-	nvkm_wo32(chan->base.inst, 0x220, 0x020a1000);
-	nvkm_wo32(chan->base.inst, 0x224, 0x00000000);
+	nvkm_wo32(chan->base.inst, 0x220, lower_32_bits(mthd));
+	nvkm_wo32(chan->base.inst, 0x224, upper_32_bits(mthd));
 	nvkm_done(chan->base.inst);
 	return gv100_fifo_gpfifo_engine_valid(chan, true, true);
 }
@@ -201,7 +226,7 @@ gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
 {
 	struct nvkm_object *parent = oclass->parent;
 	union {
-		struct kepler_channel_gpfifo_a_v0 v0;
+		struct volta_channel_gpfifo_a_v0 v0;
 	} *args = data;
 	int ret = -ENOSYS;
 
@@ -209,15 +234,20 @@ gv100_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
 		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
 				   "ioffset %016llx ilength %08x "
-				   "runlist %016llx\n",
+				   "runlist %016llx priv %d\n",
 			   args->v0.version, args->v0.vmm, args->v0.ioffset,
-			   args->v0.ilength, args->v0.runlist);
-		return gv100_fifo_gpfifo_new_(fifo,
+			   args->v0.ilength, args->v0.runlist, args->v0.priv);
+		if (args->v0.priv && !oclass->client->super)
+			return -EINVAL;
+		return gv100_fifo_gpfifo_new_(&gv100_fifo_gpfifo, fifo,
 					      &args->v0.runlist,
 					      &args->v0.chid,
 					       args->v0.vmm,
 					       args->v0.ioffset,
 					       args->v0.ilength,
+					      &args->v0.inst,
+					       args->v0.priv,
+					      &args->v0.token,
 					      oclass, pobject);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c
new file mode 100644
index 000000000000..ff70484dd01a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifotu104.c
@@ -0,0 +1,83 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "changk104.h"
+#include "cgrp.h"
+
+#include <core/client.h>
+#include <core/gpuobj.h>
+
+#include <nvif/clc36f.h>
+#include <nvif/unpack.h>
+
+static u32
+tu104_fifo_gpfifo_submit_token(struct nvkm_fifo_chan *base)
+{
+	struct gk104_fifo_chan *chan = gk104_fifo_chan(base);
+	return (chan->runl << 16) | chan->base.chid;
+}
+
+static const struct nvkm_fifo_chan_func
+tu104_fifo_gpfifo = {
+	.dtor = gk104_fifo_gpfifo_dtor,
+	.init = gk104_fifo_gpfifo_init,
+	.fini = gk104_fifo_gpfifo_fini,
+	.ntfy = gf100_fifo_chan_ntfy,
+	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
+	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
+	.engine_init = gv100_fifo_gpfifo_engine_init,
+	.engine_fini = gv100_fifo_gpfifo_engine_fini,
+	.submit_token = tu104_fifo_gpfifo_submit_token,
+};
+
+int
+tu104_fifo_gpfifo_new(struct gk104_fifo *fifo, const struct nvkm_oclass *oclass,
+		      void *data, u32 size, struct nvkm_object **pobject)
+{
+	struct nvkm_object *parent = oclass->parent;
+	union {
+		struct volta_channel_gpfifo_a_v0 v0;
+	} *args = data;
+	int ret = -ENOSYS;
+
+	nvif_ioctl(parent, "create channel gpfifo size %d\n", size);
+	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+		nvif_ioctl(parent, "create channel gpfifo vers %d vmm %llx "
+				   "ioffset %016llx ilength %08x "
+				   "runlist %016llx priv %d\n",
+			   args->v0.version, args->v0.vmm, args->v0.ioffset,
+			   args->v0.ilength, args->v0.runlist, args->v0.priv);
+		if (args->v0.priv && !oclass->client->super)
+			return -EINVAL;
+		return gv100_fifo_gpfifo_new_(&tu104_fifo_gpfifo, fifo,
+					      &args->v0.runlist,
+					      &args->v0.chid,
+					       args->v0.vmm,
+					       args->v0.ioffset,
+					       args->v0.ilength,
+					      &args->v0.inst,
+					       args->v0.priv,
+					      &args->v0.token,
+					      oclass, pobject);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
index 4e1d159c0ae7..6ee1bb32a071 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gv100.c
@@ -28,7 +28,7 @@
 
 #include <nvif/class.h>
 
-static void
+void
 gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan,
 			struct nvkm_memory *memory, u32 offset)
 {
@@ -42,7 +42,7 @@ gv100_fifo_runlist_chan(struct gk104_fifo_chan *chan,
 	nvkm_wo32(memory, offset + 0xc, upper_32_bits(inst));
 }
 
-static void
+void
 gv100_fifo_runlist_cgrp(struct nvkm_fifo_cgrp *cgrp,
 			struct nvkm_memory *memory, u32 offset)
 {
@@ -57,9 +57,10 @@ gv100_fifo_runlist = {
 	.size = 16,
 	.cgrp = gv100_fifo_runlist_cgrp,
 	.chan = gv100_fifo_runlist_chan,
+	.commit = gk104_fifo_runlist_commit,
 };
 
-static const struct nvkm_enum
+const struct nvkm_enum
 gv100_fifo_fault_gpcclient[] = {
 	{ 0x00, "T1_0" },
 	{ 0x01, "T1_1" },
@@ -161,7 +162,7 @@ gv100_fifo_fault_gpcclient[] = {
 	{}
 };
 
-static const struct nvkm_enum
+const struct nvkm_enum
 gv100_fifo_fault_hubclient[] = {
 	{ 0x00, "VIP" },
 	{ 0x01, "CE0" },
@@ -223,7 +224,7 @@ gv100_fifo_fault_hubclient[] = {
 	{}
 };
 
-static const struct nvkm_enum
+const struct nvkm_enum
 gv100_fifo_fault_reason[] = {
 	{ 0x00, "PDE" },
 	{ 0x01, "PDE_SIZE" },
@@ -271,7 +272,7 @@ gv100_fifo_fault_engine[] = {
 	{}
 };
 
-static const struct nvkm_enum
+const struct nvkm_enum
 gv100_fifo_fault_access[] = {
 	{ 0x0, "VIRT_READ" },
 	{ 0x1, "VIRT_WRITE" },
@@ -287,7 +288,7 @@ gv100_fifo_fault_access[] = {
 
 static const struct gk104_fifo_func
 gv100_fifo = {
-	.init_pbdma_timeout = gk208_fifo_init_pbdma_timeout,
+	.pbdma = &gm200_fifo_pbdma,
 	.fault.access = gv100_fifo_fault_access,
 	.fault.engine = gv100_fifo_fault_engine,
 	.fault.reason = gv100_fifo_fault_reason,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c
new file mode 100644
index 000000000000..98c80705bc61
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/tu104.c
@@ -0,0 +1,116 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gk104.h"
+#include "cgrp.h"
+#include "changk104.h"
+#include "user.h"
+
+#include <core/gpuobj.h>
+
+#include <nvif/class.h>
+
+static void
+tu104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl,
+			  struct nvkm_memory *mem, int nr)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	u64 addr = nvkm_memory_addr(mem);
+	/*XXX: target? */
+
+	nvkm_wr32(device, 0x002b00 + (runl * 0x10), lower_32_bits(addr));
+	nvkm_wr32(device, 0x002b04 + (runl * 0x10), upper_32_bits(addr));
+	nvkm_wr32(device, 0x002b08 + (runl * 0x10), nr);
+
+	/*XXX: how to wait? can you even wait? */
+}
+
+const struct gk104_fifo_runlist_func
+tu104_fifo_runlist = {
+	.size = 16,
+	.cgrp = gv100_fifo_runlist_cgrp,
+	.chan = gv100_fifo_runlist_chan,
+	.commit = tu104_fifo_runlist_commit,
+};
+
+static const struct nvkm_enum
+tu104_fifo_fault_engine[] = {
+	{ 0x01, "DISPLAY" },
+	{ 0x03, "PTP" },
+	{ 0x06, "PWR_PMU" },
+	{ 0x08, "IFB", NULL, NVKM_ENGINE_IFB },
+	{ 0x09, "PERF" },
+	{ 0x1f, "PHYSICAL" },
+	{ 0x20, "HOST0" },
+	{ 0x21, "HOST1" },
+	{ 0x22, "HOST2" },
+	{ 0x23, "HOST3" },
+	{ 0x24, "HOST4" },
+	{ 0x25, "HOST5" },
+	{ 0x26, "HOST6" },
+	{ 0x27, "HOST7" },
+	{ 0x28, "HOST8" },
+	{ 0x29, "HOST9" },
+	{ 0x2a, "HOST10" },
+	{ 0x2b, "HOST11" },
+	{ 0x2c, "HOST12" },
+	{ 0x2d, "HOST13" },
+	{ 0x2e, "HOST14" },
+	{ 0x80, "BAR1", NULL, NVKM_SUBDEV_BAR },
+	{ 0xc0, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
+	{}
+};
+
+static void
+tu104_fifo_pbdma_init(struct gk104_fifo *fifo)
+{
+	struct nvkm_device *device = fifo->base.engine.subdev.device;
+	const u32 mask = (1 << fifo->pbdma_nr) - 1;
+	/*XXX: this is a bit of a guess at this point in time. */
+	nvkm_mask(device, 0xb65000, 0x80000fff, 0x80000000 | mask);
+}
+
+static const struct gk104_fifo_pbdma_func
+tu104_fifo_pbdma = {
+	.nr = gm200_fifo_pbdma_nr,
+	.init = tu104_fifo_pbdma_init,
+	.init_timeout = gk208_fifo_pbdma_init_timeout,
+};
+
+static const struct gk104_fifo_func
+tu104_fifo = {
+	.pbdma = &tu104_fifo_pbdma,
+	.fault.access = gv100_fifo_fault_access,
+	.fault.engine = tu104_fifo_fault_engine,
+	.fault.reason = gv100_fifo_fault_reason,
+	.fault.hubclient = gv100_fifo_fault_hubclient,
+	.fault.gpcclient = gv100_fifo_fault_gpcclient,
+	.runlist = &tu104_fifo_runlist,
+	.user = {{-1,-1,VOLTA_USERMODE_A       }, tu104_fifo_user_new   },
+	.chan = {{ 0, 0,TURING_CHANNEL_GPFIFO_A}, tu104_fifo_gpfifo_new },
+	.cgrp_force = true,
+};
+
+int
+tu104_fifo_new(struct nvkm_device *device, int index, struct nvkm_fifo **pfifo)
+{
+	return gk104_fifo_new_(&tu104_fifo, device, index, 4096, pfifo);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
index ed840921ebe8..14b0c6bde8eb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/user.h
@@ -3,4 +3,6 @@
 #include "priv.h"
 int gv100_fifo_user_new(const struct nvkm_oclass *, void *, u32,
 			struct nvkm_object **);
+int tu104_fifo_user_new(const struct nvkm_oclass *, void *, u32,
+			struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c
new file mode 100644
index 000000000000..8f98548a21f6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/usertu104.c
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "user.h"
+
+static int
+tu104_fifo_user_map(struct nvkm_object *object, void *argv, u32 argc,
+		    enum nvkm_object_map *type, u64 *addr, u64 *size)
+{
+	struct nvkm_device *device = object->engine->subdev.device;
+	*addr = 0xbb0000 + device->func->resource_addr(device, 0);
+	*size = 0x010000;
+	*type = NVKM_OBJECT_MAP_IO;
+	return 0;
+}
+
+static const struct nvkm_object_func
+tu104_fifo_user = {
+	.map = tu104_fifo_user_map,
+};
+
+int
+tu104_fifo_user_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		    struct nvkm_object **pobject)
+{
+	return nvkm_object_new_(&tu104_fifo_user, oclass, argv, argc, pobject);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
index 14be41f24155..427340153640 100644
--- a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
@@ -197,7 +197,7 @@ nvkm_falcon_ctor(const struct nvkm_falcon_func *func,
 	case NVKM_SUBDEV_PMU:
 		debug_reg = 0xc08;
 		break;
-	case NVKM_ENGINE_NVDEC:
+	case NVKM_ENGINE_NVDEC0:
 		debug_reg = 0xd00;
 		break;
 	case NVKM_ENGINE_SEC2:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild
index e5830453813d..ab0282dc0736 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/Kbuild
@@ -5,3 +5,4 @@ nvkm-y += nvkm/subdev/bar/gf100.o
 nvkm-y += nvkm/subdev/bar/gk20a.o
 nvkm-y += nvkm/subdev/bar/gm107.o
 nvkm-y += nvkm/subdev/bar/gm20b.o
+nvkm-y += nvkm/subdev/bar/tu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
index 243f0a5c8a62..209a6a40834a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/base.c
@@ -36,6 +36,16 @@ nvkm_bar_bar1_vmm(struct nvkm_device *device)
 	return device->bar->func->bar1.vmm(device->bar);
 }
 
+void
+nvkm_bar_bar1_reset(struct nvkm_device *device)
+{
+	struct nvkm_bar *bar = device->bar;
+	if (bar) {
+		bar->func->bar1.init(bar);
+		bar->func->bar1.wait(bar);
+	}
+}
+
 struct nvkm_vmm *
 nvkm_bar_bar2_vmm(struct nvkm_device *device)
 {
@@ -49,6 +59,16 @@ nvkm_bar_bar2_vmm(struct nvkm_device *device)
 }
 
 void
+nvkm_bar_bar2_reset(struct nvkm_device *device)
+{
+	struct nvkm_bar *bar = device->bar;
+	if (bar && bar->bar2) {
+		bar->func->bar2.init(bar);
+		bar->func->bar2.wait(bar);
+	}
+}
+
+void
 nvkm_bar_bar2_fini(struct nvkm_device *device)
 {
 	struct nvkm_bar *bar = device->bar;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c
new file mode 100644
index 000000000000..ecaead156e9b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/tu104.c
@@ -0,0 +1,98 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "gf100.h"
+
+#include <core/memory.h>
+#include <subdev/timer.h>
+
+static void
+tu104_bar_bar2_wait(struct nvkm_bar *bar)
+{
+	struct nvkm_device *device = bar->subdev.device;
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0xb80f50) & 0x0000000c))
+			break;
+	);
+}
+
+static void
+tu104_bar_bar2_fini(struct nvkm_bar *bar)
+{
+	nvkm_mask(bar->subdev.device, 0xb80f48, 0x80000000, 0x00000000);
+}
+
+static void
+tu104_bar_bar2_init(struct nvkm_bar *base)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct gf100_bar *bar = gf100_bar(base);
+	u32 addr = nvkm_memory_addr(bar->bar[0].inst) >> 12;
+	if (bar->bar2_halve)
+		addr |= 0x40000000;
+	nvkm_wr32(device, 0xb80f48, 0x80000000 | addr);
+}
+
+static void
+tu104_bar_bar1_wait(struct nvkm_bar *bar)
+{
+	struct nvkm_device *device = bar->subdev.device;
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0xb80f50) & 0x00000003))
+			break;
+	);
+}
+
+static void
+tu104_bar_bar1_fini(struct nvkm_bar *bar)
+{
+	nvkm_mask(bar->subdev.device, 0xb80f40, 0x80000000, 0x00000000);
+}
+
+static void
+tu104_bar_bar1_init(struct nvkm_bar *base)
+{
+	struct nvkm_device *device = base->subdev.device;
+	struct gf100_bar *bar = gf100_bar(base);
+	const u32 addr = nvkm_memory_addr(bar->bar[1].inst) >> 12;
+	nvkm_wr32(device, 0xb80f40, 0x80000000 | addr);
+}
+
+static const struct nvkm_bar_func
+tu104_bar = {
+	.dtor = gf100_bar_dtor,
+	.oneinit = gf100_bar_oneinit,
+	.bar1.init = tu104_bar_bar1_init,
+	.bar1.fini = tu104_bar_bar1_fini,
+	.bar1.wait = tu104_bar_bar1_wait,
+	.bar1.vmm = gf100_bar_bar1_vmm,
+	.bar2.init = tu104_bar_bar2_init,
+	.bar2.fini = tu104_bar_bar2_fini,
+	.bar2.wait = tu104_bar_bar2_wait,
+	.bar2.vmm = gf100_bar_bar2_vmm,
+	.flush = g84_bar_flush,
+};
+
+int
+tu104_bar_new(struct nvkm_device *device, int index, struct nvkm_bar **pbar)
+{
+	return gf100_bar_new_(&tu104_bar, device, index, pbar);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
index 50a436926484..3ef505a5c01b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/Kbuild
@@ -13,3 +13,4 @@ nvkm-y += nvkm/subdev/devinit/gf100.o
 nvkm-y += nvkm/subdev/devinit/gm107.o
 nvkm-y += nvkm/subdev/devinit/gm200.o
 nvkm-y += nvkm/subdev/devinit/gv100.o
+nvkm-y += nvkm/subdev/devinit/tu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
index 17235e940ca9..59940dacc2ba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
@@ -105,6 +105,15 @@ pmu_load(struct nv50_devinit *init, u8 type, bool post,
 	return pmu_exec(init, pmu.init_addr_pmu), 0;
 }
 
+void
+gm200_devinit_preos(struct nv50_devinit *init, bool post)
+{
+	/* Optional: Execute PRE_OS application on PMU, which should at
+	 * least take care of fans until a full PMU has been loaded.
+	 */
+	pmu_load(init, 0x01, post, NULL, NULL);
+}
+
 int
 gm200_devinit_post(struct nvkm_devinit *base, bool post)
 {
@@ -156,10 +165,7 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post)
 			return -ETIMEDOUT;
 	}
 
-	/* Optional: Execute PRE_OS application on PMU, which should at
-	 * least take care of fans until a full PMU has been loaded.
-	 */
-	pmu_load(init, 0x01, post, NULL, NULL);
+	gm200_devinit_preos(init, post);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
index 9b9f0dc1e192..72d130bb7f7c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.h
@@ -26,4 +26,5 @@ void gf100_devinit_preinit(struct nvkm_devinit *);
 u64  gm107_devinit_disable(struct nvkm_devinit *);
 
 int gm200_devinit_post(struct nvkm_devinit *, bool);
+void gm200_devinit_preos(struct nv50_devinit *, bool);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c
new file mode 100644
index 000000000000..aae87b3fc429
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/tu104.c
@@ -0,0 +1,89 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "nv50.h"
+
+#include <subdev/bios.h>
+#include <subdev/bios/pll.h>
+#include <subdev/clk/pll.h>
+
+static int
+tu104_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
+{
+	struct nvkm_subdev *subdev = &init->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvbios_pll info;
+	int head = type - PLL_VPLL0;
+	int N, fN, M, P;
+	int ret;
+
+	ret = nvbios_pll_parse(device->bios, type, &info);
+	if (ret)
+		return ret;
+
+	ret = gt215_pll_calc(subdev, &info, freq, &N, &fN, &M, &P);
+	if (ret < 0)
+		return ret;
+
+	switch (info.type) {
+	case PLL_VPLL0:
+	case PLL_VPLL1:
+	case PLL_VPLL2:
+	case PLL_VPLL3:
+		nvkm_wr32(device, 0x00ef10 + (head * 0x40), fN << 16);
+		nvkm_wr32(device, 0x00ef04 + (head * 0x40), (P << 16) |
+							    (N <<  8) |
+							    (M <<  0));
+		/*XXX*/
+		nvkm_wr32(device, 0x00ef0c + (head * 0x40), 0x00000900);
+		nvkm_wr32(device, 0x00ef00 + (head * 0x40), 0x02000014);
+		break;
+	default:
+		nvkm_warn(subdev, "%08x/%dKhz unimplemented\n", type, freq);
+		ret = -EINVAL;
+		break;
+	}
+
+	return ret;
+}
+
+static int
+tu104_devinit_post(struct nvkm_devinit *base, bool post)
+{
+	struct nv50_devinit *init = nv50_devinit(base);
+	gm200_devinit_preos(init, post);
+	return 0;
+}
+
+static const struct nvkm_devinit_func
+tu104_devinit = {
+	.init = nv50_devinit_init,
+	.post = tu104_devinit_post,
+	.pll_set = tu104_devinit_pll_set,
+	.disable = gm107_devinit_disable,
+};
+
+int
+tu104_devinit_new(struct nvkm_device *device, int index,
+		struct nvkm_devinit **pinit)
+{
+	return nv50_devinit_new_(&tu104_devinit, device, index, pinit);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild
index 45bb46fb0929..794eb1745b2f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/Kbuild
@@ -1,3 +1,4 @@
 nvkm-y += nvkm/subdev/fault/base.o
 nvkm-y += nvkm/subdev/fault/gp100.o
 nvkm-y += nvkm/subdev/fault/gv100.o
+nvkm-y += nvkm/subdev/fault/tu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
index 16ad91c91a7b..4ba1e21e8fda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/base.c
@@ -23,21 +23,19 @@
 
 #include <core/memory.h>
 #include <core/notify.h>
-#include <subdev/bar.h>
-#include <subdev/mmu.h>
 
 static void
 nvkm_fault_ntfy_fini(struct nvkm_event *event, int type, int index)
 {
 	struct nvkm_fault *fault = container_of(event, typeof(*fault), event);
-	fault->func->buffer.fini(fault->buffer[index]);
+	fault->func->buffer.intr(fault->buffer[index], false);
 }
 
 static void
 nvkm_fault_ntfy_init(struct nvkm_event *event, int type, int index)
 {
 	struct nvkm_fault *fault = container_of(event, typeof(*fault), event);
-	fault->func->buffer.init(fault->buffer[index]);
+	fault->func->buffer.intr(fault->buffer[index], true);
 }
 
 static int
@@ -91,7 +89,6 @@ nvkm_fault_oneinit_buffer(struct nvkm_fault *fault, int id)
 {
 	struct nvkm_subdev *subdev = &fault->subdev;
 	struct nvkm_device *device = subdev->device;
-	struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(device);
 	struct nvkm_fault_buffer *buffer;
 	int ret;
 
@@ -99,7 +96,7 @@ nvkm_fault_oneinit_buffer(struct nvkm_fault *fault, int id)
 		return -ENOMEM;
 	buffer->fault = fault;
 	buffer->id = id;
-	buffer->entries = fault->func->buffer.entries(buffer);
+	fault->func->buffer.info(buffer);
 	fault->buffer[id] = buffer;
 
 	nvkm_debug(subdev, "buffer %d: %d entries\n", id, buffer->entries);
@@ -110,12 +107,12 @@ nvkm_fault_oneinit_buffer(struct nvkm_fault *fault, int id)
 	if (ret)
 		return ret;
 
-	ret = nvkm_vmm_get(bar2, 12, nvkm_memory_size(buffer->mem),
-			   &buffer->vma);
-	if (ret)
-		return ret;
+	/* Pin fault buffer in BAR2. */
+	buffer->addr = nvkm_memory_bar2(buffer->mem);
+	if (buffer->addr == ~0ULL)
+		return -EFAULT;
 
-	return nvkm_memory_map(buffer->mem, 0, bar2, buffer->vma, NULL, 0);
+	return 0;
 }
 
 static int
@@ -146,7 +143,6 @@ nvkm_fault_oneinit(struct nvkm_subdev *subdev)
 static void *
 nvkm_fault_dtor(struct nvkm_subdev *subdev)
 {
-	struct nvkm_vmm *bar2 = nvkm_bar_bar2_vmm(subdev->device);
 	struct nvkm_fault *fault = nvkm_fault(subdev);
 	int i;
 
@@ -154,7 +150,6 @@ nvkm_fault_dtor(struct nvkm_subdev *subdev)
 
 	for (i = 0; i < fault->buffer_nr; i++) {
 		if (fault->buffer[i]) {
-			nvkm_vmm_put(bar2, &fault->buffer[i]->vma);
 			nvkm_memory_unref(&fault->buffer[i]->mem);
 			kfree(fault->buffer[i]);
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
index 5e71db2e8d75..8fb96fe614f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gp100.c
@@ -21,7 +21,14 @@
  */
 #include "priv.h"
 
-#include <subdev/mmu.h>
+#include <subdev/mc.h>
+
+static void
+gp100_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	nvkm_mc_intr_mask(device, NVKM_SUBDEV_FAULT, enable);
+}
 
 static void
 gp100_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
@@ -34,15 +41,17 @@ static void
 gp100_fault_buffer_init(struct nvkm_fault_buffer *buffer)
 {
 	struct nvkm_device *device = buffer->fault->subdev.device;
-	nvkm_wr32(device, 0x002a74, upper_32_bits(buffer->vma->addr));
-	nvkm_wr32(device, 0x002a70, lower_32_bits(buffer->vma->addr));
+	nvkm_wr32(device, 0x002a74, upper_32_bits(buffer->addr));
+	nvkm_wr32(device, 0x002a70, lower_32_bits(buffer->addr));
 	nvkm_mask(device, 0x002a70, 0x00000001, 0x00000001);
 }
 
-static u32
-gp100_fault_buffer_entries(struct nvkm_fault_buffer *buffer)
+static void
+gp100_fault_buffer_info(struct nvkm_fault_buffer *buffer)
 {
-	return nvkm_rd32(buffer->fault->subdev.device, 0x002a78);
+	buffer->entries = nvkm_rd32(buffer->fault->subdev.device, 0x002a78);
+	buffer->get = 0x002a7c;
+	buffer->put = 0x002a80;
 }
 
 static void
@@ -56,9 +65,10 @@ gp100_fault = {
 	.intr = gp100_fault_intr,
 	.buffer.nr = 1,
 	.buffer.entry_size = 32,
-	.buffer.entries = gp100_fault_buffer_entries,
+	.buffer.info = gp100_fault_buffer_info,
 	.buffer.init = gp100_fault_buffer_init,
 	.buffer.fini = gp100_fault_buffer_fini,
+	.buffer.intr = gp100_fault_buffer_intr,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
index 3cd610d7deb5..6fc54e17c935 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/gv100.c
@@ -30,9 +30,8 @@ gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer)
 {
 	struct nvkm_device *device = buffer->fault->subdev.device;
 	struct nvkm_memory *mem = buffer->mem;
-	const u32 foff = buffer->id * 0x14;
-	u32 get = nvkm_rd32(device, 0x100e2c + foff);
-	u32 put = nvkm_rd32(device, 0x100e30 + foff);
+	u32 get = nvkm_rd32(device, buffer->get);
+	u32 put = nvkm_rd32(device, buffer->put);
 	if (put == get)
 		return;
 
@@ -51,7 +50,7 @@ gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer)
 
 		if (++get == buffer->entries)
 			get = 0;
-		nvkm_wr32(device, 0x100e2c + foff, get);
+		nvkm_wr32(device, buffer->get, get);
 
 		info.addr   = ((u64)addrhi << 32) | addrlo;
 		info.inst   = ((u64)insthi << 32) | instlo;
@@ -70,13 +69,21 @@ gv100_fault_buffer_process(struct nvkm_fault_buffer *buffer)
 }
 
 static void
-gv100_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
+gv100_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable)
 {
 	struct nvkm_device *device = buffer->fault->subdev.device;
 	const u32 intr = buffer->id ? 0x08000000 : 0x20000000;
-	const u32 foff = buffer->id * 0x14;
+	if (enable)
+		nvkm_mask(device, 0x100a2c, intr, intr);
+	else
+		nvkm_mask(device, 0x100a34, intr, intr);
+}
 
-	nvkm_mask(device, 0x100a34, intr, intr);
+static void
+gv100_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 foff = buffer->id * 0x14;
 	nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x00000000);
 }
 
@@ -84,23 +91,25 @@ static void
 gv100_fault_buffer_init(struct nvkm_fault_buffer *buffer)
 {
 	struct nvkm_device *device = buffer->fault->subdev.device;
-	const u32 intr = buffer->id ? 0x08000000 : 0x20000000;
 	const u32 foff = buffer->id * 0x14;
 
 	nvkm_mask(device, 0x100e34 + foff, 0xc0000000, 0x40000000);
-	nvkm_wr32(device, 0x100e28 + foff, upper_32_bits(buffer->vma->addr));
-	nvkm_wr32(device, 0x100e24 + foff, lower_32_bits(buffer->vma->addr));
+	nvkm_wr32(device, 0x100e28 + foff, upper_32_bits(buffer->addr));
+	nvkm_wr32(device, 0x100e24 + foff, lower_32_bits(buffer->addr));
 	nvkm_mask(device, 0x100e34 + foff, 0x80000000, 0x80000000);
-	nvkm_mask(device, 0x100a2c, intr, intr);
 }
 
-static u32
-gv100_fault_buffer_entries(struct nvkm_fault_buffer *buffer)
+static void
+gv100_fault_buffer_info(struct nvkm_fault_buffer *buffer)
 {
 	struct nvkm_device *device = buffer->fault->subdev.device;
 	const u32 foff = buffer->id * 0x14;
+
 	nvkm_mask(device, 0x100e34 + foff, 0x40000000, 0x40000000);
-	return nvkm_rd32(device, 0x100e34 + foff) & 0x000fffff;
+
+	buffer->entries = nvkm_rd32(device, 0x100e34 + foff) & 0x000fffff;
+	buffer->get = 0x100e2c + foff;
+	buffer->put = 0x100e30 + foff;
 }
 
 static int
@@ -166,6 +175,8 @@ static void
 gv100_fault_fini(struct nvkm_fault *fault)
 {
 	nvkm_notify_put(&fault->nrpfb);
+	if (fault->buffer[0])
+		fault->func->buffer.fini(fault->buffer[0]);
 	nvkm_mask(fault->subdev.device, 0x100a34, 0x80000000, 0x80000000);
 }
 
@@ -173,14 +184,15 @@ static void
 gv100_fault_init(struct nvkm_fault *fault)
 {
 	nvkm_mask(fault->subdev.device, 0x100a2c, 0x80000000, 0x80000000);
+	fault->func->buffer.init(fault->buffer[0]);
 	nvkm_notify_get(&fault->nrpfb);
 }
 
-static int
+int
 gv100_fault_oneinit(struct nvkm_fault *fault)
 {
 	return nvkm_notify_init(&fault->buffer[0]->object, &fault->event,
-				gv100_fault_ntfy_nrpfb, false, NULL, 0, 0,
+				gv100_fault_ntfy_nrpfb, true, NULL, 0, 0,
 				&fault->nrpfb);
 }
 
@@ -192,9 +204,10 @@ gv100_fault = {
 	.intr = gv100_fault_intr,
 	.buffer.nr = 2,
 	.buffer.entry_size = 32,
-	.buffer.entries = gv100_fault_buffer_entries,
+	.buffer.info = gv100_fault_buffer_info,
 	.buffer.init = gv100_fault_buffer_init,
 	.buffer.fini = gv100_fault_buffer_fini,
+	.buffer.intr = gv100_fault_buffer_intr,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
index e4d2f5234fd1..8ca8b2876dad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/priv.h
@@ -12,8 +12,10 @@ struct nvkm_fault_buffer {
 	struct nvkm_fault *fault;
 	int id;
 	int entries;
+	u32 get;
+	u32 put;
 	struct nvkm_memory *mem;
-	struct nvkm_vma *vma;
+	u64 addr;
 };
 
 int nvkm_fault_new_(const struct nvkm_fault_func *, struct nvkm_device *,
@@ -27,9 +29,12 @@ struct nvkm_fault_func {
 	struct {
 		int nr;
 		u32 entry_size;
-		u32 (*entries)(struct nvkm_fault_buffer *);
+		void (*info)(struct nvkm_fault_buffer *);
 		void (*init)(struct nvkm_fault_buffer *);
 		void (*fini)(struct nvkm_fault_buffer *);
+		void (*intr)(struct nvkm_fault_buffer *, bool enable);
 	} buffer;
 };
+
+int gv100_fault_oneinit(struct nvkm_fault *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c
new file mode 100644
index 000000000000..9c8a3adf99d7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fault/tu104.c
@@ -0,0 +1,167 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/memory.h>
+#include <subdev/mmu.h>
+#include <engine/fifo.h>
+
+#include <nvif/class.h>
+
+static void
+tu104_fault_buffer_intr(struct nvkm_fault_buffer *buffer, bool enable)
+{
+	/*XXX: Earlier versions of RM touched the old regs on Turing,
+	 *     which don't appear to actually work anymore, but newer
+	 *     versions of RM don't appear to touch anything at all..
+	 */
+}
+
+static void
+tu104_fault_buffer_fini(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 foff = buffer->id * 0x20;
+	nvkm_mask(device, 0xb83010 + foff, 0x80000000, 0x00000000);
+}
+
+static void
+tu104_fault_buffer_init(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 foff = buffer->id * 0x20;
+
+	nvkm_mask(device, 0xb83010 + foff, 0xc0000000, 0x40000000);
+	nvkm_wr32(device, 0xb83004 + foff, upper_32_bits(buffer->addr));
+	nvkm_wr32(device, 0xb83000 + foff, lower_32_bits(buffer->addr));
+	nvkm_mask(device, 0xb83010 + foff, 0x80000000, 0x80000000);
+}
+
+static void
+tu104_fault_buffer_info(struct nvkm_fault_buffer *buffer)
+{
+	struct nvkm_device *device = buffer->fault->subdev.device;
+	const u32 foff = buffer->id * 0x20;
+
+	nvkm_mask(device, 0xb83010 + foff, 0x40000000, 0x40000000);
+
+	buffer->entries = nvkm_rd32(device, 0xb83010 + foff) & 0x000fffff;
+	buffer->get = 0xb83008 + foff;
+	buffer->put = 0xb8300c + foff;
+}
+
+static void
+tu104_fault_intr_fault(struct nvkm_fault *fault)
+{
+	struct nvkm_subdev *subdev = &fault->subdev;
+	struct nvkm_device *device = subdev->device;
+	struct nvkm_fault_data info;
+	const u32 addrlo = nvkm_rd32(device, 0xb83080);
+	const u32 addrhi = nvkm_rd32(device, 0xb83084);
+	const u32  info0 = nvkm_rd32(device, 0xb83088);
+	const u32 insthi = nvkm_rd32(device, 0xb8308c);
+	const u32  info1 = nvkm_rd32(device, 0xb83090);
+
+	info.addr = ((u64)addrhi << 32) | addrlo;
+	info.inst = ((u64)insthi << 32) | (info0 & 0xfffff000);
+	info.time = 0;
+	info.engine = (info0 & 0x000000ff);
+	info.valid  = (info1 & 0x80000000) >> 31;
+	info.gpc    = (info1 & 0x1f000000) >> 24;
+	info.hub    = (info1 & 0x00100000) >> 20;
+	info.access = (info1 & 0x000f0000) >> 16;
+	info.client = (info1 & 0x00007f00) >> 8;
+	info.reason = (info1 & 0x0000001f);
+
+	nvkm_fifo_fault(device->fifo, &info);
+}
+
+static void
+tu104_fault_intr(struct nvkm_fault *fault)
+{
+	struct nvkm_subdev *subdev = &fault->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0xb83094);
+
+	if (stat & 0x80000000) {
+		tu104_fault_intr_fault(fault);
+		nvkm_wr32(device, 0xb83094, 0x80000000);
+		stat &= ~0x80000000;
+	}
+
+	if (stat & 0x00000200) {
+		if (fault->buffer[0]) {
+			nvkm_event_send(&fault->event, 1, 0, NULL, 0);
+			stat &= ~0x00000200;
+		}
+	}
+
+	/*XXX: guess, can't confirm until we get fw... */
+	if (stat & 0x00000100) {
+		if (fault->buffer[1]) {
+			nvkm_event_send(&fault->event, 1, 1, NULL, 0);
+			stat &= ~0x00000100;
+		}
+	}
+
+	if (stat) {
+		nvkm_debug(subdev, "intr %08x\n", stat);
+	}
+}
+
+static void
+tu104_fault_fini(struct nvkm_fault *fault)
+{
+	nvkm_notify_put(&fault->nrpfb);
+	if (fault->buffer[0])
+		fault->func->buffer.fini(fault->buffer[0]);
+	/*XXX: disable priv faults */
+}
+
+static void
+tu104_fault_init(struct nvkm_fault *fault)
+{
+	/*XXX: enable priv faults */
+	fault->func->buffer.init(fault->buffer[0]);
+	nvkm_notify_get(&fault->nrpfb);
+}
+
+static const struct nvkm_fault_func
+tu104_fault = {
+	.oneinit = gv100_fault_oneinit,
+	.init = tu104_fault_init,
+	.fini = tu104_fault_fini,
+	.intr = tu104_fault_intr,
+	.buffer.nr = 2,
+	.buffer.entry_size = 32,
+	.buffer.info = tu104_fault_buffer_info,
+	.buffer.init = tu104_fault_buffer_init,
+	.buffer.fini = tu104_fault_buffer_fini,
+	.buffer.intr = tu104_fault_buffer_intr,
+};
+
+int
+tu104_fault_new(struct nvkm_device *device, int index,
+		struct nvkm_fault **pfault)
+{
+	return nvkm_fault_new_(&tu104_fault, device, index, pfault);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
index 434d2fc5bb1c..b2bb5a3ccb02 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/base.c
@@ -68,10 +68,13 @@ nvkm_fb_bios_memtype(struct nvkm_bios *bios)
 
 	if (nvbios_M0203Em(bios, ramcfg, &ver, &hdr, &M0203E)) {
 		switch (M0203E.type) {
-		case M0203E_TYPE_DDR2 : return NVKM_RAM_TYPE_DDR2;
-		case M0203E_TYPE_DDR3 : return NVKM_RAM_TYPE_DDR3;
-		case M0203E_TYPE_GDDR3: return NVKM_RAM_TYPE_GDDR3;
-		case M0203E_TYPE_GDDR5: return NVKM_RAM_TYPE_GDDR5;
+		case M0203E_TYPE_DDR2  : return NVKM_RAM_TYPE_DDR2;
+		case M0203E_TYPE_DDR3  : return NVKM_RAM_TYPE_DDR3;
+		case M0203E_TYPE_GDDR3 : return NVKM_RAM_TYPE_GDDR3;
+		case M0203E_TYPE_GDDR5 : return NVKM_RAM_TYPE_GDDR5;
+		case M0203E_TYPE_GDDR5X: return NVKM_RAM_TYPE_GDDR5X;
+		case M0203E_TYPE_GDDR6 : return NVKM_RAM_TYPE_GDDR6;
+		case M0203E_TYPE_HBM2  : return NVKM_RAM_TYPE_HBM2;
 		default:
 			nvkm_warn(subdev, "M0203E type %02x\n", M0203E.type);
 			return NVKM_RAM_TYPE_UNKNOWN;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
index 24c7bd505731..b11867f682cb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.c
@@ -184,6 +184,9 @@ nvkm_ram_ctor(const struct nvkm_ram_func *func, struct nvkm_fb *fb,
 		[NVKM_RAM_TYPE_GDDR3  ] = "GDDR3",
 		[NVKM_RAM_TYPE_GDDR4  ] = "GDDR4",
 		[NVKM_RAM_TYPE_GDDR5  ] = "GDDR5",
+		[NVKM_RAM_TYPE_GDDR5X ] = "GDDR5X",
+		[NVKM_RAM_TYPE_GDDR6  ] = "GDDR6",
+		[NVKM_RAM_TYPE_HBM2   ] = "HBM2",
 	};
 	struct nvkm_subdev *subdev = &fb->subdev;
 	int ret;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
index db48a1daca0c..02c4eb28cef4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/nv50.c
@@ -288,6 +288,19 @@ nv50_instobj_addr(struct nvkm_memory *memory)
 	return nvkm_memory_addr(nv50_instobj(memory)->ram);
 }
 
+static u64
+nv50_instobj_bar2(struct nvkm_memory *memory)
+{
+	struct nv50_instobj *iobj = nv50_instobj(memory);
+	u64 addr = ~0ULL;
+	if (nv50_instobj_acquire(&iobj->base.memory)) {
+		iobj->lru.next = NULL; /* Exclude from eviction. */
+		addr = iobj->bar->addr;
+	}
+	nv50_instobj_release(&iobj->base.memory);
+	return addr;
+}
+
 static enum nvkm_memory_target
 nv50_instobj_target(struct nvkm_memory *memory)
 {
@@ -325,8 +338,9 @@ static const struct nvkm_memory_func
 nv50_instobj_func = {
 	.dtor = nv50_instobj_dtor,
 	.target = nv50_instobj_target,
-	.size = nv50_instobj_size,
+	.bar2 = nv50_instobj_bar2,
 	.addr = nv50_instobj_addr,
+	.size = nv50_instobj_size,
 	.boot = nv50_instobj_boot,
 	.acquire = nv50_instobj_acquire,
 	.release = nv50_instobj_release,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
index 2befbe36dc28..f3b06329c338 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/Kbuild
@@ -12,3 +12,4 @@ nvkm-y += nvkm/subdev/mc/gk104.o
 nvkm-y += nvkm/subdev/mc/gk20a.o
 nvkm-y += nvkm/subdev/mc/gp100.o
 nvkm-y += nvkm/subdev/mc/gp10b.o
+nvkm-y += nvkm/subdev/mc/tu104.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 09f669ac6630..0e57ab2a709f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -108,6 +108,9 @@ nvkm_mc_intr(struct nvkm_device *device, bool *handled)
 	if (stat)
 		nvkm_error(&mc->subdev, "intr %08x\n", stat);
 	*handled = intr != 0;
+
+	if (mc->func->intr_hack)
+		mc->func->intr_hack(mc, handled);
 }
 
 static u32
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
index d9e3691d45b7..eb91a4cf452b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/priv.h
@@ -26,6 +26,7 @@ struct nvkm_mc_func {
 	void (*intr_mask)(struct nvkm_mc *, u32 mask, u32 stat);
 	/* retrieve pending interrupt mask (NV_PMC_INTR) */
 	u32 (*intr_stat)(struct nvkm_mc *);
+	void (*intr_hack)(struct nvkm_mc *, bool *handled);
 	const struct nvkm_mc_map *reset;
 	void (*unk260)(struct nvkm_mc *, u32);
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c
new file mode 100644
index 000000000000..b7165bd18999
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/tu104.c
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+static void
+tu104_mc_intr_hack(struct nvkm_mc *mc, bool *handled)
+{
+	struct nvkm_device *device = mc->subdev.device;
+	u32 stat = nvkm_rd32(device, 0xb81010);
+	if (stat & 0x00000050) {
+		struct nvkm_subdev *subdev =
+			nvkm_device_subdev(device, NVKM_SUBDEV_FAULT);
+		nvkm_wr32(device, 0xb81010, stat & 0x00000050);
+		if (subdev)
+			nvkm_subdev_intr(subdev);
+		*handled = true;
+	}
+}
+
+static const struct nvkm_mc_func
+tu104_mc = {
+	.init = nv50_mc_init,
+	.intr = gp100_mc_intr,
+	.intr_unarm = gp100_mc_intr_unarm,
+	.intr_rearm = gp100_mc_intr_rearm,
+	.intr_mask = gp100_mc_intr_mask,
+	.intr_stat = gf100_mc_intr_stat,
+	.intr_hack = tu104_mc_intr_hack,
+	.reset = gk104_mc_reset,
+};
+
+int
+tu104_mc_new(struct nvkm_device *device, int index, struct nvkm_mc **pmc)
+{
+	return gp100_mc_new_(&tu104_mc, device, index, pmc);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
index 58a24e3a0598..8966180b36cc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/Kbuild
@@ -13,6 +13,7 @@ nvkm-y += nvkm/subdev/mmu/gm20b.o
 nvkm-y += nvkm/subdev/mmu/gp100.o
 nvkm-y += nvkm/subdev/mmu/gp10b.o
 nvkm-y += nvkm/subdev/mmu/gv100.o
+nvkm-y += nvkm/subdev/mmu/tu104.o
 
 nvkm-y += nvkm/subdev/mmu/mem.o
 nvkm-y += nvkm/subdev/mmu/memnv04.o
@@ -33,6 +34,7 @@ nvkm-y += nvkm/subdev/mmu/vmmgm20b.o
 nvkm-y += nvkm/subdev/mmu/vmmgp100.o
 nvkm-y += nvkm/subdev/mmu/vmmgp10b.o
 nvkm-y += nvkm/subdev/mmu/vmmgv100.o
+nvkm-y += nvkm/subdev/mmu/vmmtu104.o
 
 nvkm-y += nvkm/subdev/mmu/umem.o
 nvkm-y += nvkm/subdev/mmu/ummu.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c
new file mode 100644
index 000000000000..8e6f4096170d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/tu104.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "mem.h"
+#include "vmm.h"
+
+#include <core/option.h>
+
+#include <nvif/class.h>
+
+static const struct nvkm_mmu_func
+tu104_mmu = {
+	.dma_bits = 47,
+	.mmu = {{ -1, -1, NVIF_CLASS_MMU_GF100}},
+	.mem = {{ -1,  0, NVIF_CLASS_MEM_GF100}, gf100_mem_new, gf100_mem_map },
+	.vmm = {{ -1,  0, NVIF_CLASS_VMM_GP100}, tu104_vmm_new },
+	.kind = gm200_mmu_kind,
+	.kind_sys = true,
+};
+
+int
+tu104_mmu_new(struct nvkm_device *device, int index, struct nvkm_mmu **pmmu)
+{
+	return nvkm_mmu_new_(&tu104_mmu, device, index, pmmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
index 37b201b95f15..6889076097ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/uvmm.c
@@ -134,23 +134,10 @@ nvkm_uvmm_mthd_map(struct nvkm_uvmm *uvmm, void *argv, u32 argc)
 			goto fail;
 		}
 
-		if (vma->addr != addr) {
-			const u64 tail = vma->size + vma->addr - addr;
-			if (ret = -ENOMEM, !(vma = nvkm_vma_tail(vma, tail)))
-				goto fail;
-			vma->part = true;
-			nvkm_vmm_node_insert(vmm, vma);
-		}
-
-		if (vma->size != size) {
-			const u64 tail = vma->size - size;
-			struct nvkm_vma *tmp;
-			if (ret = -ENOMEM, !(tmp = nvkm_vma_tail(vma, tail))) {
-				nvkm_vmm_unmap_region(vmm, vma);
-				goto fail;
-			}
-			tmp->part = true;
-			nvkm_vmm_node_insert(vmm, tmp);
+		vma = nvkm_vmm_node_split(vmm, vma, addr, size);
+		if (!vma) {
+			ret = -ENOMEM;
+			goto fail;
 		}
 	}
 	vma->busy = true;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
index 7459def78d50..6b87fff014b3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.c
@@ -767,6 +767,20 @@ nvkm_vma_tail(struct nvkm_vma *vma, u64 tail)
 	return new;
 }
 
+static inline void
+nvkm_vmm_free_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
+{
+	rb_erase(&vma->tree, &vmm->free);
+}
+
+static inline void
+nvkm_vmm_free_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
+{
+	nvkm_vmm_free_remove(vmm, vma);
+	list_del(&vma->head);
+	kfree(vma);
+}
+
 static void
 nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 {
@@ -795,7 +809,21 @@ nvkm_vmm_free_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 	rb_insert_color(&vma->tree, &vmm->free);
 }
 
-void
+static inline void
+nvkm_vmm_node_remove(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
+{
+	rb_erase(&vma->tree, &vmm->root);
+}
+
+static inline void
+nvkm_vmm_node_delete(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
+{
+	nvkm_vmm_node_remove(vmm, vma);
+	list_del(&vma->head);
+	kfree(vma);
+}
+
+static void
 nvkm_vmm_node_insert(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 {
 	struct rb_node **ptr = &vmm->root.rb_node;
@@ -834,6 +862,78 @@ nvkm_vmm_node_search(struct nvkm_vmm *vmm, u64 addr)
 	return NULL;
 }
 
+#define node(root, dir) (((root)->head.dir == &vmm->list) ? NULL :             \
+	list_entry((root)->head.dir, struct nvkm_vma, head))
+
+static struct nvkm_vma *
+nvkm_vmm_node_merge(struct nvkm_vmm *vmm, struct nvkm_vma *prev,
+		    struct nvkm_vma *vma, struct nvkm_vma *next, u64 size)
+{
+	if (next) {
+		if (vma->size == size) {
+			vma->size += next->size;
+			nvkm_vmm_node_delete(vmm, next);
+			if (prev) {
+				prev->size += vma->size;
+				nvkm_vmm_node_delete(vmm, vma);
+				return prev;
+			}
+			return vma;
+		}
+		BUG_ON(prev);
+
+		nvkm_vmm_node_remove(vmm, next);
+		vma->size -= size;
+		next->addr -= size;
+		next->size += size;
+		nvkm_vmm_node_insert(vmm, next);
+		return next;
+	}
+
+	if (prev) {
+		if (vma->size != size) {
+			nvkm_vmm_node_remove(vmm, vma);
+			prev->size += size;
+			vma->addr += size;
+			vma->size -= size;
+			nvkm_vmm_node_insert(vmm, vma);
+		} else {
+			prev->size += vma->size;
+			nvkm_vmm_node_delete(vmm, vma);
+		}
+		return prev;
+	}
+
+	return vma;
+}
+
+struct nvkm_vma *
+nvkm_vmm_node_split(struct nvkm_vmm *vmm,
+		    struct nvkm_vma *vma, u64 addr, u64 size)
+{
+	struct nvkm_vma *prev = NULL;
+
+	if (vma->addr != addr) {
+		prev = vma;
+		if (!(vma = nvkm_vma_tail(vma, vma->size + vma->addr - addr)))
+			return NULL;
+		vma->part = true;
+		nvkm_vmm_node_insert(vmm, vma);
+	}
+
+	if (vma->size != size) {
+		struct nvkm_vma *tmp;
+		if (!(tmp = nvkm_vma_tail(vma, vma->size - size))) {
+			nvkm_vmm_node_merge(vmm, prev, vma, NULL, vma->size);
+			return NULL;
+		}
+		tmp->part = true;
+		nvkm_vmm_node_insert(vmm, tmp);
+	}
+
+	return vma;
+}
+
 static void
 nvkm_vmm_dtor(struct nvkm_vmm *vmm)
 {
@@ -954,37 +1054,20 @@ nvkm_vmm_new_(const struct nvkm_vmm_func *func, struct nvkm_mmu *mmu,
 	return nvkm_vmm_ctor(func, mmu, hdr, addr, size, key, name, *pvmm);
 }
 
-#define node(root, dir) ((root)->head.dir == &vmm->list) ? NULL :              \
-	list_entry((root)->head.dir, struct nvkm_vma, head)
-
 void
 nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 {
-	struct nvkm_vma *next;
+	struct nvkm_vma *next = node(vma, next);
+	struct nvkm_vma *prev = NULL;
 
 	nvkm_memory_tags_put(vma->memory, vmm->mmu->subdev.device, &vma->tags);
 	nvkm_memory_unref(&vma->memory);
 
-	if (vma->part) {
-		struct nvkm_vma *prev = node(vma, prev);
-		if (!prev->memory) {
-			prev->size += vma->size;
-			rb_erase(&vma->tree, &vmm->root);
-			list_del(&vma->head);
-			kfree(vma);
-			vma = prev;
-		}
-	}
-
-	next = node(vma, next);
-	if (next && next->part) {
-		if (!next->memory) {
-			vma->size += next->size;
-			rb_erase(&next->tree, &vmm->root);
-			list_del(&next->head);
-			kfree(next);
-		}
-	}
+	if (!vma->part || ((prev = node(vma, prev)), prev->memory))
+		prev = NULL;
+	if (!next->part || next->memory)
+		next = NULL;
+	nvkm_vmm_node_merge(vmm, prev, vma, next, vma->size);
 }
 
 void
@@ -1163,18 +1246,14 @@ nvkm_vmm_put_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 	struct nvkm_vma *prev, *next;
 
 	if ((prev = node(vma, prev)) && !prev->used) {
-		rb_erase(&prev->tree, &vmm->free);
-		list_del(&prev->head);
 		vma->addr  = prev->addr;
 		vma->size += prev->size;
-		kfree(prev);
+		nvkm_vmm_free_delete(vmm, prev);
 	}
 
 	if ((next = node(vma, next)) && !next->used) {
-		rb_erase(&next->tree, &vmm->free);
-		list_del(&next->head);
 		vma->size += next->size;
-		kfree(next);
+		nvkm_vmm_free_delete(vmm, next);
 	}
 
 	nvkm_vmm_free_insert(vmm, vma);
@@ -1250,7 +1329,7 @@ nvkm_vmm_put_locked(struct nvkm_vmm *vmm, struct nvkm_vma *vma)
 	}
 
 	/* Remove VMA from the list of allocated nodes. */
-	rb_erase(&vma->tree, &vmm->root);
+	nvkm_vmm_node_remove(vmm, vma);
 
 	/* Merge VMA back into the free list. */
 	vma->page = NVKM_VMA_PAGE_NONE;
@@ -1357,7 +1436,7 @@ nvkm_vmm_get_locked(struct nvkm_vmm *vmm, bool getref, bool mapref, bool sparse,
 			tail = ALIGN_DOWN(tail, vmm->func->page_block);
 
 		if (addr <= tail && tail - addr >= size) {
-			rb_erase(&this->tree, &vmm->free);
+			nvkm_vmm_free_remove(vmm, this);
 			vma = this;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
index 1a3b0a3724ca..42ad326521a3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmm.h
@@ -157,6 +157,8 @@ int nvkm_vmm_ctor(const struct nvkm_vmm_func *, struct nvkm_mmu *,
 		  u32 pd_header, u64 addr, u64 size, struct lock_class_key *,
 		  const char *name, struct nvkm_vmm *);
 struct nvkm_vma *nvkm_vmm_node_search(struct nvkm_vmm *, u64 addr);
+struct nvkm_vma *nvkm_vmm_node_split(struct nvkm_vmm *, struct nvkm_vma *,
+				     u64 addr, u64 size);
 int nvkm_vmm_get_locked(struct nvkm_vmm *, bool getref, bool mapref,
 			bool sparse, u8 page, u8 align, u64 size,
 			struct nvkm_vma **pvma);
@@ -165,7 +167,6 @@ void nvkm_vmm_unmap_locked(struct nvkm_vmm *, struct nvkm_vma *);
 void nvkm_vmm_unmap_region(struct nvkm_vmm *vmm, struct nvkm_vma *vma);
 
 struct nvkm_vma *nvkm_vma_tail(struct nvkm_vma *, u64 tail);
-void nvkm_vmm_node_insert(struct nvkm_vmm *, struct nvkm_vma *);
 
 int nv04_vmm_new_(const struct nvkm_vmm_func *, struct nvkm_mmu *, u32,
 		  u64, u64, void *, u32, struct lock_class_key *,
@@ -200,6 +201,8 @@ int gp100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
 int gp100_vmm_valid(struct nvkm_vmm *, void *, u32, struct nvkm_vmm_map *);
 void gp100_vmm_flush(struct nvkm_vmm *, int);
 
+int gv100_vmm_join(struct nvkm_vmm *, struct nvkm_memory *);
+
 int nv04_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		 struct lock_class_key *, const char *, struct nvkm_vmm **);
 int nv41_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
@@ -239,6 +242,9 @@ int gp10b_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 int gv100_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
 		  struct lock_class_key *, const char *,
 		  struct nvkm_vmm **);
+int tu104_vmm_new(struct nvkm_mmu *, u64, u64, void *, u32,
+		  struct lock_class_key *, const char *,
+		  struct nvkm_vmm **);
 
 #define VMM_PRINT(l,v,p,f,a...) do {                                           \
 	struct nvkm_vmm *_vmm = (v);                                           \
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c
new file mode 100644
index 000000000000..adaadd92110f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/vmmtu104.c
@@ -0,0 +1,77 @@
+/*
+ * Copyright 2018 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+#include "vmm.h"
+
+#include <subdev/timer.h>
+
+static void
+tu104_vmm_flush(struct nvkm_vmm *vmm, int depth)
+{
+	struct nvkm_subdev *subdev = &vmm->mmu->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 type = depth << 24; /*XXX: not confirmed */
+
+	type = 0x00000001; /* PAGE_ALL */
+	if (atomic_read(&vmm->engref[NVKM_SUBDEV_BAR]))
+		type |= 0x00000004; /* HUB_ONLY */
+
+	mutex_lock(&subdev->mutex);
+
+	nvkm_wr32(device, 0xb830a0, vmm->pd->pt[0]->addr >> 8);
+	nvkm_wr32(device, 0xb830a4, 0x00000000);
+	nvkm_wr32(device, 0x100e68, 0x00000000);
+	nvkm_wr32(device, 0xb830b0, 0x80000000 | type);
+
+	nvkm_msec(device, 2000,
+		if (!(nvkm_rd32(device, 0xb830b0) & 0x80000000))
+			break;
+	);
+
+	mutex_unlock(&subdev->mutex);
+}
+
+static const struct nvkm_vmm_func
+tu104_vmm = {
+	.join = gv100_vmm_join,
+	.part = gf100_vmm_part,
+	.aper = gf100_vmm_aper,
+	.valid = gp100_vmm_valid,
+	.flush = tu104_vmm_flush,
+	.page = {
+		{ 47, &gp100_vmm_desc_16[4], NVKM_VMM_PAGE_Sxxx },
+		{ 38, &gp100_vmm_desc_16[3], NVKM_VMM_PAGE_Sxxx },
+		{ 29, &gp100_vmm_desc_16[2], NVKM_VMM_PAGE_Sxxx },
+		{ 21, &gp100_vmm_desc_16[1], NVKM_VMM_PAGE_SVxC },
+		{ 16, &gp100_vmm_desc_16[0], NVKM_VMM_PAGE_SVxC },
+		{ 12, &gp100_vmm_desc_12[0], NVKM_VMM_PAGE_SVHx },
+		{}
+	}
+};
+
+int
+tu104_vmm_new(struct nvkm_mmu *mmu, u64 addr, u64 size,
+	      void *argv, u32 argc, struct lock_class_key *key,
+	      const char *name, struct nvkm_vmm **pvmm)
+{
+	return nv04_vmm_new_(&tu104_vmm, mmu, 0, addr, size,
+			     argv, argc, key, name, pvmm);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c
index 1f7a3c1a7f50..84a2f243ed9b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gp102.c
@@ -59,10 +59,10 @@ gp102_run_secure_scrub(struct nvkm_secboot *sb)
 
 	nvkm_debug(subdev, "running VPR scrubber binary on NVDEC...\n");
 
-	engine = nvkm_engine_ref(&device->nvdec->engine);
+	engine = nvkm_engine_ref(&device->nvdec[0]->engine);
 	if (IS_ERR(engine))
 		return PTR_ERR(engine);
-	falcon = device->nvdec->falcon;
+	falcon = device->nvdec[0]->falcon;
 
 	nvkm_falcon_get(falcon, &sb->subdev);
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index 3695cde669f8..07914e36939e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -132,11 +132,12 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 			duty = nvkm_therm_update_linear(therm);
 			break;
 		case NVBIOS_THERM_FAN_OTHER:
-			if (therm->cstate)
+			if (therm->cstate) {
 				duty = therm->cstate;
-			else
+				poll = false;
+			} else {
 				duty = nvkm_therm_update_linear_fallback(therm);
-			poll = false;
+			}
 			break;
 		}
 		immd = false;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
index 36de23d12ae4..dd922033628c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/timer/base.c
@@ -23,6 +23,42 @@
  */
 #include "priv.h"
 
+s64
+nvkm_timer_wait_test(struct nvkm_timer_wait *wait)
+{
+	struct nvkm_subdev *subdev = &wait->tmr->subdev;
+	u64 time = nvkm_timer_read(wait->tmr);
+
+	if (wait->reads == 0) {
+		wait->time0 = time;
+		wait->time1 = time;
+	}
+
+	if (wait->time1 == time) {
+		if (wait->reads++ == 16) {
+			nvkm_fatal(subdev, "stalled at %016llx\n", time);
+			return -ETIMEDOUT;
+		}
+	} else {
+		wait->time1 = time;
+		wait->reads = 1;
+	}
+
+	if (wait->time1 - wait->time0 > wait->limit)
+		return -ETIMEDOUT;
+
+	return wait->time1 - wait->time0;
+}
+
+void
+nvkm_timer_wait_init(struct nvkm_device *device, u64 nsec,
+		     struct nvkm_timer_wait *wait)
+{
+	wait->tmr = device->timer;
+	wait->limit = nsec;
+	wait->reads = 0;
+}
+
 u64
 nvkm_timer_read(struct nvkm_timer *tmr)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
index 4f1f3e890650..39081eadfd84 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/gk104.c
@@ -86,7 +86,7 @@ gk104_top_oneinit(struct nvkm_top *top)
 		case 0x0000000d: A_(SEC2  ); break;
 		case 0x0000000e: B_(NVENC ); break;
 		case 0x0000000f: A_(NVENC1); break;
-		case 0x00000010: A_(NVDEC ); break;
+		case 0x00000010: B_(NVDEC ); break;
 		case 0x00000013: B_(CE    ); break;
 			break;
 		default:
diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
index 1f8161b041be..465120809eb3 100644
--- a/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
+++ b/drivers/gpu/drm/omapdrm/displays/panel-dpi.c
@@ -177,6 +177,7 @@ static int panel_dpi_probe(struct platform_device *pdev)
 	dssdev->type = OMAP_DISPLAY_TYPE_DPI;
 	dssdev->owner = THIS_MODULE;
 	dssdev->of_ports = BIT(0);
+	drm_bus_flags_from_videomode(&ddata->vm, &dssdev->bus_flags);
 
 	omapdss_display_init(dssdev);
 	omapdss_device_register(dssdev);
diff --git a/drivers/gpu/drm/omapdrm/dss/dsi.c b/drivers/gpu/drm/omapdrm/dss/dsi.c
index 0a485c5b982e..00a9c2ab9e6c 100644
--- a/drivers/gpu/drm/omapdrm/dss/dsi.c
+++ b/drivers/gpu/drm/omapdrm/dss/dsi.c
@@ -5418,9 +5418,15 @@ static int dsi_probe(struct platform_device *pdev)
 		dsi->num_lanes_supported = 3;
 	}
 
+	r = of_platform_populate(dev->of_node, NULL, NULL, dev);
+	if (r) {
+		DSSERR("Failed to populate DSI child devices: %d\n", r);
+		goto err_pm_disable;
+	}
+
 	r = dsi_init_output(dsi);
 	if (r)
-		goto err_pm_disable;
+		goto err_of_depopulate;
 
 	r = dsi_probe_of(dsi);
 	if (r) {
@@ -5428,22 +5434,16 @@ static int dsi_probe(struct platform_device *pdev)
 		goto err_uninit_output;
 	}
 
-	r = of_platform_populate(dev->of_node, NULL, NULL, dev);
-	if (r) {
-		DSSERR("Failed to populate DSI child devices: %d\n", r);
-		goto err_uninit_output;
-	}
-
 	r = component_add(&pdev->dev, &dsi_component_ops);
 	if (r)
-		goto err_of_depopulate;
+		goto err_uninit_output;
 
 	return 0;
 
-err_of_depopulate:
-	of_platform_depopulate(dev);
 err_uninit_output:
 	dsi_uninit_output(dsi);
+err_of_depopulate:
+	of_platform_depopulate(dev);
 err_pm_disable:
 	pm_runtime_disable(dev);
 	return r;
diff --git a/drivers/gpu/drm/omapdrm/dss/omapdss.h b/drivers/gpu/drm/omapdrm/dss/omapdss.h
index 1f698a95a94a..33e15cb77efa 100644
--- a/drivers/gpu/drm/omapdrm/dss/omapdss.h
+++ b/drivers/gpu/drm/omapdrm/dss/omapdss.h
@@ -432,7 +432,7 @@ struct omap_dss_device {
 	const struct omap_dss_driver *driver;
 	const struct omap_dss_device_ops *ops;
 	unsigned long ops_flags;
-	unsigned long bus_flags;
+	u32 bus_flags;
 
 	/* helper variable for driver suspend/resume */
 	bool activate_after_resume;
diff --git a/drivers/gpu/drm/omapdrm/omap_encoder.c b/drivers/gpu/drm/omapdrm/omap_encoder.c
index 452e625f6ce3..933ebc9f9faa 100644
--- a/drivers/gpu/drm/omapdrm/omap_encoder.c
+++ b/drivers/gpu/drm/omapdrm/omap_encoder.c
@@ -52,17 +52,44 @@ static const struct drm_encoder_funcs omap_encoder_funcs = {
 	.destroy = omap_encoder_destroy,
 };
 
+static void omap_encoder_hdmi_mode_set(struct drm_encoder *encoder,
+				       struct drm_display_mode *adjusted_mode)
+{
+	struct drm_device *dev = encoder->dev;
+	struct omap_encoder *omap_encoder = to_omap_encoder(encoder);
+	struct omap_dss_device *dssdev = omap_encoder->output;
+	struct drm_connector *connector;
+	bool hdmi_mode;
+
+	hdmi_mode = false;
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		if (connector->encoder == encoder) {
+			hdmi_mode = omap_connector_get_hdmi_mode(connector);
+			break;
+		}
+	}
+
+	if (dssdev->ops->hdmi.set_hdmi_mode)
+		dssdev->ops->hdmi.set_hdmi_mode(dssdev, hdmi_mode);
+
+	if (hdmi_mode && dssdev->ops->hdmi.set_infoframe) {
+		struct hdmi_avi_infoframe avi;
+		int r;
+
+		r = drm_hdmi_avi_infoframe_from_display_mode(&avi, adjusted_mode,
+							     false);
+		if (r == 0)
+			dssdev->ops->hdmi.set_infoframe(dssdev, &avi);
+	}
+}
+
 static void omap_encoder_mode_set(struct drm_encoder *encoder,
 				  struct drm_display_mode *mode,
 				  struct drm_display_mode *adjusted_mode)
 {
-	struct drm_device *dev = encoder->dev;
 	struct omap_encoder *omap_encoder = to_omap_encoder(encoder);
-	struct drm_connector *connector;
 	struct omap_dss_device *dssdev;
 	struct videomode vm = { 0 };
-	bool hdmi_mode;
-	int r;
 
 	drm_display_mode_to_videomode(adjusted_mode, &vm);
 
@@ -112,27 +139,8 @@ static void omap_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* Set the HDMI mode and HDMI infoframe if applicable. */
-	hdmi_mode = false;
-	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
-		if (connector->encoder == encoder) {
-			hdmi_mode = omap_connector_get_hdmi_mode(connector);
-			break;
-		}
-	}
-
-	dssdev = omap_encoder->output;
-
-	if (dssdev->ops->hdmi.set_hdmi_mode)
-		dssdev->ops->hdmi.set_hdmi_mode(dssdev, hdmi_mode);
-
-	if (hdmi_mode && dssdev->ops->hdmi.set_infoframe) {
-		struct hdmi_avi_infoframe avi;
-
-		r = drm_hdmi_avi_infoframe_from_display_mode(&avi, adjusted_mode,
-							     false);
-		if (r == 0)
-			dssdev->ops->hdmi.set_infoframe(dssdev, &avi);
-	}
+	if (omap_encoder->output->output_type == OMAP_DISPLAY_TYPE_HDMI)
+		omap_encoder_hdmi_mode_set(encoder, adjusted_mode);
 }
 
 static void omap_encoder_disable(struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/qxl/qxl_drv.c b/drivers/gpu/drm/qxl/qxl_drv.c
index 13c8a662f9b4..ccb090f3ab30 100644
--- a/drivers/gpu/drm/qxl/qxl_drv.c
+++ b/drivers/gpu/drm/qxl/qxl_drv.c
@@ -250,14 +250,10 @@ static struct drm_driver qxl_driver = {
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = qxl_debugfs_init,
 #endif
-	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_pin = qxl_gem_prime_pin,
 	.gem_prime_unpin = qxl_gem_prime_unpin,
-	.gem_prime_get_sg_table = qxl_gem_prime_get_sg_table,
-	.gem_prime_import_sg_table = qxl_gem_prime_import_sg_table,
 	.gem_prime_vmap = qxl_gem_prime_vmap,
 	.gem_prime_vunmap = qxl_gem_prime_vunmap,
 	.gem_prime_mmap = qxl_gem_prime_mmap,
diff --git a/drivers/gpu/drm/qxl/qxl_ioctl.c b/drivers/gpu/drm/qxl/qxl_ioctl.c
index 6e828158bcb0..d410e2925162 100644
--- a/drivers/gpu/drm/qxl/qxl_ioctl.c
+++ b/drivers/gpu/drm/qxl/qxl_ioctl.c
@@ -163,8 +163,7 @@ static int qxl_process_single_command(struct qxl_device *qdev,
 	if (cmd->command_size > PAGE_SIZE - sizeof(union qxl_release_info))
 		return -EINVAL;
 
-	if (!access_ok(VERIFY_READ,
-		       u64_to_user_ptr(cmd->command),
+	if (!access_ok(u64_to_user_ptr(cmd->command),
 		       cmd->command_size))
 		return -EFAULT;
 
diff --git a/drivers/gpu/drm/qxl/qxl_prime.c b/drivers/gpu/drm/qxl/qxl_prime.c
index a55dece118b2..df65d3c1a7b8 100644
--- a/drivers/gpu/drm/qxl/qxl_prime.c
+++ b/drivers/gpu/drm/qxl/qxl_prime.c
@@ -38,20 +38,6 @@ void qxl_gem_prime_unpin(struct drm_gem_object *obj)
 	WARN_ONCE(1, "not implemented");
 }
 
-struct sg_table *qxl_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENOSYS);
-}
-
-struct drm_gem_object *qxl_gem_prime_import_sg_table(
-	struct drm_device *dev, struct dma_buf_attachment *attach,
-	struct sg_table *table)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENOSYS);
-}
-
 void *qxl_gem_prime_vmap(struct drm_gem_object *obj)
 {
 	WARN_ONCE(1, "not implemented");
diff --git a/drivers/gpu/drm/qxl/qxl_release.c b/drivers/gpu/drm/qxl/qxl_release.c
index 0a693fede05e..30f85f0130cb 100644
--- a/drivers/gpu/drm/qxl/qxl_release.c
+++ b/drivers/gpu/drm/qxl/qxl_release.c
@@ -217,7 +217,7 @@ int qxl_release_list_add(struct qxl_release *release, struct qxl_bo *bo)
 
 	qxl_bo_ref(bo);
 	entry->tv.bo = &bo->tbo;
-	entry->tv.shared = false;
+	entry->tv.num_shared = 0;
 	list_add_tail(&entry->tv.head, &release->bos);
 	return 0;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 1ae31dbc61c6..f43305329939 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -178,7 +178,7 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		}
 
 		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
-		p->relocs[i].tv.shared = !r->write_domain;
+		p->relocs[i].tv.num_shared = !r->write_domain;
 
 		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
 				      priority);
@@ -253,7 +253,7 @@ static int radeon_cs_sync_rings(struct radeon_cs_parser *p)
 
 		resv = reloc->robj->tbo.resv;
 		r = radeon_sync_resv(p->rdev, &p->ib.sync, resv,
-				     reloc->tv.shared);
+				     reloc->tv.num_shared);
 		if (r)
 			return r;
 	}
diff --git a/drivers/gpu/drm/radeon/radeon_gem.c b/drivers/gpu/drm/radeon/radeon_gem.c
index 27d8e7dd2d06..44617dec8183 100644
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@@ -552,7 +552,7 @@ static void radeon_gem_va_update_vm(struct radeon_device *rdev,
 	INIT_LIST_HEAD(&list);
 
 	tv.bo = &bo_va->bo->tbo;
-	tv.shared = true;
+	tv.num_shared = 1;
 	list_add(&tv.head, &list);
 
 	vm_bos = radeon_vm_get_bos(rdev, bo_va->vm, &list);
diff --git a/drivers/gpu/drm/radeon/radeon_mn.c b/drivers/gpu/drm/radeon/radeon_mn.c
index f8b35df44c60..b3019505065a 100644
--- a/drivers/gpu/drm/radeon/radeon_mn.c
+++ b/drivers/gpu/drm/radeon/radeon_mn.c
@@ -119,40 +119,38 @@ static void radeon_mn_release(struct mmu_notifier *mn,
  * unmap them by move them into system domain again.
  */
 static int radeon_mn_invalidate_range_start(struct mmu_notifier *mn,
-					     struct mm_struct *mm,
-					     unsigned long start,
-					     unsigned long end,
-					     bool blockable)
+				const struct mmu_notifier_range *range)
 {
 	struct radeon_mn *rmn = container_of(mn, struct radeon_mn, mn);
 	struct ttm_operation_ctx ctx = { false, false };
 	struct interval_tree_node *it;
+	unsigned long end;
 	int ret = 0;
 
 	/* notification is exclusive, but interval is inclusive */
-	end -= 1;
+	end = range->end - 1;
 
 	/* TODO we should be able to split locking for interval tree and
 	 * the tear down.
 	 */
-	if (blockable)
+	if (range->blockable)
 		mutex_lock(&rmn->lock);
 	else if (!mutex_trylock(&rmn->lock))
 		return -EAGAIN;
 
-	it = interval_tree_iter_first(&rmn->objects, start, end);
+	it = interval_tree_iter_first(&rmn->objects, range->start, end);
 	while (it) {
 		struct radeon_mn_node *node;
 		struct radeon_bo *bo;
 		long r;
 
-		if (!blockable) {
+		if (!range->blockable) {
 			ret = -EAGAIN;
 			goto out_unlock;
 		}
 
 		node = container_of(it, struct radeon_mn_node, it);
-		it = interval_tree_iter_next(it, start, end);
+		it = interval_tree_iter_next(it, range->start, end);
 
 		list_for_each_entry(bo, &node->bos, mn_list) {
 
diff --git a/drivers/gpu/drm/radeon/radeon_vm.c b/drivers/gpu/drm/radeon/radeon_vm.c
index a3d2ca07a058..0d374211661c 100644
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@@ -142,7 +142,7 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
 	list[0].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 	list[0].tv.bo = &vm->page_directory->tbo;
-	list[0].tv.shared = true;
+	list[0].tv.num_shared = 1;
 	list[0].tiling_flags = 0;
 	list_add(&list[0].tv.head, head);
 
@@ -154,7 +154,7 @@ struct radeon_bo_list *radeon_vm_get_bos(struct radeon_device *rdev,
 		list[idx].preferred_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].allowed_domains = RADEON_GEM_DOMAIN_VRAM;
 		list[idx].tv.bo = &list[idx].robj->tbo;
-		list[idx].tv.shared = true;
+		list[idx].tv.num_shared = 1;
 		list[idx].tiling_flags = 0;
 		list_add(&list[idx++].tv.head, head);
 	}
diff --git a/drivers/gpu/drm/rcar-du/rcar_du_group.c b/drivers/gpu/drm/rcar-du/rcar_du_group.c
index d85f0a1c1581..cebf313c6e1f 100644
--- a/drivers/gpu/drm/rcar-du/rcar_du_group.c
+++ b/drivers/gpu/drm/rcar-du/rcar_du_group.c
@@ -202,10 +202,25 @@ void rcar_du_group_put(struct rcar_du_group *rgrp)
 
 static void __rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
 {
-	struct rcar_du_crtc *rcrtc = &rgrp->dev->crtcs[rgrp->index * 2];
+	struct rcar_du_device *rcdu = rgrp->dev;
+
+	/*
+	 * Group start/stop is controlled by the DRES and DEN bits of DSYSR0
+	 * for the first group and DSYSR2 for the second group. On most DU
+	 * instances, this maps to the first CRTC of the group, and we can just
+	 * use rcar_du_crtc_dsysr_clr_set() to access the correct DSYSR. On
+	 * M3-N, however, DU2 doesn't exist, but DSYSR2 does. We thus need to
+	 * access the register directly using group read/write.
+	 */
+	if (rcdu->info->channels_mask & BIT(rgrp->index * 2)) {
+		struct rcar_du_crtc *rcrtc = &rgrp->dev->crtcs[rgrp->index * 2];
 
-	rcar_du_crtc_dsysr_clr_set(rcrtc, DSYSR_DRES | DSYSR_DEN,
-				   start ? DSYSR_DEN : DSYSR_DRES);
+		rcar_du_crtc_dsysr_clr_set(rcrtc, DSYSR_DRES | DSYSR_DEN,
+					   start ? DSYSR_DEN : DSYSR_DRES);
+	} else {
+		rcar_du_group_write(rgrp, DSYSR,
+				    start ? DSYSR_DEN : DSYSR_DRES);
+	}
 }
 
 void rcar_du_group_start_stop(struct rcar_du_group *rgrp, bool start)
diff --git a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
index 37f9a3b651ab..be6c2573039a 100644
--- a/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
+++ b/drivers/gpu/drm/rockchip/rockchip_drm_drv.c
@@ -448,11 +448,6 @@ static int rockchip_drm_platform_remove(struct platform_device *pdev)
 	return 0;
 }
 
-static void rockchip_drm_platform_shutdown(struct platform_device *pdev)
-{
-	rockchip_drm_platform_remove(pdev);
-}
-
 static const struct of_device_id rockchip_drm_dt_ids[] = {
 	{ .compatible = "rockchip,display-subsystem", },
 	{ /* sentinel */ },
@@ -462,7 +457,6 @@ MODULE_DEVICE_TABLE(of, rockchip_drm_dt_ids);
 static struct platform_driver rockchip_drm_platform_driver = {
 	.probe = rockchip_drm_platform_probe,
 	.remove = rockchip_drm_platform_remove,
-	.shutdown = rockchip_drm_platform_shutdown,
 	.driver = {
 		.name = "rockchip-drm",
 		.of_match_table = rockchip_drm_dt_ids,
diff --git a/drivers/gpu/drm/rockchip/rockchip_rgb.c b/drivers/gpu/drm/rockchip/rockchip_rgb.c
index 96ac1458a59c..37f93022a106 100644
--- a/drivers/gpu/drm/rockchip/rockchip_rgb.c
+++ b/drivers/gpu/drm/rockchip/rockchip_rgb.c
@@ -113,8 +113,10 @@ struct rockchip_rgb *rockchip_rgb_init(struct device *dev,
 		child_count++;
 		ret = drm_of_find_panel_or_bridge(dev->of_node, 0, endpoint_id,
 						  &panel, &bridge);
-		if (!ret)
+		if (!ret) {
+			of_node_put(endpoint);
 			break;
+		}
 	}
 
 	of_node_put(port);
diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
index 9d4cd196037a..dbb69063b3d5 100644
--- a/drivers/gpu/drm/scheduler/sched_main.c
+++ b/drivers/gpu/drm/scheduler/sched_main.c
@@ -211,6 +211,62 @@ void drm_sched_fault(struct drm_gpu_scheduler *sched)
 }
 EXPORT_SYMBOL(drm_sched_fault);
 
+/**
+ * drm_sched_suspend_timeout - Suspend scheduler job timeout
+ *
+ * @sched: scheduler instance for which to suspend the timeout
+ *
+ * Suspend the delayed work timeout for the scheduler. This is done by
+ * modifying the delayed work timeout to an arbitrary large value,
+ * MAX_SCHEDULE_TIMEOUT in this case. Note that this function can be
+ * called from an IRQ context.
+ *
+ * Returns the timeout remaining
+ *
+ */
+unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
+{
+	unsigned long sched_timeout, now = jiffies;
+
+	sched_timeout = sched->work_tdr.timer.expires;
+
+	/*
+	 * Modify the timeout to an arbitrarily large value. This also prevents
+	 * the timeout to be restarted when new submissions arrive
+	 */
+	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
+			&& time_after(sched_timeout, now))
+		return sched_timeout - now;
+	else
+		return sched->timeout;
+}
+EXPORT_SYMBOL(drm_sched_suspend_timeout);
+
+/**
+ * drm_sched_resume_timeout - Resume scheduler job timeout
+ *
+ * @sched: scheduler instance for which to resume the timeout
+ * @remaining: remaining timeout
+ *
+ * Resume the delayed work timeout for the scheduler. Note that
+ * this function can be called from an IRQ context.
+ */
+void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
+		unsigned long remaining)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&sched->job_list_lock, flags);
+
+	if (list_empty(&sched->ring_mirror_list))
+		cancel_delayed_work(&sched->work_tdr);
+	else
+		mod_delayed_work(system_wq, &sched->work_tdr, remaining);
+
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
+}
+EXPORT_SYMBOL(drm_sched_resume_timeout);
+
 /* job_finish is called after hw fence signaled
  */
 static void drm_sched_job_finish(struct work_struct *work)
@@ -218,6 +274,7 @@ static void drm_sched_job_finish(struct work_struct *work)
 	struct drm_sched_job *s_job = container_of(work, struct drm_sched_job,
 						   finish_work);
 	struct drm_gpu_scheduler *sched = s_job->sched;
+	unsigned long flags;
 
 	/*
 	 * Canceling the timeout without removing our job from the ring mirror
@@ -228,12 +285,12 @@ static void drm_sched_job_finish(struct work_struct *work)
 	 */
 	cancel_delayed_work_sync(&sched->work_tdr);
 
-	spin_lock(&sched->job_list_lock);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	/* remove job from ring_mirror_list */
 	list_del_init(&s_job->node);
 	/* queue TDR for next job */
 	drm_sched_start_timeout(sched);
-	spin_unlock(&sched->job_list_lock);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 	sched->ops->free_job(s_job);
 }
@@ -249,20 +306,22 @@ static void drm_sched_job_finish_cb(struct dma_fence *f,
 static void drm_sched_job_begin(struct drm_sched_job *s_job)
 {
 	struct drm_gpu_scheduler *sched = s_job->sched;
+	unsigned long flags;
 
 	dma_fence_add_callback(&s_job->s_fence->finished, &s_job->finish_cb,
 			       drm_sched_job_finish_cb);
 
-	spin_lock(&sched->job_list_lock);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_add_tail(&s_job->node, &sched->ring_mirror_list);
 	drm_sched_start_timeout(sched);
-	spin_unlock(&sched->job_list_lock);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 }
 
 static void drm_sched_job_timedout(struct work_struct *work)
 {
 	struct drm_gpu_scheduler *sched;
 	struct drm_sched_job *job;
+	unsigned long flags;
 
 	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);
 	job = list_first_entry_or_null(&sched->ring_mirror_list,
@@ -271,9 +330,9 @@ static void drm_sched_job_timedout(struct work_struct *work)
 	if (job)
 		job->sched->ops->timedout_job(job);
 
-	spin_lock(&sched->job_list_lock);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	drm_sched_start_timeout(sched);
-	spin_unlock(&sched->job_list_lock);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 }
 
 /**
@@ -287,9 +346,10 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
 {
 	struct drm_sched_job *s_job;
 	struct drm_sched_entity *entity, *tmp;
+	unsigned long flags;
 	int i;
 
-	spin_lock(&sched->job_list_lock);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
 		if (s_job->s_fence->parent &&
 		    dma_fence_remove_callback(s_job->s_fence->parent,
@@ -299,7 +359,7 @@ void drm_sched_hw_job_reset(struct drm_gpu_scheduler *sched, struct drm_sched_jo
 			atomic_dec(&sched->hw_rq_count);
 		}
 	}
-	spin_unlock(&sched->job_list_lock);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 
 	if (bad && bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
 		atomic_inc(&bad->karma);
@@ -337,9 +397,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 {
 	struct drm_sched_job *s_job, *tmp;
 	bool found_guilty = false;
+	unsigned long flags;
 	int r;
 
-	spin_lock(&sched->job_list_lock);
+	spin_lock_irqsave(&sched->job_list_lock, flags);
 	list_for_each_entry_safe(s_job, tmp, &sched->ring_mirror_list, node) {
 		struct drm_sched_fence *s_fence = s_job->s_fence;
 		struct dma_fence *fence;
@@ -353,7 +414,7 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
 			dma_fence_set_error(&s_fence->finished, -ECANCELED);
 
-		spin_unlock(&sched->job_list_lock);
+		spin_unlock_irqrestore(&sched->job_list_lock, flags);
 		fence = sched->ops->run_job(s_job);
 		atomic_inc(&sched->hw_rq_count);
 
@@ -372,10 +433,10 @@ void drm_sched_job_recovery(struct drm_gpu_scheduler *sched)
 				drm_sched_expel_job_unlocked(s_job);
 			drm_sched_process_job(NULL, &s_fence->cb);
 		}
-		spin_lock(&sched->job_list_lock);
+		spin_lock_irqsave(&sched->job_list_lock, flags);
 	}
 	drm_sched_start_timeout(sched);
-	spin_unlock(&sched->job_list_lock);
+	spin_unlock_irqrestore(&sched->job_list_lock, flags);
 }
 EXPORT_SYMBOL(drm_sched_job_recovery);
 
@@ -612,7 +673,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 		   long timeout,
 		   const char *name)
 {
-	int i;
+	int i, ret;
 	sched->ops = ops;
 	sched->hw_submission_limit = hw_submission;
 	sched->name = name;
@@ -633,8 +694,10 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
 	/* Each scheduler will run on a seperate kernel thread */
 	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
 	if (IS_ERR(sched->thread)) {
+		ret = PTR_ERR(sched->thread);
+		sched->thread = NULL;
 		DRM_ERROR("Failed to create scheduler for %s.\n", name);
-		return PTR_ERR(sched->thread);
+		return ret;
 	}
 
 	sched->ready = true;
diff --git a/drivers/gpu/drm/selftests/test-drm_damage_helper.c b/drivers/gpu/drm/selftests/test-drm_damage_helper.c
index a2f753205a3e..9d2bcdf8bc29 100644
--- a/drivers/gpu/drm/selftests/test-drm_damage_helper.c
+++ b/drivers/gpu/drm/selftests/test-drm_damage_helper.c
@@ -53,7 +53,7 @@ static bool check_damage_clip(struct drm_plane_state *state, struct drm_rect *r,
 	int src_y2 = (state->src.y2 >> 16) + !!(state->src.y2 & 0xFFFF);
 
 	if (x1 >= x2 || y1 >= y2) {
-		pr_err("Cannot have damage clip with no dimention.\n");
+		pr_err("Cannot have damage clip with no dimension.\n");
 		return false;
 	}
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_backend.c b/drivers/gpu/drm/sun4i/sun4i_backend.c
index 9e9255ee59cd..a021bab11a4f 100644
--- a/drivers/gpu/drm/sun4i/sun4i_backend.c
+++ b/drivers/gpu/drm/sun4i/sun4i_backend.c
@@ -786,17 +786,18 @@ static struct sun4i_frontend *sun4i_backend_find_frontend(struct sun4i_drv *drv,
 		remote = of_graph_get_remote_port_parent(ep);
 		if (!remote)
 			continue;
+		of_node_put(remote);
 
 		/* does this node match any registered engines? */
 		list_for_each_entry(frontend, &drv->frontend_list, list) {
 			if (remote == frontend->node) {
-				of_node_put(remote);
 				of_node_put(port);
+				of_node_put(ep);
 				return frontend;
 			}
 		}
 	}
-
+	of_node_put(port);
 	return ERR_PTR(-EINVAL);
 }
 
diff --git a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
index 061d2e0d9011..416da5376701 100644
--- a/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
+++ b/drivers/gpu/drm/sun4i/sun4i_hdmi_enc.c
@@ -92,6 +92,8 @@ static void sun4i_hdmi_disable(struct drm_encoder *encoder)
 	val = readl(hdmi->base + SUN4I_HDMI_VID_CTRL_REG);
 	val &= ~SUN4I_HDMI_VID_CTRL_ENABLE;
 	writel(val, hdmi->base + SUN4I_HDMI_VID_CTRL_REG);
+
+	clk_disable_unprepare(hdmi->tmds_clk);
 }
 
 static void sun4i_hdmi_enable(struct drm_encoder *encoder)
@@ -102,6 +104,8 @@ static void sun4i_hdmi_enable(struct drm_encoder *encoder)
 
 	DRM_DEBUG_DRIVER("Enabling the HDMI Output\n");
 
+	clk_prepare_enable(hdmi->tmds_clk);
+
 	sun4i_hdmi_setup_avi_infoframes(hdmi, mode);
 	val |= SUN4I_HDMI_PKT_CTRL_TYPE(0, SUN4I_HDMI_PKT_AVI);
 	val |= SUN4I_HDMI_PKT_CTRL_TYPE(1, SUN4I_HDMI_PKT_END);
diff --git a/drivers/gpu/drm/tegra/dc.c b/drivers/gpu/drm/tegra/dc.c
index f80e82e16475..607a6ea17ecc 100644
--- a/drivers/gpu/drm/tegra/dc.c
+++ b/drivers/gpu/drm/tegra/dc.c
@@ -1978,6 +1978,23 @@ static irqreturn_t tegra_dc_irq(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
+static bool tegra_dc_has_window_groups(struct tegra_dc *dc)
+{
+	unsigned int i;
+
+	if (!dc->soc->wgrps)
+		return true;
+
+	for (i = 0; i < dc->soc->num_wgrps; i++) {
+		const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i];
+
+		if (wgrp->dc == dc->pipe && wgrp->num_windows > 0)
+			return true;
+	}
+
+	return false;
+}
+
 static int tegra_dc_init(struct host1x_client *client)
 {
 	struct drm_device *drm = dev_get_drvdata(client->parent);
@@ -1993,22 +2010,8 @@ static int tegra_dc_init(struct host1x_client *client)
 	 * assign a primary plane to them, which in turn will cause KMS to
 	 * crash.
 	 */
-	if (dc->soc->wgrps) {
-		bool has_wgrps = false;
-		unsigned int i;
-
-		for (i = 0; i < dc->soc->num_wgrps; i++) {
-			const struct tegra_windowgroup_soc *wgrp = &dc->soc->wgrps[i];
-
-			if (wgrp->dc == dc->pipe && wgrp->num_windows > 0) {
-				has_wgrps = true;
-				break;
-			}
-		}
-
-		if (!has_wgrps)
-			return 0;
-	}
+	if (!tegra_dc_has_window_groups(dc))
+		return 0;
 
 	dc->syncpt = host1x_syncpt_request(client, flags);
 	if (!dc->syncpt)
@@ -2094,6 +2097,9 @@ static int tegra_dc_exit(struct host1x_client *client)
 	struct tegra_dc *dc = host1x_client_to_dc(client);
 	int err;
 
+	if (!tegra_dc_has_window_groups(dc))
+		return 0;
+
 	devm_free_irq(dc->dev, dc->irq, dc);
 
 	err = tegra_dc_rgb_exit(dc);
diff --git a/drivers/gpu/drm/tegra/drm.c b/drivers/gpu/drm/tegra/drm.c
index 65ea4988b332..4b70ce664c41 100644
--- a/drivers/gpu/drm/tegra/drm.c
+++ b/drivers/gpu/drm/tegra/drm.c
@@ -1274,6 +1274,7 @@ static const struct of_device_id host1x_drm_subdevs[] = {
 	{ .compatible = "nvidia,tegra194-display", },
 	{ .compatible = "nvidia,tegra194-dc", },
 	{ .compatible = "nvidia,tegra194-sor", },
+	{ .compatible = "nvidia,tegra194-vic", },
 	{ /* sentinel */ }
 };
 
diff --git a/drivers/gpu/drm/tegra/falcon.c b/drivers/gpu/drm/tegra/falcon.c
index f685e72949d1..352d05feabb0 100644
--- a/drivers/gpu/drm/tegra/falcon.c
+++ b/drivers/gpu/drm/tegra/falcon.c
@@ -141,9 +141,9 @@ int falcon_load_firmware(struct falcon *falcon)
 	/* allocate iova space for the firmware */
 	falcon->firmware.vaddr = falcon->ops->alloc(falcon, firmware->size,
 						    &falcon->firmware.paddr);
-	if (!falcon->firmware.vaddr) {
-		dev_err(falcon->dev, "dma memory mapping failed\n");
-		return -ENOMEM;
+	if (IS_ERR(falcon->firmware.vaddr)) {
+		dev_err(falcon->dev, "DMA memory mapping failed\n");
+		return PTR_ERR(falcon->firmware.vaddr);
 	}
 
 	/* copy firmware image into local area. this also ensures endianness */
@@ -197,11 +197,19 @@ void falcon_exit(struct falcon *falcon)
 int falcon_boot(struct falcon *falcon)
 {
 	unsigned long offset;
+	u32 value;
 	int err;
 
 	if (!falcon->firmware.vaddr)
 		return -EINVAL;
 
+	err = readl_poll_timeout(falcon->regs + FALCON_DMACTL, value,
+				 (value & (FALCON_DMACTL_IMEM_SCRUBBING |
+					   FALCON_DMACTL_DMEM_SCRUBBING)) == 0,
+				 10, 10000);
+	if (err < 0)
+		return err;
+
 	falcon_writel(falcon, 0, FALCON_DMACTL);
 
 	/* setup the address of the binary data so Falcon can access it later */
diff --git a/drivers/gpu/drm/tegra/hub.c b/drivers/gpu/drm/tegra/hub.c
index 6112d9042979..922a48d5a483 100644
--- a/drivers/gpu/drm/tegra/hub.c
+++ b/drivers/gpu/drm/tegra/hub.c
@@ -742,7 +742,9 @@ static const struct host1x_client_ops tegra_display_hub_ops = {
 
 static int tegra_display_hub_probe(struct platform_device *pdev)
 {
+	struct device_node *child = NULL;
 	struct tegra_display_hub *hub;
+	struct clk *clk;
 	unsigned int i;
 	int err;
 
@@ -801,6 +803,34 @@ static int tegra_display_hub_probe(struct platform_device *pdev)
 			return err;
 	}
 
+	hub->num_heads = of_get_child_count(pdev->dev.of_node);
+
+	hub->clk_heads = devm_kcalloc(&pdev->dev, hub->num_heads, sizeof(clk),
+				      GFP_KERNEL);
+	if (!hub->clk_heads)
+		return -ENOMEM;
+
+	for (i = 0; i < hub->num_heads; i++) {
+		child = of_get_next_child(pdev->dev.of_node, child);
+		if (!child) {
+			dev_err(&pdev->dev, "failed to find node for head %u\n",
+				i);
+			return -ENODEV;
+		}
+
+		clk = devm_get_clk_from_child(&pdev->dev, child, "dc");
+		if (IS_ERR(clk)) {
+			dev_err(&pdev->dev, "failed to get clock for head %u\n",
+				i);
+			of_node_put(child);
+			return PTR_ERR(clk);
+		}
+
+		hub->clk_heads[i] = clk;
+	}
+
+	of_node_put(child);
+
 	/* XXX: enable clock across reset? */
 	err = reset_control_assert(hub->rst);
 	if (err < 0)
@@ -840,12 +870,16 @@ static int tegra_display_hub_remove(struct platform_device *pdev)
 static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
 {
 	struct tegra_display_hub *hub = dev_get_drvdata(dev);
+	unsigned int i = hub->num_heads;
 	int err;
 
 	err = reset_control_assert(hub->rst);
 	if (err < 0)
 		return err;
 
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
 	clk_disable_unprepare(hub->clk_hub);
 	clk_disable_unprepare(hub->clk_dsc);
 	clk_disable_unprepare(hub->clk_disp);
@@ -856,6 +890,7 @@ static int __maybe_unused tegra_display_hub_suspend(struct device *dev)
 static int __maybe_unused tegra_display_hub_resume(struct device *dev)
 {
 	struct tegra_display_hub *hub = dev_get_drvdata(dev);
+	unsigned int i;
 	int err;
 
 	err = clk_prepare_enable(hub->clk_disp);
@@ -870,13 +905,22 @@ static int __maybe_unused tegra_display_hub_resume(struct device *dev)
 	if (err < 0)
 		goto disable_dsc;
 
+	for (i = 0; i < hub->num_heads; i++) {
+		err = clk_prepare_enable(hub->clk_heads[i]);
+		if (err < 0)
+			goto disable_heads;
+	}
+
 	err = reset_control_deassert(hub->rst);
 	if (err < 0)
-		goto disable_hub;
+		goto disable_heads;
 
 	return 0;
 
-disable_hub:
+disable_heads:
+	while (i--)
+		clk_disable_unprepare(hub->clk_heads[i]);
+
 	clk_disable_unprepare(hub->clk_hub);
 disable_dsc:
 	clk_disable_unprepare(hub->clk_dsc);
diff --git a/drivers/gpu/drm/tegra/hub.h b/drivers/gpu/drm/tegra/hub.h
index 6696a85fc1f2..479087c0705a 100644
--- a/drivers/gpu/drm/tegra/hub.h
+++ b/drivers/gpu/drm/tegra/hub.h
@@ -49,6 +49,9 @@ struct tegra_display_hub {
 	struct clk *clk_hub;
 	struct reset_control *rst;
 
+	unsigned int num_heads;
+	struct clk **clk_heads;
+
 	const struct tegra_display_hub_soc *soc;
 	struct tegra_windowgroup *wgrps;
 };
diff --git a/drivers/gpu/drm/tegra/sor.c b/drivers/gpu/drm/tegra/sor.c
index b129da2e5afd..ef8692b7075a 100644
--- a/drivers/gpu/drm/tegra/sor.c
+++ b/drivers/gpu/drm/tegra/sor.c
@@ -19,6 +19,8 @@
 
 #include <soc/tegra/pmc.h>
 
+#include <sound/hda_verbs.h>
+
 #include <drm/drm_atomic_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_panel.h>
@@ -29,14 +31,6 @@
 #include "sor.h"
 #include "trace.h"
 
-/*
- * XXX Remove this after the commit adding it to soc/tegra/pmc.h has been
- * merged. Having this around after the commit is merged should be safe since
- * the preprocessor will effectively replace all occurrences and therefore no
- * duplicate will be defined.
- */
-#define TEGRA_IO_PAD_HDMI_DP0 26
-
 #define SOR_REKEY 0x38
 
 struct tegra_sor_hdmi_settings {
@@ -407,6 +401,7 @@ struct tegra_sor {
 	const struct tegra_sor_soc *soc;
 	void __iomem *regs;
 	unsigned int index;
+	unsigned int irq;
 
 	struct reset_control *rst;
 	struct clk *clk_parent;
@@ -433,6 +428,11 @@ struct tegra_sor {
 
 	struct delayed_work scdc;
 	bool scdc_enabled;
+
+	struct {
+		unsigned int sample_rate;
+		unsigned int channels;
+	} audio;
 };
 
 struct tegra_sor_state {
@@ -2139,6 +2139,144 @@ tegra_sor_hdmi_setup_avi_infoframe(struct tegra_sor *sor,
 	return 0;
 }
 
+static void tegra_sor_write_eld(struct tegra_sor *sor)
+{
+	size_t length = drm_eld_size(sor->output.connector.eld), i;
+
+	for (i = 0; i < length; i++)
+		tegra_sor_writel(sor, i << 8 | sor->output.connector.eld[i],
+				 SOR_AUDIO_HDA_ELD_BUFWR);
+
+	/*
+	 * The HDA codec will always report an ELD buffer size of 96 bytes and
+	 * the HDA codec driver will check that each byte read from the buffer
+	 * is valid. Therefore every byte must be written, even if no 96 bytes
+	 * were parsed from EDID.
+	 */
+	for (i = length; i < 96; i++)
+		tegra_sor_writel(sor, i << 8 | 0, SOR_AUDIO_HDA_ELD_BUFWR);
+}
+
+static void tegra_sor_audio_prepare(struct tegra_sor *sor)
+{
+	u32 value;
+
+	tegra_sor_write_eld(sor);
+
+	value = SOR_AUDIO_HDA_PRESENSE_ELDV | SOR_AUDIO_HDA_PRESENSE_PD;
+	tegra_sor_writel(sor, value, SOR_AUDIO_HDA_PRESENSE);
+}
+
+static void tegra_sor_audio_unprepare(struct tegra_sor *sor)
+{
+	tegra_sor_writel(sor, 0, SOR_AUDIO_HDA_PRESENSE);
+}
+
+static int tegra_sor_hdmi_enable_audio_infoframe(struct tegra_sor *sor)
+{
+	u8 buffer[HDMI_INFOFRAME_SIZE(AUDIO)];
+	struct hdmi_audio_infoframe frame;
+	u32 value;
+	int err;
+
+	err = hdmi_audio_infoframe_init(&frame);
+	if (err < 0) {
+		dev_err(sor->dev, "failed to setup audio infoframe: %d\n", err);
+		return err;
+	}
+
+	frame.channels = sor->audio.channels;
+
+	err = hdmi_audio_infoframe_pack(&frame, buffer, sizeof(buffer));
+	if (err < 0) {
+		dev_err(sor->dev, "failed to pack audio infoframe: %d\n", err);
+		return err;
+	}
+
+	tegra_sor_hdmi_write_infopack(sor, buffer, err);
+
+	value = tegra_sor_readl(sor, SOR_HDMI_AUDIO_INFOFRAME_CTRL);
+	value |= INFOFRAME_CTRL_CHECKSUM_ENABLE;
+	value |= INFOFRAME_CTRL_ENABLE;
+	tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_INFOFRAME_CTRL);
+
+	return 0;
+}
+
+static void tegra_sor_hdmi_audio_enable(struct tegra_sor *sor)
+{
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_AUDIO_CNTRL);
+
+	/* select HDA audio input */
+	value &= ~SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_MASK);
+	value |= SOR_AUDIO_CNTRL_SOURCE_SELECT(SOURCE_SELECT_HDA);
+
+	/* inject null samples */
+	if (sor->audio.channels != 2)
+		value &= ~SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+	else
+		value |= SOR_AUDIO_CNTRL_INJECT_NULLSMPL;
+
+	value |= SOR_AUDIO_CNTRL_AFIFO_FLUSH;
+
+	tegra_sor_writel(sor, value, SOR_AUDIO_CNTRL);
+
+	/* enable advertising HBR capability */
+	tegra_sor_writel(sor, SOR_AUDIO_SPARE_HBR_ENABLE, SOR_AUDIO_SPARE);
+
+	tegra_sor_writel(sor, 0, SOR_HDMI_ACR_CTRL);
+
+	value = SOR_HDMI_SPARE_ACR_PRIORITY_HIGH |
+		SOR_HDMI_SPARE_CTS_RESET(1) |
+		SOR_HDMI_SPARE_HW_CTS_ENABLE;
+	tegra_sor_writel(sor, value, SOR_HDMI_SPARE);
+
+	/* enable HW CTS */
+	value = SOR_HDMI_ACR_SUBPACK_LOW_SB1(0);
+	tegra_sor_writel(sor, value, SOR_HDMI_ACR_0441_SUBPACK_LOW);
+
+	/* allow packet to be sent */
+	value = SOR_HDMI_ACR_SUBPACK_HIGH_ENABLE;
+	tegra_sor_writel(sor, value, SOR_HDMI_ACR_0441_SUBPACK_HIGH);
+
+	/* reset N counter and enable lookup */
+	value = SOR_HDMI_AUDIO_N_RESET | SOR_HDMI_AUDIO_N_LOOKUP;
+	tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_N);
+
+	value = (24000 * 4096) / (128 * sor->audio.sample_rate / 1000);
+	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0320);
+	tegra_sor_writel(sor, 4096, SOR_AUDIO_NVAL_0320);
+
+	tegra_sor_writel(sor, 20000, SOR_AUDIO_AVAL_0441);
+	tegra_sor_writel(sor, 4704, SOR_AUDIO_NVAL_0441);
+
+	tegra_sor_writel(sor, 20000, SOR_AUDIO_AVAL_0882);
+	tegra_sor_writel(sor, 9408, SOR_AUDIO_NVAL_0882);
+
+	tegra_sor_writel(sor, 20000, SOR_AUDIO_AVAL_1764);
+	tegra_sor_writel(sor, 18816, SOR_AUDIO_NVAL_1764);
+
+	value = (24000 * 6144) / (128 * sor->audio.sample_rate / 1000);
+	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0480);
+	tegra_sor_writel(sor, 6144, SOR_AUDIO_NVAL_0480);
+
+	value = (24000 * 12288) / (128 * sor->audio.sample_rate / 1000);
+	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_0960);
+	tegra_sor_writel(sor, 12288, SOR_AUDIO_NVAL_0960);
+
+	value = (24000 * 24576) / (128 * sor->audio.sample_rate / 1000);
+	tegra_sor_writel(sor, value, SOR_AUDIO_AVAL_1920);
+	tegra_sor_writel(sor, 24576, SOR_AUDIO_NVAL_1920);
+
+	value = tegra_sor_readl(sor, SOR_HDMI_AUDIO_N);
+	value &= ~SOR_HDMI_AUDIO_N_RESET;
+	tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_N);
+
+	tegra_sor_hdmi_enable_audio_infoframe(sor);
+}
+
 static void tegra_sor_hdmi_disable_audio_infoframe(struct tegra_sor *sor)
 {
 	u32 value;
@@ -2148,6 +2286,11 @@ static void tegra_sor_hdmi_disable_audio_infoframe(struct tegra_sor *sor)
 	tegra_sor_writel(sor, value, SOR_HDMI_AUDIO_INFOFRAME_CTRL);
 }
 
+static void tegra_sor_hdmi_audio_disable(struct tegra_sor *sor)
+{
+	tegra_sor_hdmi_disable_audio_infoframe(sor);
+}
+
 static struct tegra_sor_hdmi_settings *
 tegra_sor_hdmi_find_settings(struct tegra_sor *sor, unsigned long frequency)
 {
@@ -2243,6 +2386,7 @@ static void tegra_sor_hdmi_disable(struct drm_encoder *encoder)
 	u32 value;
 	int err;
 
+	tegra_sor_audio_unprepare(sor);
 	tegra_sor_hdmi_scdc_stop(sor);
 
 	err = tegra_sor_detach(sor);
@@ -2651,6 +2795,7 @@ static void tegra_sor_hdmi_enable(struct drm_encoder *encoder)
 		dev_err(sor->dev, "failed to wakeup SOR: %d\n", err);
 
 	tegra_sor_hdmi_scdc_start(sor);
+	tegra_sor_audio_prepare(sor);
 }
 
 static const struct drm_encoder_helper_funcs tegra_sor_hdmi_helpers = {
@@ -2666,6 +2811,7 @@ static int tegra_sor_init(struct host1x_client *client)
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int connector = DRM_MODE_CONNECTOR_Unknown;
 	int encoder = DRM_MODE_ENCODER_NONE;
+	u32 value;
 	int err;
 
 	if (!sor->aux) {
@@ -2759,6 +2905,15 @@ static int tegra_sor_init(struct host1x_client *client)
 	if (err < 0)
 		return err;
 
+	/*
+	 * Enable and unmask the HDA codec SCRATCH0 register interrupt. This
+	 * is used for interoperability between the HDA codec driver and the
+	 * HDMI/DP driver.
+	 */
+	value = SOR_INT_CODEC_SCRATCH1 | SOR_INT_CODEC_SCRATCH0;
+	tegra_sor_writel(sor, value, SOR_INT_ENABLE);
+	tegra_sor_writel(sor, value, SOR_INT_MASK);
+
 	return 0;
 }
 
@@ -2767,6 +2922,9 @@ static int tegra_sor_exit(struct host1x_client *client)
 	struct tegra_sor *sor = host1x_client_to_sor(client);
 	int err;
 
+	tegra_sor_writel(sor, 0, SOR_INT_MASK);
+	tegra_sor_writel(sor, 0, SOR_INT_ENABLE);
+
 	tegra_output_exit(&sor->output);
 
 	if (sor->aux) {
@@ -3037,6 +3195,54 @@ static int tegra_sor_parse_dt(struct tegra_sor *sor)
 	return 0;
 }
 
+static void tegra_hda_parse_format(unsigned int format, unsigned int *rate,
+				   unsigned int *channels)
+{
+	unsigned int mul, div;
+
+	if (format & AC_FMT_BASE_44K)
+		*rate = 44100;
+	else
+		*rate = 48000;
+
+	mul = (format & AC_FMT_MULT_MASK) >> AC_FMT_MULT_SHIFT;
+	div = (format & AC_FMT_DIV_MASK) >> AC_FMT_DIV_SHIFT;
+
+	*rate = *rate * (mul + 1) / (div + 1);
+
+	*channels = (format & AC_FMT_CHAN_MASK) >> AC_FMT_CHAN_SHIFT;
+}
+
+static irqreturn_t tegra_sor_irq(int irq, void *data)
+{
+	struct tegra_sor *sor = data;
+	u32 value;
+
+	value = tegra_sor_readl(sor, SOR_INT_STATUS);
+	tegra_sor_writel(sor, value, SOR_INT_STATUS);
+
+	if (value & SOR_INT_CODEC_SCRATCH0) {
+		value = tegra_sor_readl(sor, SOR_AUDIO_HDA_CODEC_SCRATCH0);
+
+		if (value & SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID) {
+			unsigned int format, sample_rate, channels;
+
+			format = value & SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK;
+
+			tegra_hda_parse_format(format, &sample_rate, &channels);
+
+			sor->audio.sample_rate = sample_rate;
+			sor->audio.channels = channels;
+
+			tegra_sor_hdmi_audio_enable(sor);
+		} else {
+			tegra_sor_hdmi_audio_disable(sor);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
 static int tegra_sor_probe(struct platform_device *pdev)
 {
 	struct device_node *np;
@@ -3119,14 +3325,38 @@ static int tegra_sor_probe(struct platform_device *pdev)
 		goto remove;
 	}
 
-	if (!pdev->dev.pm_domain) {
-		sor->rst = devm_reset_control_get(&pdev->dev, "sor");
-		if (IS_ERR(sor->rst)) {
-			err = PTR_ERR(sor->rst);
+	err = platform_get_irq(pdev, 0);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to get IRQ: %d\n", err);
+		goto remove;
+	}
+
+	sor->irq = err;
+
+	err = devm_request_irq(sor->dev, sor->irq, tegra_sor_irq, 0,
+			       dev_name(sor->dev), sor);
+	if (err < 0) {
+		dev_err(&pdev->dev, "failed to request IRQ: %d\n", err);
+		goto remove;
+	}
+
+	sor->rst = devm_reset_control_get(&pdev->dev, "sor");
+	if (IS_ERR(sor->rst)) {
+		err = PTR_ERR(sor->rst);
+
+		if (err != -EBUSY || WARN_ON(!pdev->dev.pm_domain)) {
 			dev_err(&pdev->dev, "failed to get reset control: %d\n",
 				err);
 			goto remove;
 		}
+
+		/*
+		 * At this point, the reset control is most likely being used
+		 * by the generic power domain implementation. With any luck
+		 * the power domain will have taken care of resetting the SOR
+		 * and we don't have to do anything.
+		 */
+		sor->rst = NULL;
 	}
 
 	sor->clk = devm_clk_get(&pdev->dev, NULL);
diff --git a/drivers/gpu/drm/tegra/sor.h b/drivers/gpu/drm/tegra/sor.h
index fb0854d92a27..13f7e68bec42 100644
--- a/drivers/gpu/drm/tegra/sor.h
+++ b/drivers/gpu/drm/tegra/sor.h
@@ -364,12 +364,28 @@
 #define  INFOFRAME_HEADER_VERSION(x) (((x) & 0xff) << 8)
 #define  INFOFRAME_HEADER_TYPE(x) (((x) & 0xff) << 0)
 
+#define SOR_HDMI_ACR_CTRL 0xb1
+
+#define SOR_HDMI_ACR_0320_SUBPACK_LOW 0xb2
+#define  SOR_HDMI_ACR_SUBPACK_LOW_SB1(x) (((x) & 0xff) << 24)
+
+#define SOR_HDMI_ACR_0320_SUBPACK_HIGH 0xb3
+#define  SOR_HDMI_ACR_SUBPACK_HIGH_ENABLE (1 << 31)
+
+#define SOR_HDMI_ACR_0441_SUBPACK_LOW 0xb4
+#define SOR_HDMI_ACR_0441_SUBPACK_HIGH 0xb5
+
 #define SOR_HDMI_CTRL 0xc0
 #define  SOR_HDMI_CTRL_ENABLE (1 << 30)
 #define  SOR_HDMI_CTRL_MAX_AC_PACKET(x) (((x) & 0x1f) << 16)
 #define  SOR_HDMI_CTRL_AUDIO_LAYOUT (1 << 10)
 #define  SOR_HDMI_CTRL_REKEY(x) (((x) & 0x7f) << 0)
 
+#define SOR_HDMI_SPARE 0xcb
+#define  SOR_HDMI_SPARE_ACR_PRIORITY_HIGH (1 << 31)
+#define  SOR_HDMI_SPARE_CTS_RESET(x) (((x) & 0x7) << 16)
+#define  SOR_HDMI_SPARE_HW_CTS_ENABLE (1 << 0)
+
 #define SOR_REFCLK 0xe6
 #define  SOR_REFCLK_DIV_INT(x) ((((x) >> 2) & 0xff) << 8)
 #define  SOR_REFCLK_DIV_FRAC(x) (((x) & 0x3) << 6)
@@ -378,10 +394,62 @@
 #define  SOR_INPUT_CONTROL_ARM_VIDEO_RANGE_LIMITED (1 << 1)
 #define  SOR_INPUT_CONTROL_HDMI_SRC_SELECT(x) (((x) & 0x1) << 0)
 
+#define SOR_AUDIO_CNTRL 0xfc
+#define  SOR_AUDIO_CNTRL_INJECT_NULLSMPL (1 << 29)
+#define  SOR_AUDIO_CNTRL_SOURCE_SELECT(x) (((x) & 0x3) << 20)
+#define   SOURCE_SELECT_MASK 0x3
+#define   SOURCE_SELECT_HDA 0x2
+#define   SOURCE_SELECT_SPDIF 0x1
+#define   SOURCE_SELECT_AUTO 0x0
+#define  SOR_AUDIO_CNTRL_AFIFO_FLUSH (1 << 12)
+
+#define SOR_AUDIO_SPARE 0xfe
+#define  SOR_AUDIO_SPARE_HBR_ENABLE (1 << 27)
+
+#define SOR_AUDIO_NVAL_0320 0xff
+#define SOR_AUDIO_NVAL_0441 0x100
+#define SOR_AUDIO_NVAL_0882 0x101
+#define SOR_AUDIO_NVAL_1764 0x102
+#define SOR_AUDIO_NVAL_0480 0x103
+#define SOR_AUDIO_NVAL_0960 0x104
+#define SOR_AUDIO_NVAL_1920 0x105
+
+#define SOR_AUDIO_HDA_CODEC_SCRATCH0 0x10a
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_VALID (1 << 30)
+#define  SOR_AUDIO_HDA_CODEC_SCRATCH0_FMT_MASK 0xffff
+
+#define SOR_AUDIO_HDA_ELD_BUFWR 0x10c
+#define  SOR_AUDIO_HDA_ELD_BUFWR_INDEX(x) (((x) & 0xff) << 8)
+#define  SOR_AUDIO_HDA_ELD_BUFWR_DATA(x) (((x) & 0xff) << 0)
+
+#define SOR_AUDIO_HDA_PRESENSE 0x10d
+#define  SOR_AUDIO_HDA_PRESENSE_ELDV (1 << 1)
+#define  SOR_AUDIO_HDA_PRESENSE_PD (1 << 0)
+
+#define SOR_AUDIO_AVAL_0320 0x10f
+#define SOR_AUDIO_AVAL_0441 0x110
+#define SOR_AUDIO_AVAL_0882 0x111
+#define SOR_AUDIO_AVAL_1764 0x112
+#define SOR_AUDIO_AVAL_0480 0x113
+#define SOR_AUDIO_AVAL_0960 0x114
+#define SOR_AUDIO_AVAL_1920 0x115
+
+#define SOR_INT_STATUS 0x11c
+#define  SOR_INT_CODEC_CP_REQUEST (1 << 2)
+#define  SOR_INT_CODEC_SCRATCH1 (1 << 1)
+#define  SOR_INT_CODEC_SCRATCH0 (1 << 0)
+
+#define SOR_INT_MASK 0x11d
+#define SOR_INT_ENABLE 0x11e
+
 #define SOR_HDMI_VSI_INFOFRAME_CTRL 0x123
 #define SOR_HDMI_VSI_INFOFRAME_STATUS 0x124
 #define SOR_HDMI_VSI_INFOFRAME_HEADER 0x125
 
+#define SOR_HDMI_AUDIO_N 0x13c
+#define SOR_HDMI_AUDIO_N_LOOKUP (1 << 28)
+#define SOR_HDMI_AUDIO_N_RESET (1 << 20)
+
 #define SOR_HDMI2_CTRL 0x13e
 #define  SOR_HDMI2_CTRL_CLOCK_MODE_DIV_BY_4 (1 << 1)
 #define  SOR_HDMI2_CTRL_SCRAMBLE (1 << 0)
diff --git a/drivers/gpu/drm/tegra/vic.c b/drivers/gpu/drm/tegra/vic.c
index 9f657a63b0bb..d47983deb1cf 100644
--- a/drivers/gpu/drm/tegra/vic.c
+++ b/drivers/gpu/drm/tegra/vic.c
@@ -38,6 +38,7 @@ struct vic {
 	struct iommu_domain *domain;
 	struct device *dev;
 	struct clk *clk;
+	struct reset_control *rst;
 
 	/* Platform configuration */
 	const struct vic_config *config;
@@ -56,13 +57,37 @@ static void vic_writel(struct vic *vic, u32 value, unsigned int offset)
 static int vic_runtime_resume(struct device *dev)
 {
 	struct vic *vic = dev_get_drvdata(dev);
+	int err;
+
+	err = clk_prepare_enable(vic->clk);
+	if (err < 0)
+		return err;
+
+	usleep_range(10, 20);
+
+	err = reset_control_deassert(vic->rst);
+	if (err < 0)
+		goto disable;
+
+	usleep_range(10, 20);
+
+	return 0;
 
-	return clk_prepare_enable(vic->clk);
+disable:
+	clk_disable_unprepare(vic->clk);
+	return err;
 }
 
 static int vic_runtime_suspend(struct device *dev)
 {
 	struct vic *vic = dev_get_drvdata(dev);
+	int err;
+
+	err = reset_control_assert(vic->rst);
+	if (err < 0)
+		return err;
+
+	usleep_range(2000, 4000);
 
 	clk_disable_unprepare(vic->clk);
 
@@ -282,10 +307,18 @@ static const struct vic_config vic_t186_config = {
 	.version = 0x18,
 };
 
+#define NVIDIA_TEGRA_194_VIC_FIRMWARE "nvidia/tegra194/vic.bin"
+
+static const struct vic_config vic_t194_config = {
+	.firmware = NVIDIA_TEGRA_194_VIC_FIRMWARE,
+	.version = 0x19,
+};
+
 static const struct of_device_id vic_match[] = {
 	{ .compatible = "nvidia,tegra124-vic", .data = &vic_t124_config },
 	{ .compatible = "nvidia,tegra210-vic", .data = &vic_t210_config },
 	{ .compatible = "nvidia,tegra186-vic", .data = &vic_t186_config },
+	{ .compatible = "nvidia,tegra194-vic", .data = &vic_t194_config },
 	{ },
 };
 
@@ -323,6 +356,14 @@ static int vic_probe(struct platform_device *pdev)
 		return PTR_ERR(vic->clk);
 	}
 
+	if (!dev->pm_domain) {
+		vic->rst = devm_reset_control_get(dev, "vic");
+		if (IS_ERR(vic->rst)) {
+			dev_err(&pdev->dev, "failed to get reset\n");
+			return PTR_ERR(vic->rst);
+		}
+	}
+
 	vic->falcon.dev = dev;
 	vic->falcon.regs = vic->regs;
 	vic->falcon.ops = &vic_falcon_ops;
@@ -418,3 +459,6 @@ MODULE_FIRMWARE(NVIDIA_TEGRA_210_VIC_FIRMWARE);
 #if IS_ENABLED(CONFIG_ARCH_TEGRA_186_SOC)
 MODULE_FIRMWARE(NVIDIA_TEGRA_186_VIC_FIRMWARE);
 #endif
+#if IS_ENABLED(CONFIG_ARCH_TEGRA_194_SOC)
+MODULE_FIRMWARE(NVIDIA_TEGRA_194_VIC_FIRMWARE);
+#endif
diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c
index d87935bf8e30..0ec08394e17a 100644
--- a/drivers/gpu/drm/ttm/ttm_bo.c
+++ b/drivers/gpu/drm/ttm/ttm_bo.c
@@ -77,38 +77,39 @@ static inline int ttm_mem_type_from_place(const struct ttm_place *place,
 	return 0;
 }
 
-static void ttm_mem_type_debug(struct ttm_bo_device *bdev, int mem_type)
+static void ttm_mem_type_debug(struct ttm_bo_device *bdev, struct drm_printer *p,
+			       int mem_type)
 {
 	struct ttm_mem_type_manager *man = &bdev->man[mem_type];
-	struct drm_printer p = drm_debug_printer(TTM_PFX);
 
-	pr_err("    has_type: %d\n", man->has_type);
-	pr_err("    use_type: %d\n", man->use_type);
-	pr_err("    flags: 0x%08X\n", man->flags);
-	pr_err("    gpu_offset: 0x%08llX\n", man->gpu_offset);
-	pr_err("    size: %llu\n", man->size);
-	pr_err("    available_caching: 0x%08X\n", man->available_caching);
-	pr_err("    default_caching: 0x%08X\n", man->default_caching);
+	drm_printf(p, "    has_type: %d\n", man->has_type);
+	drm_printf(p, "    use_type: %d\n", man->use_type);
+	drm_printf(p, "    flags: 0x%08X\n", man->flags);
+	drm_printf(p, "    gpu_offset: 0x%08llX\n", man->gpu_offset);
+	drm_printf(p, "    size: %llu\n", man->size);
+	drm_printf(p, "    available_caching: 0x%08X\n", man->available_caching);
+	drm_printf(p, "    default_caching: 0x%08X\n", man->default_caching);
 	if (mem_type != TTM_PL_SYSTEM)
-		(*man->func->debug)(man, &p);
+		(*man->func->debug)(man, p);
 }
 
 static void ttm_bo_mem_space_debug(struct ttm_buffer_object *bo,
 					struct ttm_placement *placement)
 {
+	struct drm_printer p = drm_debug_printer(TTM_PFX);
 	int i, ret, mem_type;
 
-	pr_err("No space for %p (%lu pages, %luK, %luM)\n",
-	       bo, bo->mem.num_pages, bo->mem.size >> 10,
-	       bo->mem.size >> 20);
+	drm_printf(&p, "No space for %p (%lu pages, %luK, %luM)\n",
+		   bo, bo->mem.num_pages, bo->mem.size >> 10,
+		   bo->mem.size >> 20);
 	for (i = 0; i < placement->num_placement; i++) {
 		ret = ttm_mem_type_from_place(&placement->placement[i],
 						&mem_type);
 		if (ret)
 			return;
-		pr_err("  placement[%d]=0x%08X (%d)\n",
-		       i, placement->placement[i].flags, mem_type);
-		ttm_mem_type_debug(bo->bdev, mem_type);
+		drm_printf(&p, "  placement[%d]=0x%08X (%d)\n",
+			   i, placement->placement[i].flags, mem_type);
+		ttm_mem_type_debug(bo->bdev, &p, mem_type);
 	}
 }
 
diff --git a/drivers/gpu/drm/ttm/ttm_execbuf_util.c b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
index efa005a1c1b7..93860346c426 100644
--- a/drivers/gpu/drm/ttm/ttm_execbuf_util.c
+++ b/drivers/gpu/drm/ttm/ttm_execbuf_util.c
@@ -126,10 +126,11 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 		}
 
 		if (!ret) {
-			if (!entry->shared)
+			if (!entry->num_shared)
 				continue;
 
-			ret = reservation_object_reserve_shared(bo->resv, 1);
+			ret = reservation_object_reserve_shared(bo->resv,
+								entry->num_shared);
 			if (!ret)
 				continue;
 		}
@@ -150,8 +151,9 @@ int ttm_eu_reserve_buffers(struct ww_acquire_ctx *ticket,
 			}
 		}
 
-		if (!ret && entry->shared)
-			ret = reservation_object_reserve_shared(bo->resv, 1);
+		if (!ret && entry->num_shared)
+			ret = reservation_object_reserve_shared(bo->resv,
+								entry->num_shared);
 
 		if (unlikely(ret != 0)) {
 			if (ret == -EINTR)
@@ -199,7 +201,7 @@ void ttm_eu_fence_buffer_objects(struct ww_acquire_ctx *ticket,
 
 	list_for_each_entry(entry, list, head) {
 		bo = entry->bo;
-		if (entry->shared)
+		if (entry->num_shared)
 			reservation_object_add_shared_fence(bo->resv, fence);
 		else
 			reservation_object_add_excl_fence(bo->resv, fence);
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.c b/drivers/gpu/drm/virtio/virtgpu_drv.c
index f7f32a885af7..2d1aaca49105 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.c
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.c
@@ -127,14 +127,10 @@ static struct drm_driver driver = {
 #if defined(CONFIG_DEBUG_FS)
 	.debugfs_init = virtio_gpu_debugfs_init,
 #endif
-	.prime_handle_to_fd = drm_gem_prime_handle_to_fd,
-	.prime_fd_to_handle = drm_gem_prime_fd_to_handle,
 	.gem_prime_export = drm_gem_prime_export,
 	.gem_prime_import = drm_gem_prime_import,
 	.gem_prime_pin = virtgpu_gem_prime_pin,
 	.gem_prime_unpin = virtgpu_gem_prime_unpin,
-	.gem_prime_get_sg_table = virtgpu_gem_prime_get_sg_table,
-	.gem_prime_import_sg_table = virtgpu_gem_prime_import_sg_table,
 	.gem_prime_vmap = virtgpu_gem_prime_vmap,
 	.gem_prime_vunmap = virtgpu_gem_prime_vunmap,
 	.gem_prime_mmap = virtgpu_gem_prime_mmap,
diff --git a/drivers/gpu/drm/virtio/virtgpu_drv.h b/drivers/gpu/drm/virtio/virtgpu_drv.h
index 1deb41d42ea4..0c15000f926e 100644
--- a/drivers/gpu/drm/virtio/virtgpu_drv.h
+++ b/drivers/gpu/drm/virtio/virtgpu_drv.h
@@ -372,10 +372,6 @@ int virtio_gpu_object_wait(struct virtio_gpu_object *bo, bool no_wait);
 /* virtgpu_prime.c */
 int virtgpu_gem_prime_pin(struct drm_gem_object *obj);
 void virtgpu_gem_prime_unpin(struct drm_gem_object *obj);
-struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
-struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
-	struct drm_device *dev, struct dma_buf_attachment *attach,
-	struct sg_table *sgt);
 void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj);
 void virtgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
 int virtgpu_gem_prime_mmap(struct drm_gem_object *obj,
diff --git a/drivers/gpu/drm/virtio/virtgpu_prime.c b/drivers/gpu/drm/virtio/virtgpu_prime.c
index 86ce0ae93f59..c59ec34c80a5 100644
--- a/drivers/gpu/drm/virtio/virtgpu_prime.c
+++ b/drivers/gpu/drm/virtio/virtgpu_prime.c
@@ -39,20 +39,6 @@ void virtgpu_gem_prime_unpin(struct drm_gem_object *obj)
 	WARN_ONCE(1, "not implemented");
 }
 
-struct sg_table *virtgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENODEV);
-}
-
-struct drm_gem_object *virtgpu_gem_prime_import_sg_table(
-	struct drm_device *dev, struct dma_buf_attachment *attach,
-	struct sg_table *table)
-{
-	WARN_ONCE(1, "not implemented");
-	return ERR_PTR(-ENODEV);
-}
-
 void *virtgpu_gem_prime_vmap(struct drm_gem_object *obj)
 {
 	struct virtio_gpu_object *bo = gem_to_virtio_gpu_obj(obj);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9fd8b4e75a8c..25afb1d594e3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -34,7 +34,7 @@
 #include <drm/ttm/ttm_placement.h>
 #include <drm/ttm/ttm_bo_driver.h>
 #include <drm/ttm/ttm_module.h>
-#include <linux/dma_remapping.h>
+#include <linux/intel-iommu.h>
 
 #define VMWGFX_DRIVER_DESC "Linux drm driver for VMware graphics devices"
 #define VMWGFX_CHIP_SVGAII 0
@@ -49,6 +49,8 @@
 
 #define VMWGFX_REPO "In Tree"
 
+#define VMWGFX_VALIDATION_MEM_GRAN (16*PAGE_SIZE)
+
 
 /**
  * Fully encoded drm commands. Might move to vmw_drm.h
@@ -581,7 +583,7 @@ static int vmw_dma_select_mode(struct vmw_private *dev_priv)
 
 	dev_priv->map_mode = vmw_dma_map_populate;
 
-	if (dma_ops->sync_single_for_cpu)
+	if (dma_ops && dma_ops->sync_single_for_cpu)
 		dev_priv->map_mode = vmw_dma_alloc_coherent;
 #ifdef CONFIG_SWIOTLB
 	if (swiotlb_nr_tbl() == 0)
@@ -911,7 +913,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 		spin_unlock(&dev_priv->cap_lock);
 	}
 
-
+	vmw_validation_mem_init_ttm(dev_priv, VMWGFX_VALIDATION_MEM_GRAN);
 	ret = vmw_kms_init(dev_priv);
 	if (unlikely(ret != 0))
 		goto out_no_kms;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index d7f6cb9331de..cd607ba9c2fe 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -593,6 +593,9 @@ struct vmw_private {
 
 	struct vmw_cmdbuf_man *cman;
 	DECLARE_BITMAP(irqthread_pending, VMW_IRQTHREAD_MAX);
+
+	/* Validation memory reservation */
+	struct vmw_validation_mem vvm;
 };
 
 static inline struct vmw_surface *vmw_res_to_srf(struct vmw_resource *res)
@@ -831,6 +834,8 @@ extern int vmw_fifo_flush(struct vmw_private *dev_priv,
 
 extern int vmw_mmap(struct file *filp, struct vm_area_struct *vma);
 
+extern void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv,
+					size_t gran);
 /**
  * TTM buffer object driver - vmwgfx_ttm_buffer.c
  */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 260650bb5560..f2d13a72c05d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -3835,6 +3835,8 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	struct sync_file *sync_file = NULL;
 	DECLARE_VAL_CONTEXT(val_ctx, &sw_context->res_ht, 1);
 
+	vmw_validation_set_val_mem(&val_ctx, &dev_priv->vvm);
+
 	if (flags & DRM_VMW_EXECBUF_FLAG_EXPORT_FENCE_FD) {
 		out_fence_fd = get_unused_fd_flags(O_CLOEXEC);
 		if (out_fence_fd < 0) {
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 8a029bade32a..3025bfc001a1 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -85,7 +85,7 @@ static void vmw_resource_release(struct kref *kref)
 			struct ttm_validate_buffer val_buf;
 
 			val_buf.bo = bo;
-			val_buf.shared = false;
+			val_buf.num_shared = 0;
 			res->func->unbind(res, false, &val_buf);
 		}
 		res->backup_dirty = false;
@@ -462,7 +462,7 @@ vmw_resource_check_buffer(struct ww_acquire_ctx *ticket,
 
 	INIT_LIST_HEAD(&val_list);
 	val_buf->bo = ttm_bo_reference(&res->backup->base);
-	val_buf->shared = false;
+	val_buf->num_shared = 0;
 	list_add_tail(&val_buf->head, &val_list);
 	ret = ttm_eu_reserve_buffers(ticket, &val_list, interruptible, NULL);
 	if (unlikely(ret != 0))
@@ -565,7 +565,7 @@ static int vmw_resource_do_evict(struct ww_acquire_ctx *ticket,
 	BUG_ON(!func->may_evict);
 
 	val_buf.bo = NULL;
-	val_buf.shared = false;
+	val_buf.num_shared = 0;
 	ret = vmw_resource_check_buffer(ticket, res, interruptible, &val_buf);
 	if (unlikely(ret != 0))
 		return ret;
@@ -614,7 +614,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
 		return 0;
 
 	val_buf.bo = NULL;
-	val_buf.shared = false;
+	val_buf.num_shared = 0;
 	if (res->backup)
 		val_buf.bo = &res->backup->base;
 	do {
@@ -685,7 +685,7 @@ void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
 	struct vmw_resource *res, *next;
 	struct ttm_validate_buffer val_buf = {
 		.bo = &vbo->base,
-		.shared = false
+		.num_shared = 0
 	};
 
 	lockdep_assert_held(&vbo->base.resv->lock.base);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 154eb09aa91e..e6d75e377dd8 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -42,3 +42,39 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
 	dev_priv = vmw_priv(file_priv->minor->dev);
 	return ttm_bo_mmap(filp, vma, &dev_priv->bdev);
 }
+
+/* struct vmw_validation_mem callback */
+static int vmw_vmt_reserve(struct vmw_validation_mem *m, size_t size)
+{
+	static struct ttm_operation_ctx ctx = {.interruptible = false,
+					       .no_wait_gpu = false};
+	struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm);
+
+	return ttm_mem_global_alloc(vmw_mem_glob(dev_priv), size, &ctx);
+}
+
+/* struct vmw_validation_mem callback */
+static void vmw_vmt_unreserve(struct vmw_validation_mem *m, size_t size)
+{
+	struct vmw_private *dev_priv = container_of(m, struct vmw_private, vvm);
+
+	return ttm_mem_global_free(vmw_mem_glob(dev_priv), size);
+}
+
+/**
+ * vmw_validation_mem_init_ttm - Interface the validation memory tracker
+ * to ttm.
+ * @dev_priv: Pointer to struct vmw_private. The reason we choose a vmw private
+ * rather than a struct vmw_validation_mem is to make sure assumption in the
+ * callbacks that struct vmw_private derives from struct vmw_validation_mem
+ * holds true.
+ * @gran: The recommended allocation granularity
+ */
+void vmw_validation_mem_init_ttm(struct vmw_private *dev_priv, size_t gran)
+{
+	struct vmw_validation_mem *vvm = &dev_priv->vvm;
+
+	vvm->reserve_mem = vmw_vmt_reserve;
+	vvm->unreserve_mem = vmw_vmt_unreserve;
+	vvm->gran = gran;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 184025fa938e..b3f547fc5d3d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -104,11 +104,25 @@ void *vmw_validation_mem_alloc(struct vmw_validation_context *ctx,
 		return NULL;
 
 	if (ctx->mem_size_left < size) {
-		struct page *page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+		struct page *page;
 
+		if (ctx->vm && ctx->vm_size_left < PAGE_SIZE) {
+			int ret = ctx->vm->reserve_mem(ctx->vm, ctx->vm->gran);
+
+			if (ret)
+				return NULL;
+
+			ctx->vm_size_left += ctx->vm->gran;
+			ctx->total_mem += ctx->vm->gran;
+		}
+
+		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
 		if (!page)
 			return NULL;
 
+		if (ctx->vm)
+			ctx->vm_size_left -= PAGE_SIZE;
+
 		list_add_tail(&page->lru, &ctx->page_list);
 		ctx->page_address = page_address(page);
 		ctx->mem_size_left = PAGE_SIZE;
@@ -138,6 +152,11 @@ static void vmw_validation_mem_free(struct vmw_validation_context *ctx)
 	}
 
 	ctx->mem_size_left = 0;
+	if (ctx->vm && ctx->total_mem) {
+		ctx->vm->unreserve_mem(ctx->vm, ctx->total_mem);
+		ctx->total_mem = 0;
+		ctx->vm_size_left = 0;
+	}
 }
 
 /**
@@ -266,7 +285,7 @@ int vmw_validation_add_bo(struct vmw_validation_context *ctx,
 		val_buf->bo = ttm_bo_get_unless_zero(&vbo->base);
 		if (!val_buf->bo)
 			return -ESRCH;
-		val_buf->shared = false;
+		val_buf->num_shared = 0;
 		list_add_tail(&val_buf->head, &ctx->bo_list);
 		bo_node->as_mob = as_mob;
 		bo_node->cpu_blit = cpu_blit;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index b57e3292c386..3b396fea40d7 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -34,6 +34,21 @@
 #include <drm/ttm/ttm_execbuf_util.h>
 
 /**
+ * struct vmw_validation_mem - Custom interface to provide memory reservations
+ * for the validation code.
+ * @reserve_mem: Callback to reserve memory
+ * @unreserve_mem: Callback to unreserve memory
+ * @gran: Reservation granularity. Contains a hint how much memory should
+ * be reserved in each call to @reserve_mem(). A slow implementation may want
+ * reservation to be done in large batches.
+ */
+struct vmw_validation_mem {
+	int (*reserve_mem)(struct vmw_validation_mem *m, size_t size);
+	void (*unreserve_mem)(struct vmw_validation_mem *m, size_t size);
+	size_t gran;
+};
+
+/**
  * struct vmw_validation_context - Per command submission validation context
  * @ht: Hash table used to find resource- or buffer object duplicates
  * @resource_list: List head for resource validation metadata
@@ -47,6 +62,10 @@
  * buffer objects
  * @mem_size_left: Free memory left in the last page in @page_list
  * @page_address: Kernel virtual address of the last page in @page_list
+ * @vm: A pointer to the memory reservation interface or NULL if no
+ * memory reservation is needed.
+ * @vm_size_left: Amount of reserved memory that so far has not been allocated.
+ * @total_mem: Amount of reserved memory.
  */
 struct vmw_validation_context {
 	struct drm_open_hash *ht;
@@ -59,6 +78,9 @@ struct vmw_validation_context {
 	unsigned int merge_dups;
 	unsigned int mem_size_left;
 	u8 *page_address;
+	struct vmw_validation_mem *vm;
+	size_t vm_size_left;
+	size_t total_mem;
 };
 
 struct vmw_buffer_object;
@@ -102,6 +124,21 @@ vmw_validation_has_bos(struct vmw_validation_context *ctx)
 }
 
 /**
+ * vmw_validation_set_val_mem - Register a validation mem object for
+ * validation memory reservation
+ * @ctx: The validation context
+ * @vm: Pointer to a struct vmw_validation_mem
+ *
+ * Must be set before the first attempt to allocate validation memory.
+ */
+static inline void
+vmw_validation_set_val_mem(struct vmw_validation_context *ctx,
+			   struct vmw_validation_mem *vm)
+{
+	ctx->vm = vm;
+}
+
+/**
  * vmw_validation_set_ht - Register a hash table for duplicate finding
  * @ctx: The validation context
  * @ht: Pointer to a hash table to use for duplicate finding
diff --git a/drivers/gpu/drm/xen/Kconfig b/drivers/gpu/drm/xen/Kconfig
index 4cca160782ab..f969d486855d 100644
--- a/drivers/gpu/drm/xen/Kconfig
+++ b/drivers/gpu/drm/xen/Kconfig
@@ -12,6 +12,7 @@ config DRM_XEN_FRONTEND
 	select DRM_KMS_HELPER
 	select VIDEOMODE_HELPERS
 	select XEN_XENBUS_FRONTEND
+	select XEN_FRONT_PGDIR_SHBUF
 	help
 	  Choose this option if you want to enable a para-virtualized
 	  frontend DRM/KMS driver for Xen guest OSes.
diff --git a/drivers/gpu/drm/xen/Makefile b/drivers/gpu/drm/xen/Makefile
index 712afff5ffc3..825905f67faa 100644
--- a/drivers/gpu/drm/xen/Makefile
+++ b/drivers/gpu/drm/xen/Makefile
@@ -4,7 +4,6 @@ drm_xen_front-objs := xen_drm_front.o \
 		      xen_drm_front_kms.o \
 		      xen_drm_front_conn.o \
 		      xen_drm_front_evtchnl.o \
-		      xen_drm_front_shbuf.o \
 		      xen_drm_front_cfg.o \
 		      xen_drm_front_gem.o
 
diff --git a/drivers/gpu/drm/xen/xen_drm_front.c b/drivers/gpu/drm/xen/xen_drm_front.c
index 6b6d5ab82ec3..4d3d36fc3a5d 100644
--- a/drivers/gpu/drm/xen/xen_drm_front.c
+++ b/drivers/gpu/drm/xen/xen_drm_front.c
@@ -19,6 +19,7 @@
 #include <xen/xen.h>
 #include <xen/xenbus.h>
 
+#include <xen/xen-front-pgdir-shbuf.h>
 #include <xen/interface/io/displif.h>
 
 #include "xen_drm_front.h"
@@ -26,28 +27,20 @@
 #include "xen_drm_front_evtchnl.h"
 #include "xen_drm_front_gem.h"
 #include "xen_drm_front_kms.h"
-#include "xen_drm_front_shbuf.h"
 
 struct xen_drm_front_dbuf {
 	struct list_head list;
 	u64 dbuf_cookie;
 	u64 fb_cookie;
-	struct xen_drm_front_shbuf *shbuf;
+
+	struct xen_front_pgdir_shbuf shbuf;
 };
 
-static int dbuf_add_to_list(struct xen_drm_front_info *front_info,
-			    struct xen_drm_front_shbuf *shbuf, u64 dbuf_cookie)
+static void dbuf_add_to_list(struct xen_drm_front_info *front_info,
+			     struct xen_drm_front_dbuf *dbuf, u64 dbuf_cookie)
 {
-	struct xen_drm_front_dbuf *dbuf;
-
-	dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL);
-	if (!dbuf)
-		return -ENOMEM;
-
 	dbuf->dbuf_cookie = dbuf_cookie;
-	dbuf->shbuf = shbuf;
 	list_add(&dbuf->list, &front_info->dbuf_list);
-	return 0;
 }
 
 static struct xen_drm_front_dbuf *dbuf_get(struct list_head *dbuf_list,
@@ -62,15 +55,6 @@ static struct xen_drm_front_dbuf *dbuf_get(struct list_head *dbuf_list,
 	return NULL;
 }
 
-static void dbuf_flush_fb(struct list_head *dbuf_list, u64 fb_cookie)
-{
-	struct xen_drm_front_dbuf *buf, *q;
-
-	list_for_each_entry_safe(buf, q, dbuf_list, list)
-		if (buf->fb_cookie == fb_cookie)
-			xen_drm_front_shbuf_flush(buf->shbuf);
-}
-
 static void dbuf_free(struct list_head *dbuf_list, u64 dbuf_cookie)
 {
 	struct xen_drm_front_dbuf *buf, *q;
@@ -78,8 +62,8 @@ static void dbuf_free(struct list_head *dbuf_list, u64 dbuf_cookie)
 	list_for_each_entry_safe(buf, q, dbuf_list, list)
 		if (buf->dbuf_cookie == dbuf_cookie) {
 			list_del(&buf->list);
-			xen_drm_front_shbuf_unmap(buf->shbuf);
-			xen_drm_front_shbuf_free(buf->shbuf);
+			xen_front_pgdir_shbuf_unmap(&buf->shbuf);
+			xen_front_pgdir_shbuf_free(&buf->shbuf);
 			kfree(buf);
 			break;
 		}
@@ -91,8 +75,8 @@ static void dbuf_free_all(struct list_head *dbuf_list)
 
 	list_for_each_entry_safe(buf, q, dbuf_list, list) {
 		list_del(&buf->list);
-		xen_drm_front_shbuf_unmap(buf->shbuf);
-		xen_drm_front_shbuf_free(buf->shbuf);
+		xen_front_pgdir_shbuf_unmap(&buf->shbuf);
+		xen_front_pgdir_shbuf_free(&buf->shbuf);
 		kfree(buf);
 	}
 }
@@ -171,9 +155,9 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
 			      u32 bpp, u64 size, struct page **pages)
 {
 	struct xen_drm_front_evtchnl *evtchnl;
-	struct xen_drm_front_shbuf *shbuf;
+	struct xen_drm_front_dbuf *dbuf;
 	struct xendispl_req *req;
-	struct xen_drm_front_shbuf_cfg buf_cfg;
+	struct xen_front_pgdir_shbuf_cfg buf_cfg;
 	unsigned long flags;
 	int ret;
 
@@ -181,28 +165,29 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
 	if (unlikely(!evtchnl))
 		return -EIO;
 
+	dbuf = kzalloc(sizeof(*dbuf), GFP_KERNEL);
+	if (!dbuf)
+		return -ENOMEM;
+
+	dbuf_add_to_list(front_info, dbuf, dbuf_cookie);
+
 	memset(&buf_cfg, 0, sizeof(buf_cfg));
 	buf_cfg.xb_dev = front_info->xb_dev;
+	buf_cfg.num_pages = DIV_ROUND_UP(size, PAGE_SIZE);
 	buf_cfg.pages = pages;
-	buf_cfg.size = size;
+	buf_cfg.pgdir = &dbuf->shbuf;
 	buf_cfg.be_alloc = front_info->cfg.be_alloc;
 
-	shbuf = xen_drm_front_shbuf_alloc(&buf_cfg);
-	if (IS_ERR(shbuf))
-		return PTR_ERR(shbuf);
-
-	ret = dbuf_add_to_list(front_info, shbuf, dbuf_cookie);
-	if (ret < 0) {
-		xen_drm_front_shbuf_free(shbuf);
-		return ret;
-	}
+	ret = xen_front_pgdir_shbuf_alloc(&buf_cfg);
+	if (ret < 0)
+		goto fail_shbuf_alloc;
 
 	mutex_lock(&evtchnl->u.req.req_io_lock);
 
 	spin_lock_irqsave(&front_info->io_lock, flags);
 	req = be_prepare_req(evtchnl, XENDISPL_OP_DBUF_CREATE);
 	req->op.dbuf_create.gref_directory =
-			xen_drm_front_shbuf_get_dir_start(shbuf);
+			xen_front_pgdir_shbuf_get_dir_start(&dbuf->shbuf);
 	req->op.dbuf_create.buffer_sz = size;
 	req->op.dbuf_create.dbuf_cookie = dbuf_cookie;
 	req->op.dbuf_create.width = width;
@@ -221,7 +206,7 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
 	if (ret < 0)
 		goto fail;
 
-	ret = xen_drm_front_shbuf_map(shbuf);
+	ret = xen_front_pgdir_shbuf_map(&dbuf->shbuf);
 	if (ret < 0)
 		goto fail;
 
@@ -230,6 +215,7 @@ int xen_drm_front_dbuf_create(struct xen_drm_front_info *front_info,
 
 fail:
 	mutex_unlock(&evtchnl->u.req.req_io_lock);
+fail_shbuf_alloc:
 	dbuf_free(&front_info->dbuf_list, dbuf_cookie);
 	return ret;
 }
@@ -358,7 +344,6 @@ int xen_drm_front_page_flip(struct xen_drm_front_info *front_info,
 	if (unlikely(conn_idx >= front_info->num_evt_pairs))
 		return -EINVAL;
 
-	dbuf_flush_fb(&front_info->dbuf_list, fb_cookie);
 	evtchnl = &front_info->evt_pairs[conn_idx].req;
 
 	mutex_lock(&evtchnl->u.req.req_io_lock);
diff --git a/drivers/gpu/drm/xen/xen_drm_front_gem.c b/drivers/gpu/drm/xen/xen_drm_front_gem.c
index 47ff019d3aef..28bc501af450 100644
--- a/drivers/gpu/drm/xen/xen_drm_front_gem.c
+++ b/drivers/gpu/drm/xen/xen_drm_front_gem.c
@@ -22,7 +22,6 @@
 #include <xen/balloon.h>
 
 #include "xen_drm_front.h"
-#include "xen_drm_front_shbuf.h"
 
 struct xen_gem_object {
 	struct drm_gem_object base;
diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c b/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
deleted file mode 100644
index d333b67cc1a0..000000000000
--- a/drivers/gpu/drm/xen/xen_drm_front_shbuf.c
+++ /dev/null
@@ -1,414 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 OR MIT
-
-/*
- *  Xen para-virtual DRM device
- *
- * Copyright (C) 2016-2018 EPAM Systems Inc.
- *
- * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
- */
-
-#include <drm/drmP.h>
-
-#if defined(CONFIG_X86)
-#include <drm/drm_cache.h>
-#endif
-#include <linux/errno.h>
-#include <linux/mm.h>
-
-#include <asm/xen/hypervisor.h>
-#include <xen/balloon.h>
-#include <xen/xen.h>
-#include <xen/xenbus.h>
-#include <xen/interface/io/ring.h>
-#include <xen/interface/io/displif.h>
-
-#include "xen_drm_front.h"
-#include "xen_drm_front_shbuf.h"
-
-struct xen_drm_front_shbuf_ops {
-	/*
-	 * Calculate number of grefs required to handle this buffer,
-	 * e.g. if grefs are required for page directory only or the buffer
-	 * pages as well.
-	 */
-	void (*calc_num_grefs)(struct xen_drm_front_shbuf *buf);
-	/* Fill page directory according to para-virtual display protocol. */
-	void (*fill_page_dir)(struct xen_drm_front_shbuf *buf);
-	/* Claim grant references for the pages of the buffer. */
-	int (*grant_refs_for_buffer)(struct xen_drm_front_shbuf *buf,
-				     grant_ref_t *priv_gref_head, int gref_idx);
-	/* Map grant references of the buffer. */
-	int (*map)(struct xen_drm_front_shbuf *buf);
-	/* Unmap grant references of the buffer. */
-	int (*unmap)(struct xen_drm_front_shbuf *buf);
-};
-
-grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf)
-{
-	if (!buf->grefs)
-		return GRANT_INVALID_REF;
-
-	return buf->grefs[0];
-}
-
-int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf)
-{
-	if (buf->ops->map)
-		return buf->ops->map(buf);
-
-	/* no need to map own grant references */
-	return 0;
-}
-
-int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf)
-{
-	if (buf->ops->unmap)
-		return buf->ops->unmap(buf);
-
-	/* no need to unmap own grant references */
-	return 0;
-}
-
-void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf)
-{
-#if defined(CONFIG_X86)
-	drm_clflush_pages(buf->pages, buf->num_pages);
-#endif
-}
-
-void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf)
-{
-	if (buf->grefs) {
-		int i;
-
-		for (i = 0; i < buf->num_grefs; i++)
-			if (buf->grefs[i] != GRANT_INVALID_REF)
-				gnttab_end_foreign_access(buf->grefs[i],
-							  0, 0UL);
-	}
-	kfree(buf->grefs);
-	kfree(buf->directory);
-	kfree(buf);
-}
-
-/*
- * number of grefs a page can hold with respect to the
- * struct xendispl_page_directory header
- */
-#define XEN_DRM_NUM_GREFS_PER_PAGE ((PAGE_SIZE - \
-		offsetof(struct xendispl_page_directory, gref)) / \
-		sizeof(grant_ref_t))
-
-static int get_num_pages_dir(struct xen_drm_front_shbuf *buf)
-{
-	/* number of pages the page directory consumes itself */
-	return DIV_ROUND_UP(buf->num_pages, XEN_DRM_NUM_GREFS_PER_PAGE);
-}
-
-static void backend_calc_num_grefs(struct xen_drm_front_shbuf *buf)
-{
-	/* only for pages the page directory consumes itself */
-	buf->num_grefs = get_num_pages_dir(buf);
-}
-
-static void guest_calc_num_grefs(struct xen_drm_front_shbuf *buf)
-{
-	/*
-	 * number of pages the page directory consumes itself
-	 * plus grefs for the buffer pages
-	 */
-	buf->num_grefs = get_num_pages_dir(buf) + buf->num_pages;
-}
-
-#define xen_page_to_vaddr(page) \
-		((uintptr_t)pfn_to_kaddr(page_to_xen_pfn(page)))
-
-static int backend_unmap(struct xen_drm_front_shbuf *buf)
-{
-	struct gnttab_unmap_grant_ref *unmap_ops;
-	int i, ret;
-
-	if (!buf->pages || !buf->backend_map_handles || !buf->grefs)
-		return 0;
-
-	unmap_ops = kcalloc(buf->num_pages, sizeof(*unmap_ops),
-			    GFP_KERNEL);
-	if (!unmap_ops) {
-		DRM_ERROR("Failed to get memory while unmapping\n");
-		return -ENOMEM;
-	}
-
-	for (i = 0; i < buf->num_pages; i++) {
-		phys_addr_t addr;
-
-		addr = xen_page_to_vaddr(buf->pages[i]);
-		gnttab_set_unmap_op(&unmap_ops[i], addr, GNTMAP_host_map,
-				    buf->backend_map_handles[i]);
-	}
-
-	ret = gnttab_unmap_refs(unmap_ops, NULL, buf->pages,
-				buf->num_pages);
-
-	for (i = 0; i < buf->num_pages; i++) {
-		if (unlikely(unmap_ops[i].status != GNTST_okay))
-			DRM_ERROR("Failed to unmap page %d: %d\n",
-				  i, unmap_ops[i].status);
-	}
-
-	if (ret)
-		DRM_ERROR("Failed to unmap grant references, ret %d", ret);
-
-	kfree(unmap_ops);
-	kfree(buf->backend_map_handles);
-	buf->backend_map_handles = NULL;
-	return ret;
-}
-
-static int backend_map(struct xen_drm_front_shbuf *buf)
-{
-	struct gnttab_map_grant_ref *map_ops = NULL;
-	unsigned char *ptr;
-	int ret, cur_gref, cur_dir_page, cur_page, grefs_left;
-
-	map_ops = kcalloc(buf->num_pages, sizeof(*map_ops), GFP_KERNEL);
-	if (!map_ops)
-		return -ENOMEM;
-
-	buf->backend_map_handles = kcalloc(buf->num_pages,
-					   sizeof(*buf->backend_map_handles),
-					   GFP_KERNEL);
-	if (!buf->backend_map_handles) {
-		kfree(map_ops);
-		return -ENOMEM;
-	}
-
-	/*
-	 * read page directory to get grefs from the backend: for external
-	 * buffer we only allocate buf->grefs for the page directory,
-	 * so buf->num_grefs has number of pages in the page directory itself
-	 */
-	ptr = buf->directory;
-	grefs_left = buf->num_pages;
-	cur_page = 0;
-	for (cur_dir_page = 0; cur_dir_page < buf->num_grefs; cur_dir_page++) {
-		struct xendispl_page_directory *page_dir =
-				(struct xendispl_page_directory *)ptr;
-		int to_copy = XEN_DRM_NUM_GREFS_PER_PAGE;
-
-		if (to_copy > grefs_left)
-			to_copy = grefs_left;
-
-		for (cur_gref = 0; cur_gref < to_copy; cur_gref++) {
-			phys_addr_t addr;
-
-			addr = xen_page_to_vaddr(buf->pages[cur_page]);
-			gnttab_set_map_op(&map_ops[cur_page], addr,
-					  GNTMAP_host_map,
-					  page_dir->gref[cur_gref],
-					  buf->xb_dev->otherend_id);
-			cur_page++;
-		}
-
-		grefs_left -= to_copy;
-		ptr += PAGE_SIZE;
-	}
-	ret = gnttab_map_refs(map_ops, NULL, buf->pages, buf->num_pages);
-
-	/* save handles even if error, so we can unmap */
-	for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
-		buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
-		if (unlikely(map_ops[cur_page].status != GNTST_okay))
-			DRM_ERROR("Failed to map page %d: %d\n",
-				  cur_page, map_ops[cur_page].status);
-	}
-
-	if (ret) {
-		DRM_ERROR("Failed to map grant references, ret %d", ret);
-		backend_unmap(buf);
-	}
-
-	kfree(map_ops);
-	return ret;
-}
-
-static void backend_fill_page_dir(struct xen_drm_front_shbuf *buf)
-{
-	struct xendispl_page_directory *page_dir;
-	unsigned char *ptr;
-	int i, num_pages_dir;
-
-	ptr = buf->directory;
-	num_pages_dir = get_num_pages_dir(buf);
-
-	/* fill only grefs for the page directory itself */
-	for (i = 0; i < num_pages_dir - 1; i++) {
-		page_dir = (struct xendispl_page_directory *)ptr;
-
-		page_dir->gref_dir_next_page = buf->grefs[i + 1];
-		ptr += PAGE_SIZE;
-	}
-	/* last page must say there is no more pages */
-	page_dir = (struct xendispl_page_directory *)ptr;
-	page_dir->gref_dir_next_page = GRANT_INVALID_REF;
-}
-
-static void guest_fill_page_dir(struct xen_drm_front_shbuf *buf)
-{
-	unsigned char *ptr;
-	int cur_gref, grefs_left, to_copy, i, num_pages_dir;
-
-	ptr = buf->directory;
-	num_pages_dir = get_num_pages_dir(buf);
-
-	/*
-	 * while copying, skip grefs at start, they are for pages
-	 * granted for the page directory itself
-	 */
-	cur_gref = num_pages_dir;
-	grefs_left = buf->num_pages;
-	for (i = 0; i < num_pages_dir; i++) {
-		struct xendispl_page_directory *page_dir =
-				(struct xendispl_page_directory *)ptr;
-
-		if (grefs_left <= XEN_DRM_NUM_GREFS_PER_PAGE) {
-			to_copy = grefs_left;
-			page_dir->gref_dir_next_page = GRANT_INVALID_REF;
-		} else {
-			to_copy = XEN_DRM_NUM_GREFS_PER_PAGE;
-			page_dir->gref_dir_next_page = buf->grefs[i + 1];
-		}
-		memcpy(&page_dir->gref, &buf->grefs[cur_gref],
-		       to_copy * sizeof(grant_ref_t));
-		ptr += PAGE_SIZE;
-		grefs_left -= to_copy;
-		cur_gref += to_copy;
-	}
-}
-
-static int guest_grant_refs_for_buffer(struct xen_drm_front_shbuf *buf,
-				       grant_ref_t *priv_gref_head,
-				       int gref_idx)
-{
-	int i, cur_ref, otherend_id;
-
-	otherend_id = buf->xb_dev->otherend_id;
-	for (i = 0; i < buf->num_pages; i++) {
-		cur_ref = gnttab_claim_grant_reference(priv_gref_head);
-		if (cur_ref < 0)
-			return cur_ref;
-
-		gnttab_grant_foreign_access_ref(cur_ref, otherend_id,
-						xen_page_to_gfn(buf->pages[i]),
-						0);
-		buf->grefs[gref_idx++] = cur_ref;
-	}
-	return 0;
-}
-
-static int grant_references(struct xen_drm_front_shbuf *buf)
-{
-	grant_ref_t priv_gref_head;
-	int ret, i, j, cur_ref;
-	int otherend_id, num_pages_dir;
-
-	ret = gnttab_alloc_grant_references(buf->num_grefs, &priv_gref_head);
-	if (ret < 0) {
-		DRM_ERROR("Cannot allocate grant references\n");
-		return ret;
-	}
-
-	otherend_id = buf->xb_dev->otherend_id;
-	j = 0;
-	num_pages_dir = get_num_pages_dir(buf);
-	for (i = 0; i < num_pages_dir; i++) {
-		unsigned long frame;
-
-		cur_ref = gnttab_claim_grant_reference(&priv_gref_head);
-		if (cur_ref < 0)
-			return cur_ref;
-
-		frame = xen_page_to_gfn(virt_to_page(buf->directory +
-					PAGE_SIZE * i));
-		gnttab_grant_foreign_access_ref(cur_ref, otherend_id, frame, 0);
-		buf->grefs[j++] = cur_ref;
-	}
-
-	if (buf->ops->grant_refs_for_buffer) {
-		ret = buf->ops->grant_refs_for_buffer(buf, &priv_gref_head, j);
-		if (ret)
-			return ret;
-	}
-
-	gnttab_free_grant_references(priv_gref_head);
-	return 0;
-}
-
-static int alloc_storage(struct xen_drm_front_shbuf *buf)
-{
-	buf->grefs = kcalloc(buf->num_grefs, sizeof(*buf->grefs), GFP_KERNEL);
-	if (!buf->grefs)
-		return -ENOMEM;
-
-	buf->directory = kcalloc(get_num_pages_dir(buf), PAGE_SIZE, GFP_KERNEL);
-	if (!buf->directory)
-		return -ENOMEM;
-
-	return 0;
-}
-
-/*
- * For be allocated buffers we don't need grant_refs_for_buffer as those
- * grant references are allocated at backend side
- */
-static const struct xen_drm_front_shbuf_ops backend_ops = {
-	.calc_num_grefs = backend_calc_num_grefs,
-	.fill_page_dir = backend_fill_page_dir,
-	.map = backend_map,
-	.unmap = backend_unmap
-};
-
-/* For locally granted references we do not need to map/unmap the references */
-static const struct xen_drm_front_shbuf_ops local_ops = {
-	.calc_num_grefs = guest_calc_num_grefs,
-	.fill_page_dir = guest_fill_page_dir,
-	.grant_refs_for_buffer = guest_grant_refs_for_buffer,
-};
-
-struct xen_drm_front_shbuf *
-xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg)
-{
-	struct xen_drm_front_shbuf *buf;
-	int ret;
-
-	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
-	if (!buf)
-		return ERR_PTR(-ENOMEM);
-
-	if (cfg->be_alloc)
-		buf->ops = &backend_ops;
-	else
-		buf->ops = &local_ops;
-
-	buf->xb_dev = cfg->xb_dev;
-	buf->num_pages = DIV_ROUND_UP(cfg->size, PAGE_SIZE);
-	buf->pages = cfg->pages;
-
-	buf->ops->calc_num_grefs(buf);
-
-	ret = alloc_storage(buf);
-	if (ret)
-		goto fail;
-
-	ret = grant_references(buf);
-	if (ret)
-		goto fail;
-
-	buf->ops->fill_page_dir(buf);
-
-	return buf;
-
-fail:
-	xen_drm_front_shbuf_free(buf);
-	return ERR_PTR(ret);
-}
diff --git a/drivers/gpu/drm/xen/xen_drm_front_shbuf.h b/drivers/gpu/drm/xen/xen_drm_front_shbuf.h
deleted file mode 100644
index 7545c692539e..000000000000
--- a/drivers/gpu/drm/xen/xen_drm_front_shbuf.h
+++ /dev/null
@@ -1,64 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 OR MIT */
-
-/*
- *  Xen para-virtual DRM device
- *
- * Copyright (C) 2016-2018 EPAM Systems Inc.
- *
- * Author: Oleksandr Andrushchenko <oleksandr_andrushchenko@epam.com>
- */
-
-#ifndef __XEN_DRM_FRONT_SHBUF_H_
-#define __XEN_DRM_FRONT_SHBUF_H_
-
-#include <linux/kernel.h>
-#include <linux/scatterlist.h>
-
-#include <xen/grant_table.h>
-
-struct xen_drm_front_shbuf {
-	/*
-	 * number of references granted for the backend use:
-	 *  - for allocated/imported dma-buf's this holds number of grant
-	 *    references for the page directory and pages of the buffer
-	 *  - for the buffer provided by the backend this holds number of
-	 *    grant references for the page directory as grant references for
-	 *    the buffer will be provided by the backend
-	 */
-	int num_grefs;
-	grant_ref_t *grefs;
-	unsigned char *directory;
-
-	int num_pages;
-	struct page **pages;
-
-	struct xenbus_device *xb_dev;
-
-	/* these are the ops used internally depending on be_alloc mode */
-	const struct xen_drm_front_shbuf_ops *ops;
-
-	/* Xen map handles for the buffer allocated by the backend */
-	grant_handle_t *backend_map_handles;
-};
-
-struct xen_drm_front_shbuf_cfg {
-	struct xenbus_device *xb_dev;
-	size_t size;
-	struct page **pages;
-	bool be_alloc;
-};
-
-struct xen_drm_front_shbuf *
-xen_drm_front_shbuf_alloc(struct xen_drm_front_shbuf_cfg *cfg);
-
-grant_ref_t xen_drm_front_shbuf_get_dir_start(struct xen_drm_front_shbuf *buf);
-
-int xen_drm_front_shbuf_map(struct xen_drm_front_shbuf *buf);
-
-int xen_drm_front_shbuf_unmap(struct xen_drm_front_shbuf *buf);
-
-void xen_drm_front_shbuf_flush(struct xen_drm_front_shbuf *buf);
-
-void xen_drm_front_shbuf_free(struct xen_drm_front_shbuf *buf);
-
-#endif /* __XEN_DRM_FRONT_SHBUF_H_ */