Diffstat (limited to 'drivers/gpu/drm/i915')
102 files changed, 5136 insertions, 2949 deletions
diff --git a/drivers/gpu/drm/i915/Kconfig.debug b/drivers/gpu/drm/i915/Kconfig.debug index fa36491495b1..108d21f34777 100644 --- a/drivers/gpu/drm/i915/Kconfig.debug +++ b/drivers/gpu/drm/i915/Kconfig.debug @@ -29,7 +29,6 @@ config DRM_I915_DEBUG select SW_SYNC # signaling validation framework (igt/syncobj*) select DRM_I915_SW_FENCE_DEBUG_OBJECTS select DRM_I915_SELFTEST - select DRM_I915_TRACE_GEM default n help Choose this option to turn on extra driver debugging that may affect @@ -53,6 +52,7 @@ config DRM_I915_DEBUG_GEM config DRM_I915_TRACE_GEM bool "Insert extra ftrace output from the GEM internals" + depends on DRM_I915_DEBUG_GEM select TRACING default n help diff --git a/drivers/gpu/drm/i915/gvt/Makefile b/drivers/gpu/drm/i915/gvt/Makefile index 18e1c172e792..347116faa558 100644 --- a/drivers/gpu/drm/i915/gvt/Makefile +++ b/drivers/gpu/drm/i915/gvt/Makefile @@ -2,7 +2,8 @@ GVT_DIR := gvt GVT_SOURCE := gvt.o aperture_gm.o handlers.o vgpu.o trace_points.o firmware.o \ interrupt.o gtt.o cfg_space.o opregion.o mmio.o display.o edid.o \ - execlist.o scheduler.o sched_policy.o render.o cmd_parser.o debugfs.o + execlist.o scheduler.o sched_policy.o mmio_context.o cmd_parser.o debugfs.o \ + fb_decoder.o dmabuf.o ccflags-y += -I$(src) -I$(src)/$(GVT_DIR) i915-y += $(addprefix $(GVT_DIR)/, $(GVT_SOURCE)) diff --git a/drivers/gpu/drm/i915/gvt/cfg_space.c b/drivers/gpu/drm/i915/gvt/cfg_space.c index 4ce2e6bd0680..97bfc00d2a82 100644 --- a/drivers/gpu/drm/i915/gvt/cfg_space.c +++ b/drivers/gpu/drm/i915/gvt/cfg_space.c @@ -335,7 +335,8 @@ int intel_vgpu_emulate_cfg_write(struct intel_vgpu *vgpu, unsigned int offset, case INTEL_GVT_PCI_OPREGION: if (WARN_ON(!IS_ALIGNED(offset, 4))) return -EINVAL; - ret = intel_vgpu_init_opregion(vgpu, *(u32 *)p_data); + ret = intel_vgpu_opregion_base_write_handler(vgpu, + *(u32 *)p_data); if (ret) return ret; diff --git a/drivers/gpu/drm/i915/gvt/cmd_parser.c b/drivers/gpu/drm/i915/gvt/cmd_parser.c index 18c45734c7a2..edec15d19538 100644 --- a/drivers/gpu/drm/i915/gvt/cmd_parser.c +++ b/drivers/gpu/drm/i915/gvt/cmd_parser.c @@ -825,6 +825,21 @@ static int force_nonpriv_reg_handler(struct parser_exec_state *s, return 0; } +static inline bool is_mocs_mmio(unsigned int offset) +{ + return ((offset >= 0xc800) && (offset <= 0xcff8)) || + ((offset >= 0xb020) && (offset <= 0xb0a0)); +} + +static int mocs_cmd_reg_handler(struct parser_exec_state *s, + unsigned int offset, unsigned int index) +{ + if (!is_mocs_mmio(offset)) + return -EINVAL; + vgpu_vreg(s->vgpu, offset) = cmd_val(s, index + 1); + return 0; +} + static int cmd_reg_handler(struct parser_exec_state *s, unsigned int offset, unsigned int index, char *cmd) { @@ -848,6 +863,10 @@ static int cmd_reg_handler(struct parser_exec_state *s, return 0; } + if (is_mocs_mmio(offset) && + mocs_cmd_reg_handler(s, offset, index)) + return -EINVAL; + if (is_force_nonpriv_mmio(offset) && force_nonpriv_reg_handler(s, offset, index)) return -EPERM; @@ -1220,13 +1239,13 @@ static int gen8_check_mi_display_flip(struct parser_exec_state *s, return 0; if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - stride = vgpu_vreg(s->vgpu, info->stride_reg) & GENMASK(9, 0); - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & + stride = vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(9, 0); + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & GENMASK(12, 10)) >> 10; } else { - stride = (vgpu_vreg(s->vgpu, info->stride_reg) & + stride = (vgpu_vreg_t(s->vgpu, info->stride_reg) & GENMASK(15, 6)) >> 6; - tile = (vgpu_vreg(s->vgpu, info->ctrl_reg) & (1 
<< 10)) >> 10; + tile = (vgpu_vreg_t(s->vgpu, info->ctrl_reg) & (1 << 10)) >> 10; } if (stride != info->stride_val) @@ -1245,21 +1264,21 @@ static int gen8_update_plane_mmio_from_mi_display_flip( struct drm_i915_private *dev_priv = s->vgpu->gvt->dev_priv; struct intel_vgpu *vgpu = s->vgpu; - set_mask_bits(&vgpu_vreg(vgpu, info->surf_reg), GENMASK(31, 12), + set_mask_bits(&vgpu_vreg_t(vgpu, info->surf_reg), GENMASK(31, 12), info->surf_val << 12); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(9, 0), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(9, 0), info->stride_val); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(12, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(12, 10), info->tile_val << 10); } else { - set_mask_bits(&vgpu_vreg(vgpu, info->stride_reg), GENMASK(15, 6), + set_mask_bits(&vgpu_vreg_t(vgpu, info->stride_reg), GENMASK(15, 6), info->stride_val << 6); - set_mask_bits(&vgpu_vreg(vgpu, info->ctrl_reg), GENMASK(10, 10), + set_mask_bits(&vgpu_vreg_t(vgpu, info->ctrl_reg), GENMASK(10, 10), info->tile_val << 10); } - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(info->pipe))++; intel_vgpu_trigger_virtual_event(vgpu, info->event); return 0; } diff --git a/drivers/gpu/drm/i915/gvt/display.c b/drivers/gpu/drm/i915/gvt/display.c index 355120865efd..dd96ffc878ac 100644 --- a/drivers/gpu/drm/i915/gvt/display.c +++ b/drivers/gpu/drm/i915/gvt/display.c @@ -59,7 +59,7 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - if (!(vgpu_vreg(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) + if (!(vgpu_vreg_t(vgpu, PIPECONF(_PIPE_EDP)) & PIPECONF_ENABLE)) return 0; if (!(vgpu_vreg(vgpu, _TRANS_DDI_FUNC_CTL_EDP) & TRANS_DDI_FUNC_ENABLE)) @@ -67,14 +67,14 @@ static int edp_pipe_is_enabled(struct intel_vgpu *vgpu) return 1; } -static int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; if (WARN_ON(pipe < PIPE_A || pipe >= I915_MAX_PIPES)) return -EINVAL; - if (vgpu_vreg(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) + if (vgpu_vreg_t(vgpu, PIPECONF(pipe)) & PIPECONF_ENABLE) return 1; if (edp_pipe_is_enabled(vgpu) && @@ -169,103 +169,105 @@ static u8 dpcd_fix_data[DPCD_HEADER_SIZE] = { static void emulate_monitor_status_change(struct intel_vgpu *vgpu) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTB_HOTPLUG_CPT | SDE_PORTC_HOTPLUG_CPT | SDE_PORTD_HOTPLUG_CPT); if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - vgpu_vreg(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | + vgpu_vreg_t(vgpu, SDEISR) &= ~(SDE_PORTA_HOTPLUG_SPT | SDE_PORTE_HOTPLUG_SPT); - vgpu_vreg(vgpu, SKL_FUSE_STATUS) |= + vgpu_vreg_t(vgpu, SKL_FUSE_STATUS) |= SKL_FUSE_DOWNLOAD_STATUS | SKL_FUSE_PG_DIST_STATUS(SKL_PG0) | SKL_FUSE_PG_DIST_STATUS(SKL_PG1) | SKL_FUSE_PG_DIST_STATUS(SKL_PG2); - vgpu_vreg(vgpu, LCPLL1_CTL) |= + vgpu_vreg_t(vgpu, LCPLL1_CTL) |= LCPLL_PLL_ENABLE | LCPLL_PLL_LOCK; - vgpu_vreg(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; + vgpu_vreg_t(vgpu, LCPLL2_CTL) |= LCPLL_PLL_ENABLE; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_B)) { - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIB_DETECTED; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= 
SFUSE_STRAP_DDIB_DETECTED; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_B << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_B)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_B)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_B)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTB_HOTPLUG_CPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_C)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTC_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_C << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_C)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_C)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_C)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDIC_DETECTED; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_D)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTD_HOTPLUG_CPT; + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) &= ~(TRANS_DDI_BPC_MASK | TRANS_DDI_MODE_SELECT_MASK | TRANS_DDI_PORT_MASK); - vgpu_vreg(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= + vgpu_vreg_t(vgpu, TRANS_DDI_FUNC_CTL(TRANSCODER_A)) |= (TRANS_DDI_BPC_8 | TRANS_DDI_MODE_SELECT_DP_SST | (PORT_D << TRANS_DDI_PORT_SHIFT) | TRANS_DDI_FUNC_ENABLE); if (IS_BROADWELL(dev_priv)) { - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) &= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) &= ~PORT_CLK_SEL_MASK; - vgpu_vreg(vgpu, PORT_CLK_SEL(PORT_D)) |= + vgpu_vreg_t(vgpu, PORT_CLK_SEL(PORT_D)) |= PORT_CLK_SEL_LCPLL_810; } - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; - vgpu_vreg(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) |= DDI_BUF_CTL_ENABLE; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_D)) &= ~DDI_BUF_IS_IDLE; + vgpu_vreg_t(vgpu, SFUSE_STRAP) |= SFUSE_STRAP_DDID_DETECTED; } if ((IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) && intel_vgpu_has_monitor_on_port(vgpu, PORT_E)) { - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTE_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= 
SDE_PORTE_HOTPLUG_SPT; } if (intel_vgpu_has_monitor_on_port(vgpu, PORT_A)) { if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, GEN8_DE_PORT_ISR) |= + vgpu_vreg_t(vgpu, GEN8_DE_PORT_ISR) |= GEN8_PORT_DP_A_HOTPLUG; else - vgpu_vreg(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; + vgpu_vreg_t(vgpu, SDEISR) |= SDE_PORTA_HOTPLUG_SPT; - vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; + vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_A)) |= DDI_INIT_DISPLAY_DETECTED; } /* Clear host CRT status, so guest couldn't detect this host CRT. */ if (IS_BROADWELL(dev_priv)) - vgpu_vreg(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + vgpu_vreg_t(vgpu, PCH_ADPA) &= ~ADPA_CRT_HOTPLUG_MONITOR_MASK; + + vgpu_vreg_t(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; } static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) @@ -282,7 +284,6 @@ static void clean_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num) static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, int type, unsigned int resolution) { - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; struct intel_vgpu_port *port = intel_vgpu_port(vgpu, port_num); if (WARN_ON(resolution >= GVT_EDID_NUM)) @@ -308,7 +309,7 @@ static int setup_virtual_dp_monitor(struct intel_vgpu *vgpu, int port_num, port->type = type; emulate_monitor_status_change(vgpu); - vgpu_vreg(vgpu, PIPECONF(PIPE_A)) |= PIPECONF_ENABLE; + return 0; } @@ -368,12 +369,12 @@ static void emulate_vblank_on_pipe(struct intel_vgpu *vgpu, int pipe) if (!pipe_is_enabled(vgpu, pipe)) continue; - vgpu_vreg(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FLIPCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, event); } if (pipe_is_enabled(vgpu, pipe)) { - vgpu_vreg(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; + vgpu_vreg_t(vgpu, PIPE_FRMCOUNT_G4X(pipe))++; intel_vgpu_trigger_virtual_event(vgpu, vblank_event[pipe]); } } diff --git a/drivers/gpu/drm/i915/gvt/display.h b/drivers/gpu/drm/i915/gvt/display.h index d73de22102e2..b46b86892d58 100644 --- a/drivers/gpu/drm/i915/gvt/display.h +++ b/drivers/gpu/drm/i915/gvt/display.h @@ -179,4 +179,6 @@ int intel_vgpu_init_display(struct intel_vgpu *vgpu, u64 resolution); void intel_vgpu_reset_display(struct intel_vgpu *vgpu); void intel_vgpu_clean_display(struct intel_vgpu *vgpu); +int pipe_is_enabled(struct intel_vgpu *vgpu, int pipe); + #endif diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.c b/drivers/gpu/drm/i915/gvt/dmabuf.c new file mode 100644 index 000000000000..2ab584f97dfb --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.c @@ -0,0 +1,537 @@ +/* + * Copyright 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Authors: + * Zhiyuan Lv <zhiyuan.lv@intel.com> + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang <tina.zhang@intel.com> + */ + +#include <linux/dma-buf.h> +#include <drm/drmP.h> +#include <linux/vfio.h> + +#include "i915_drv.h" +#include "gvt.h" + +#define GEN8_DECODE_PTE(pte) (pte & GENMASK_ULL(63, 12)) + +static int vgpu_gem_get_pages( + struct drm_i915_gem_object *obj) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct sg_table *st; + struct scatterlist *sg; + int i, ret; + gen8_pte_t __iomem *gtt_entries; + struct intel_vgpu_fb_info *fb_info; + + fb_info = (struct intel_vgpu_fb_info *)obj->gvt_info; + if (WARN_ON(!fb_info)) + return -ENODEV; + + st = kmalloc(sizeof(*st), GFP_KERNEL); + if (unlikely(!st)) + return -ENOMEM; + + ret = sg_alloc_table(st, fb_info->size, GFP_KERNEL); + if (ret) { + kfree(st); + return ret; + } + gtt_entries = (gen8_pte_t __iomem *)dev_priv->ggtt.gsm + + (fb_info->start >> PAGE_SHIFT); + for_each_sg(st->sgl, sg, fb_info->size, i) { + sg->offset = 0; + sg->length = PAGE_SIZE; + sg_dma_address(sg) = + GEN8_DECODE_PTE(readq(>t_entries[i])); + sg_dma_len(sg) = PAGE_SIZE; + } + + __i915_gem_object_set_pages(obj, st, PAGE_SIZE); + + return 0; +} + +static void vgpu_gem_put_pages(struct drm_i915_gem_object *obj, + struct sg_table *pages) +{ + sg_free_table(pages); + kfree(pages); +} + +static void dmabuf_gem_object_free(struct kref *kref) +{ + struct intel_vgpu_dmabuf_obj *obj = + container_of(kref, struct intel_vgpu_dmabuf_obj, kref); + struct intel_vgpu *vgpu = obj->vgpu; + struct list_head *pos; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + if (vgpu && vgpu->active && !list_empty(&vgpu->dmabuf_obj_list_head)) { + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, + struct intel_vgpu_dmabuf_obj, list); + if (dmabuf_obj == obj) { + intel_gvt_hypervisor_put_vfio_device(vgpu); + idr_remove(&vgpu->object_idr, + dmabuf_obj->dmabuf_id); + kfree(dmabuf_obj->info); + kfree(dmabuf_obj); + list_del(pos); + break; + } + } + } else { + /* Free the orphan dmabuf_objs here */ + kfree(obj->info); + kfree(obj); + } +} + + +static inline void dmabuf_obj_get(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_get(&obj->kref); +} + +static inline void dmabuf_obj_put(struct intel_vgpu_dmabuf_obj *obj) +{ + kref_put(&obj->kref, dmabuf_gem_object_free); +} + +static void vgpu_gem_release(struct drm_i915_gem_object *gem_obj) +{ + + struct intel_vgpu_fb_info *fb_info = gem_obj->gvt_info; + struct intel_vgpu_dmabuf_obj *obj = fb_info->obj; + struct intel_vgpu *vgpu = obj->vgpu; + + if (vgpu) { + mutex_lock(&vgpu->dmabuf_lock); + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + mutex_unlock(&vgpu->dmabuf_lock); + } else { + /* vgpu is NULL, as it has been removed already */ + gem_obj->base.dma_buf = NULL; + dmabuf_obj_put(obj); + } +} + +static const struct drm_i915_gem_object_ops intel_vgpu_gem_ops = { + .flags = I915_GEM_OBJECT_IS_PROXY, + .get_pages = vgpu_gem_get_pages, + .put_pages = vgpu_gem_put_pages, + .release = vgpu_gem_release, +}; + +static struct drm_i915_gem_object *vgpu_create_gem(struct drm_device *dev, + struct intel_vgpu_fb_info *info) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct drm_i915_gem_object *obj; + + obj = 
i915_gem_object_alloc(dev_priv); + if (obj == NULL) + return NULL; + + drm_gem_private_object_init(dev, &obj->base, + info->size << PAGE_SHIFT); + i915_gem_object_init(obj, &intel_vgpu_gem_ops); + + obj->base.read_domains = I915_GEM_DOMAIN_GTT; + obj->base.write_domain = 0; + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + unsigned int tiling_mode = 0; + unsigned int stride = 0; + + switch (info->drm_format_mod << 10) { + case PLANE_CTL_TILED_LINEAR: + tiling_mode = I915_TILING_NONE; + break; + case PLANE_CTL_TILED_X: + tiling_mode = I915_TILING_X; + stride = info->stride; + break; + case PLANE_CTL_TILED_Y: + tiling_mode = I915_TILING_Y; + stride = info->stride; + break; + default: + gvt_dbg_core("not supported tiling mode\n"); + } + obj->tiling_and_stride = tiling_mode | stride; + } else { + obj->tiling_and_stride = info->drm_format_mod ? + I915_TILING_X : 0; + } + + return obj; +} + +static int vgpu_get_plane_info(struct drm_device *dev, + struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *info, + int plane_id) +{ + struct drm_i915_private *dev_priv = to_i915(dev); + struct intel_vgpu_primary_plane_format p; + struct intel_vgpu_cursor_plane_format c; + int ret; + + if (plane_id == DRM_PLANE_TYPE_PRIMARY) { + ret = intel_vgpu_decode_primary_plane(vgpu, &p); + if (ret) + return ret; + info->start = p.base; + info->start_gpa = p.base_gpa; + info->width = p.width; + info->height = p.height; + info->stride = p.stride; + info->drm_format = p.drm_format; + info->drm_format_mod = p.tiled; + info->size = (((p.stride * p.height * p.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else if (plane_id == DRM_PLANE_TYPE_CURSOR) { + ret = intel_vgpu_decode_cursor_plane(vgpu, &c); + if (ret) + return ret; + info->start = c.base; + info->start_gpa = c.base_gpa; + info->width = c.width; + info->height = c.height; + info->stride = c.width * (c.bpp / 8); + info->drm_format = c.drm_format; + info->drm_format_mod = 0; + info->x_pos = c.x_pos; + info->y_pos = c.y_pos; + + /* The invalid cursor hotspot value is delivered to host + * until we find a way to get the cursor hotspot info of + * guest OS. 
+ */ + info->x_hot = UINT_MAX; + info->y_hot = UINT_MAX; + info->size = (((info->stride * c.height * c.bpp) / 8) + + (PAGE_SIZE - 1)) >> PAGE_SHIFT; + } else { + gvt_vgpu_err("invalid plane id:%d\n", plane_id); + return -EINVAL; + } + + if (info->size == 0) { + gvt_vgpu_err("fb size is zero\n"); + return -EINVAL; + } + + if (info->start & (PAGE_SIZE - 1)) { + gvt_vgpu_err("Not aligned fb address:0x%llx\n", info->start); + return -EFAULT; + } + if (((info->start >> PAGE_SHIFT) + info->size) > + ggtt_total_entries(&dev_priv->ggtt)) { + gvt_vgpu_err("Invalid GTT offset or size\n"); + return -EFAULT; + } + + if (!intel_gvt_ggtt_validate_range(vgpu, info->start, info->size)) { + gvt_vgpu_err("invalid gma addr\n"); + return -EFAULT; + } + + return 0; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_info(struct intel_vgpu *vgpu, + struct intel_vgpu_fb_info *latest_info) +{ + struct list_head *pos; + struct intel_vgpu_fb_info *fb_info; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if ((dmabuf_obj == NULL) || + (dmabuf_obj->info == NULL)) + continue; + + fb_info = (struct intel_vgpu_fb_info *)dmabuf_obj->info; + if ((fb_info->start == latest_info->start) && + (fb_info->start_gpa == latest_info->start_gpa) && + (fb_info->size == latest_info->size) && + (fb_info->drm_format_mod == latest_info->drm_format_mod) && + (fb_info->drm_format == latest_info->drm_format) && + (fb_info->width == latest_info->width) && + (fb_info->height == latest_info->height)) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static struct intel_vgpu_dmabuf_obj * +pick_dmabuf_by_num(struct intel_vgpu *vgpu, u32 id) +{ + struct list_head *pos; + struct intel_vgpu_dmabuf_obj *dmabuf_obj = NULL; + struct intel_vgpu_dmabuf_obj *ret = NULL; + + list_for_each(pos, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + if (!dmabuf_obj) + continue; + + if (dmabuf_obj->dmabuf_id == id) { + ret = dmabuf_obj; + break; + } + } + + return ret; +} + +static void update_fb_info(struct vfio_device_gfx_plane_info *gvt_dmabuf, + struct intel_vgpu_fb_info *fb_info) +{ + gvt_dmabuf->drm_format = fb_info->drm_format; + gvt_dmabuf->width = fb_info->width; + gvt_dmabuf->height = fb_info->height; + gvt_dmabuf->stride = fb_info->stride; + gvt_dmabuf->size = fb_info->size; + gvt_dmabuf->x_pos = fb_info->x_pos; + gvt_dmabuf->y_pos = fb_info->y_pos; + gvt_dmabuf->x_hot = fb_info->x_hot; + gvt_dmabuf->y_hot = fb_info->y_hot; +} + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct vfio_device_gfx_plane_info *gfx_plane_info = args; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct intel_vgpu_fb_info fb_info; + int ret = 0; + + if (gfx_plane_info->flags == (VFIO_GFX_PLANE_TYPE_DMABUF | + VFIO_GFX_PLANE_TYPE_PROBE)) + return ret; + else if ((gfx_plane_info->flags & ~VFIO_GFX_PLANE_TYPE_DMABUF) || + (!gfx_plane_info->flags)) + return -EINVAL; + + ret = vgpu_get_plane_info(dev, vgpu, &fb_info, + gfx_plane_info->drm_plane_type); + if (ret != 0) + goto out; + + mutex_lock(&vgpu->dmabuf_lock); + /* If exists, pick up the exposed dmabuf_obj */ + dmabuf_obj = pick_dmabuf_by_info(vgpu, &fb_info); + if (dmabuf_obj) { + update_fb_info(gfx_plane_info, &fb_info); + gfx_plane_info->dmabuf_id = dmabuf_obj->dmabuf_id; + + /* This buffer may be 
released between query_plane ioctl and + * get_dmabuf ioctl. Add the refcount to make sure it won't + * be released between the two ioctls. + */ + if (!dmabuf_obj->initref) { + dmabuf_obj->initref = true; + dmabuf_obj_get(dmabuf_obj); + } + ret = 0; + gvt_dbg_dpy("vgpu%d: re-use dmabuf_obj ref %d, id %d\n", + vgpu->id, kref_read(&dmabuf_obj->kref), + gfx_plane_info->dmabuf_id); + mutex_unlock(&vgpu->dmabuf_lock); + goto out; + } + + mutex_unlock(&vgpu->dmabuf_lock); + + /* Need to allocate a new one*/ + dmabuf_obj = kmalloc(sizeof(struct intel_vgpu_dmabuf_obj), GFP_KERNEL); + if (unlikely(!dmabuf_obj)) { + gvt_vgpu_err("alloc dmabuf_obj failed\n"); + ret = -ENOMEM; + goto out; + } + + dmabuf_obj->info = kmalloc(sizeof(struct intel_vgpu_fb_info), + GFP_KERNEL); + if (unlikely(!dmabuf_obj->info)) { + gvt_vgpu_err("allocate intel vgpu fb info failed\n"); + ret = -ENOMEM; + goto out_free_dmabuf; + } + memcpy(dmabuf_obj->info, &fb_info, sizeof(struct intel_vgpu_fb_info)); + + ((struct intel_vgpu_fb_info *)dmabuf_obj->info)->obj = dmabuf_obj; + + dmabuf_obj->vgpu = vgpu; + + ret = idr_alloc(&vgpu->object_idr, dmabuf_obj, 1, 0, GFP_NOWAIT); + if (ret < 0) + goto out_free_info; + gfx_plane_info->dmabuf_id = ret; + dmabuf_obj->dmabuf_id = ret; + + dmabuf_obj->initref = true; + + kref_init(&dmabuf_obj->kref); + + mutex_lock(&vgpu->dmabuf_lock); + if (intel_gvt_hypervisor_get_vfio_device(vgpu)) { + gvt_vgpu_err("get vfio device failed\n"); + mutex_unlock(&vgpu->dmabuf_lock); + goto out_free_info; + } + mutex_unlock(&vgpu->dmabuf_lock); + + update_fb_info(gfx_plane_info, &fb_info); + + INIT_LIST_HEAD(&dmabuf_obj->list); + mutex_lock(&vgpu->dmabuf_lock); + list_add_tail(&dmabuf_obj->list, &vgpu->dmabuf_obj_list_head); + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: %s new dmabuf_obj ref %d, id %d\n", vgpu->id, + __func__, kref_read(&dmabuf_obj->kref), ret); + + return 0; + +out_free_info: + kfree(dmabuf_obj->info); +out_free_dmabuf: + kfree(dmabuf_obj); +out: + /* ENODEV means plane isn't ready, which might be a normal case. */ + return (ret == -ENODEV) ? 
0 : ret; +} + +/* To associate an exposed dmabuf with the dmabuf_obj */ +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id) +{ + struct drm_device *dev = &vgpu->gvt->dev_priv->drm; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + struct drm_i915_gem_object *obj; + struct dma_buf *dmabuf; + int dmabuf_fd; + int ret = 0; + + mutex_lock(&vgpu->dmabuf_lock); + + dmabuf_obj = pick_dmabuf_by_num(vgpu, dmabuf_id); + if (dmabuf_obj == NULL) { + gvt_vgpu_err("invalid dmabuf id:%d\n", dmabuf_id); + ret = -EINVAL; + goto out; + } + + obj = vgpu_create_gem(dev, dmabuf_obj->info); + if (obj == NULL) { + gvt_vgpu_err("create gvt gem obj failed:%d\n", vgpu->id); + ret = -ENOMEM; + goto out; + } + + obj->gvt_info = dmabuf_obj->info; + + dmabuf = i915_gem_prime_export(dev, &obj->base, DRM_CLOEXEC | DRM_RDWR); + if (IS_ERR(dmabuf)) { + gvt_vgpu_err("export dma-buf failed\n"); + ret = PTR_ERR(dmabuf); + goto out_free_gem; + } + obj->base.dma_buf = dmabuf; + + i915_gem_object_put(obj); + + ret = dma_buf_fd(dmabuf, DRM_CLOEXEC | DRM_RDWR); + if (ret < 0) { + gvt_vgpu_err("create dma-buf fd failed ret:%d\n", ret); + goto out_free_dmabuf; + } + dmabuf_fd = ret; + + dmabuf_obj_get(dmabuf_obj); + + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + + mutex_unlock(&vgpu->dmabuf_lock); + + gvt_dbg_dpy("vgpu%d: dmabuf:%d, dmabuf ref %d, fd:%d\n" + " file count: %ld, GEM ref: %d\n", + vgpu->id, dmabuf_obj->dmabuf_id, + kref_read(&dmabuf_obj->kref), + dmabuf_fd, + file_count(dmabuf->file), + kref_read(&obj->base.refcount)); + + return dmabuf_fd; + +out_free_dmabuf: + dma_buf_put(dmabuf); +out_free_gem: + i915_gem_object_put(obj); +out: + mutex_unlock(&vgpu->dmabuf_lock); + return ret; +} + +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu) +{ + struct list_head *pos, *n; + struct intel_vgpu_dmabuf_obj *dmabuf_obj; + + mutex_lock(&vgpu->dmabuf_lock); + list_for_each_safe(pos, n, &vgpu->dmabuf_obj_list_head) { + dmabuf_obj = container_of(pos, struct intel_vgpu_dmabuf_obj, + list); + dmabuf_obj->vgpu = NULL; + + idr_remove(&vgpu->object_idr, dmabuf_obj->dmabuf_id); + intel_gvt_hypervisor_put_vfio_device(vgpu); + list_del(pos); + + /* dmabuf_obj might be freed in dmabuf_obj_put */ + if (dmabuf_obj->initref) { + dmabuf_obj->initref = false; + dmabuf_obj_put(dmabuf_obj); + } + + } + mutex_unlock(&vgpu->dmabuf_lock); +} diff --git a/drivers/gpu/drm/i915/gvt/dmabuf.h b/drivers/gpu/drm/i915/gvt/dmabuf.h new file mode 100644 index 000000000000..5f8f03fb1d1b --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/dmabuf.h @@ -0,0 +1,67 @@ +/* + * Copyright(c) 2017 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Zhiyuan Lv <zhiyuan.lv@intel.com> + * + * Contributors: + * Xiaoguang Chen + * Tina Zhang <tina.zhang@intel.com> + */ + +#ifndef _GVT_DMABUF_H_ +#define _GVT_DMABUF_H_ +#include <linux/vfio.h> + +struct intel_vgpu_fb_info { + __u64 start; + __u64 start_gpa; + __u64 drm_format_mod; + __u32 drm_format; /* drm format of plane */ + __u32 width; /* width of plane */ + __u32 height; /* height of plane */ + __u32 stride; /* stride of plane */ + __u32 size; /* size of plane in bytes, align on page */ + __u32 x_pos; /* horizontal position of cursor plane */ + __u32 y_pos; /* vertical position of cursor plane */ + __u32 x_hot; /* horizontal position of cursor hotspot */ + __u32 y_hot; /* vertical position of cursor hotspot */ + struct intel_vgpu_dmabuf_obj *obj; +}; + +/** + * struct intel_vgpu_dmabuf_obj- Intel vGPU device buffer object + */ +struct intel_vgpu_dmabuf_obj { + struct intel_vgpu *vgpu; + struct intel_vgpu_fb_info *info; + __u32 dmabuf_id; + struct kref kref; + bool initref; + struct list_head list; +}; + +int intel_vgpu_query_plane(struct intel_vgpu *vgpu, void *args); +int intel_vgpu_get_dmabuf(struct intel_vgpu *vgpu, unsigned int dmabuf_id); +void intel_vgpu_dmabuf_cleanup(struct intel_vgpu *vgpu); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/edid.c b/drivers/gpu/drm/i915/gvt/edid.c index 42cd09ec63fa..f61337632969 100644 --- a/drivers/gpu/drm/i915/gvt/edid.c +++ b/drivers/gpu/drm/i915/gvt/edid.c @@ -95,9 +95,9 @@ static inline int get_port_from_gmbus0(u32 gmbus0) static void reset_gmbus_controller(struct intel_vgpu *vgpu) { - vgpu_vreg(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) = GMBUS_HW_RDY; if (!vgpu->display.i2c_edid.edid_available) - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; } @@ -123,16 +123,16 @@ static int gmbus0_mmio_write(struct intel_vgpu *vgpu, vgpu->display.i2c_edid.state = I2C_GMBUS; vgpu->display.i2c_edid.gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY | GMBUS_HW_WAIT_PHASE; if (intel_vgpu_has_monitor_on_port(vgpu, port) && !intel_vgpu_port_is_dp(vgpu, port)) { vgpu->display.i2c_edid.port = port; vgpu->display.i2c_edid.edid_available = true; - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_SATOER; } else - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_SATOER; return 0; } @@ -159,8 +159,8 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * 2) HW_RDY bit asserted */ if (wvalue & GMBUS_SW_CLR_INT) { - vgpu_vreg(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_INT; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_HW_RDY; } /* For virtualization, we suppose that HW is always ready, @@ -208,7 +208,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * visible in gmbus interface) */ i2c_edid->gmbus.phase = GMBUS_IDLE_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) &= 
~GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) &= ~GMBUS_ACTIVE; } break; case NIDX_NS_W: @@ -220,7 +220,7 @@ static int gmbus1_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, * START (-->INDEX) -->DATA */ i2c_edid->gmbus.phase = GMBUS_DATA_PHASE; - vgpu_vreg(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; + vgpu_vreg_t(vgpu, PCH_GMBUS2) |= GMBUS_ACTIVE; break; default: gvt_vgpu_err("Unknown/reserved GMBUS cycle detected!\n"); @@ -256,7 +256,7 @@ static int gmbus3_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, u32 reg_data = 0; /* Data can only be recevied if previous settings correct */ - if (vgpu_vreg(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { + if (vgpu_vreg_t(vgpu, PCH_GMBUS1) & GMBUS_SLAVE_READ) { if (byte_left <= 0) { memcpy(p_data, &vgpu_vreg(vgpu, offset), bytes); return 0; diff --git a/drivers/gpu/drm/i915/gvt/execlist.c b/drivers/gpu/drm/i915/gvt/execlist.c index c9fa0fb488d3..769c1c24ae75 100644 --- a/drivers/gpu/drm/i915/gvt/execlist.c +++ b/drivers/gpu/drm/i915/gvt/execlist.c @@ -458,7 +458,7 @@ static int submit_context(struct intel_vgpu *vgpu, int ring_id, gvt_dbg_el("workload %p emulate schedule_in %d\n", workload, emulate_schedule_in); - queue_workload(workload); + intel_vgpu_queue_workload(workload); return 0; } @@ -528,7 +528,7 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id) vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw; } -void clean_execlist(struct intel_vgpu *vgpu) +static void clean_execlist(struct intel_vgpu *vgpu) { enum intel_engine_id i; struct intel_engine_cs *engine; @@ -542,7 +542,7 @@ void clean_execlist(struct intel_vgpu *vgpu) } } -void reset_execlist(struct intel_vgpu *vgpu, +static void reset_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask) { struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; @@ -553,7 +553,7 @@ void reset_execlist(struct intel_vgpu *vgpu, init_vgpu_execlist(vgpu, engine->id); } -int init_execlist(struct intel_vgpu *vgpu) +static int init_execlist(struct intel_vgpu *vgpu) { reset_execlist(vgpu, ALL_ENGINES); return 0; diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.c b/drivers/gpu/drm/i915/gvt/fb_decoder.c new file mode 100644 index 000000000000..6b50fe78dc1b --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.c @@ -0,0 +1,514 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Bing Niu <bing.niu@intel.com> + * Xu Han <xu.han@intel.com> + * Ping Gao <ping.a.gao@intel.com> + * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Yang Liu <yang2.liu@intel.com> + * Tina Zhang <tina.zhang@intel.com> + * + */ + +#include <uapi/drm/drm_fourcc.h> +#include "i915_drv.h" +#include "gvt.h" + +#define PRIMARY_FORMAT_NUM 16 +struct pixel_format { + int drm_format; /* Pixel format in DRM definition */ + int bpp; /* Bits per pixel, 0 indicates invalid */ + char *desc; /* The description */ +}; + +static struct pixel_format bdw_pixel_formats[] = { + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static struct pixel_format skl_pixel_formats[] = { + {DRM_FORMAT_YUYV, 16, "16-bit packed YUYV (8:8:8:8 MSB-V:Y2:U:Y1)"}, + {DRM_FORMAT_UYVY, 16, "16-bit packed UYVY (8:8:8:8 MSB-Y2:V:Y1:U)"}, + {DRM_FORMAT_YVYU, 16, "16-bit packed YVYU (8:8:8:8 MSB-U:Y2:V:Y1)"}, + {DRM_FORMAT_VYUY, 16, "16-bit packed VYUY (8:8:8:8 MSB-Y2:U:Y1:V)"}, + + {DRM_FORMAT_C8, 8, "8-bit Indexed"}, + {DRM_FORMAT_RGB565, 16, "16-bit BGRX (5:6:5 MSB-R:G:B)"}, + {DRM_FORMAT_ABGR8888, 32, "32-bit RGBA (8:8:8:8 MSB-A:B:G:R)"}, + {DRM_FORMAT_XBGR8888, 32, "32-bit RGBX (8:8:8:8 MSB-X:B:G:R)"}, + + {DRM_FORMAT_ARGB8888, 32, "32-bit BGRA (8:8:8:8 MSB-A:R:G:B)"}, + {DRM_FORMAT_XRGB8888, 32, "32-bit BGRX (8:8:8:8 MSB-X:R:G:B)"}, + {DRM_FORMAT_XBGR2101010, 32, "32-bit RGBX (2:10:10:10 MSB-X:B:G:R)"}, + {DRM_FORMAT_XRGB2101010, 32, "32-bit BGRX (2:10:10:10 MSB-X:R:G:B)"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, NULL}, +}; + +static int bdw_format_to_drm(int format) +{ + int bdw_pixel_formats_index = 6; + + switch (format) { + case DISPPLANE_8BPP: + bdw_pixel_formats_index = 0; + break; + case DISPPLANE_BGRX565: + bdw_pixel_formats_index = 1; + break; + case DISPPLANE_BGRX888: + bdw_pixel_formats_index = 2; + break; + case DISPPLANE_RGBX101010: + bdw_pixel_formats_index = 3; + break; + case DISPPLANE_BGRX101010: + bdw_pixel_formats_index = 4; + break; + case DISPPLANE_RGBX888: + bdw_pixel_formats_index = 5; + break; + + default: + break; + } + + return bdw_pixel_formats_index; +} + +static int skl_format_to_drm(int format, bool rgb_order, bool alpha, + int yuv_order) +{ + int skl_pixel_formats_index = 12; + + switch (format) { + case PLANE_CTL_FORMAT_INDEXED: + skl_pixel_formats_index = 4; + break; + case PLANE_CTL_FORMAT_RGB_565: + skl_pixel_formats_index = 5; + break; + case PLANE_CTL_FORMAT_XRGB_8888: + if (rgb_order) + skl_pixel_formats_index = alpha ? 6 : 7; + else + skl_pixel_formats_index = alpha ? 8 : 9; + break; + case PLANE_CTL_FORMAT_XRGB_2101010: + skl_pixel_formats_index = rgb_order ? 
10 : 11; + break; + case PLANE_CTL_FORMAT_YUV422: + skl_pixel_formats_index = yuv_order >> 16; + if (skl_pixel_formats_index > 3) + return -EINVAL; + break; + + default: + break; + } + + return skl_pixel_formats_index; +} + +static u32 intel_vgpu_get_stride(struct intel_vgpu *vgpu, int pipe, + u32 tiled, int stride_mask, int bpp) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + + u32 stride_reg = vgpu_vreg_t(vgpu, DSPSTRIDE(pipe)) & stride_mask; + u32 stride = stride_reg; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + switch (tiled) { + case PLANE_CTL_TILED_LINEAR: + stride = stride_reg * 64; + break; + case PLANE_CTL_TILED_X: + stride = stride_reg * 512; + break; + case PLANE_CTL_TILED_Y: + stride = stride_reg * 128; + break; + case PLANE_CTL_TILED_YF: + if (bpp == 8) + stride = stride_reg * 64; + else if (bpp == 16 || bpp == 32 || bpp == 64) + stride = stride_reg * 128; + else + gvt_dbg_core("skl: unsupported bpp:%d\n", bpp); + break; + default: + gvt_dbg_core("skl: unsupported tile format:%x\n", + tiled); + } + } + + return stride; +} + +static int get_active_pipe(struct intel_vgpu *vgpu) +{ + int i; + + for (i = 0; i < I915_MAX_PIPES; i++) + if (pipe_is_enabled(vgpu, i)) + break; + + return i; +} + +/** + * intel_vgpu_decode_primary_plane - Decode primary plane + * @vgpu: input vgpu + * @plane: primary plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. + */ +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane) +{ + u32 val, fmt; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, DSPCNTR(pipe)); + plane->enabled = !!(val & DISPLAY_PLANE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { + plane->tiled = (val & PLANE_CTL_TILED_MASK) >> + _PLANE_CTL_TILED_SHIFT; + fmt = skl_format_to_drm( + val & PLANE_CTL_FORMAT_MASK, + val & PLANE_CTL_ORDER_RGBX, + val & PLANE_CTL_ALPHA_MASK, + val & PLANE_CTL_YUV422_ORDER_MASK); + + if (fmt >= ARRAY_SIZE(skl_pixel_formats)) { + gvt_vgpu_err("Out-of-bounds pixel format index\n"); + return -EINVAL; + } + + plane->bpp = skl_pixel_formats[fmt].bpp; + plane->drm_format = skl_pixel_formats[fmt].drm_format; + } else { + plane->tiled = !!(val & DISPPLANE_TILED); + fmt = bdw_format_to_drm(val & DISPPLANE_PIXFORMAT_MASK); + plane->bpp = bdw_pixel_formats[fmt].bpp; + plane->drm_format = bdw_pixel_formats[fmt].drm_format; + } + + if (!plane->bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + + plane->hw_format = fmt; + + plane->base = vgpu_vreg_t(vgpu, DSPSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = intel_vgpu_get_stride(vgpu, pipe, (plane->tiled << 10), + (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) ? 
+ (_PRI_PLANE_STRIDE_MASK >> 6) : + _PRI_PLANE_STRIDE_MASK, plane->bpp); + + plane->width = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & _PIPE_H_SRCSZ_MASK) >> + _PIPE_H_SRCSZ_SHIFT; + plane->width += 1; + plane->height = (vgpu_vreg_t(vgpu, PIPESRC(pipe)) & + _PIPE_V_SRCSZ_MASK) >> _PIPE_V_SRCSZ_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + + val = vgpu_vreg_t(vgpu, DSPTILEOFF(pipe)); + plane->x_offset = (val & _PRI_PLANE_X_OFF_MASK) >> + _PRI_PLANE_X_OFF_SHIFT; + plane->y_offset = (val & _PRI_PLANE_Y_OFF_MASK) >> + _PRI_PLANE_Y_OFF_SHIFT; + + return 0; +} + +#define CURSOR_FORMAT_NUM (1 << 6) +struct cursor_mode_format { + int drm_format; /* Pixel format in DRM definition */ + u8 bpp; /* Bits per pixel; 0 indicates invalid */ + u32 width; /* In pixel */ + u32 height; /* In lines */ + char *desc; /* The description */ +}; + +static struct cursor_mode_format cursor_pixel_formats[] = { + {DRM_FORMAT_ARGB8888, 32, 128, 128, "128x128 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 256, 256, "256x256 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + {DRM_FORMAT_ARGB8888, 32, 64, 64, "64x64 32bpp ARGB"}, + + /* non-supported format has bpp default to 0 */ + {0, 0, 0, 0, NULL}, +}; + +static int cursor_mode_to_drm(int mode) +{ + int cursor_pixel_formats_index = 4; + + switch (mode) { + case CURSOR_MODE_128_ARGB_AX: + cursor_pixel_formats_index = 0; + break; + case CURSOR_MODE_256_ARGB_AX: + cursor_pixel_formats_index = 1; + break; + case CURSOR_MODE_64_ARGB_AX: + cursor_pixel_formats_index = 2; + break; + case CURSOR_MODE_64_32B_AX: + cursor_pixel_formats_index = 3; + break; + + default: + break; + } + + return cursor_pixel_formats_index; +} + +/** + * intel_vgpu_decode_cursor_plane - Decode sprite plane + * @vgpu: input vgpu + * @plane: cursor plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. 
+ */ +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane) +{ + u32 val, mode, index; + u32 alpha_plane, alpha_force; + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, CURCNTR(pipe)); + mode = val & CURSOR_MODE; + plane->enabled = (mode != CURSOR_MODE_DISABLE); + if (!plane->enabled) + return -ENODEV; + + index = cursor_mode_to_drm(mode); + + if (!cursor_pixel_formats[index].bpp) { + gvt_vgpu_err("Non-supported cursor mode (0x%x)\n", mode); + return -EINVAL; + } + plane->mode = mode; + plane->bpp = cursor_pixel_formats[index].bpp; + plane->drm_format = cursor_pixel_formats[index].drm_format; + plane->width = cursor_pixel_formats[index].width; + plane->height = cursor_pixel_formats[index].height; + + alpha_plane = (val & _CURSOR_ALPHA_PLANE_MASK) >> + _CURSOR_ALPHA_PLANE_SHIFT; + alpha_force = (val & _CURSOR_ALPHA_FORCE_MASK) >> + _CURSOR_ALPHA_FORCE_SHIFT; + if (alpha_plane || alpha_force) + gvt_dbg_core("alpha_plane=0x%x, alpha_force=0x%x\n", + alpha_plane, alpha_force); + + plane->base = vgpu_vreg_t(vgpu, CURBASE(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + val = vgpu_vreg_t(vgpu, CURPOS(pipe)); + plane->x_pos = (val & _CURSOR_POS_X_MASK) >> _CURSOR_POS_X_SHIFT; + plane->x_sign = (val & _CURSOR_SIGN_X_MASK) >> _CURSOR_SIGN_X_SHIFT; + plane->y_pos = (val & _CURSOR_POS_Y_MASK) >> _CURSOR_POS_Y_SHIFT; + plane->y_sign = (val & _CURSOR_SIGN_Y_MASK) >> _CURSOR_SIGN_Y_SHIFT; + + return 0; +} + +#define SPRITE_FORMAT_NUM (1 << 3) + +static struct pixel_format sprite_pixel_formats[SPRITE_FORMAT_NUM] = { + [0x0] = {DRM_FORMAT_YUV422, 16, "YUV 16-bit 4:2:2 packed"}, + [0x1] = {DRM_FORMAT_XRGB2101010, 32, "RGB 32-bit 2:10:10:10"}, + [0x2] = {DRM_FORMAT_XRGB8888, 32, "RGB 32-bit 8:8:8:8"}, + [0x4] = {DRM_FORMAT_AYUV, 32, + "YUV 32-bit 4:4:4 packed (8:8:8:8 MSB-X:Y:U:V)"}, +}; + +/** + * intel_vgpu_decode_sprite_plane - Decode sprite plane + * @vgpu: input vgpu + * @plane: sprite plane to save decoded info + * This function is called for decoding plane + * + * Returns: + * 0 on success, non-zero if failed. 
+ */ +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane) +{ + u32 val, fmt; + u32 color_order, yuv_order; + int drm_format; + int pipe; + + pipe = get_active_pipe(vgpu); + if (pipe >= I915_MAX_PIPES) + return -ENODEV; + + val = vgpu_vreg_t(vgpu, SPRCTL(pipe)); + plane->enabled = !!(val & SPRITE_ENABLE); + if (!plane->enabled) + return -ENODEV; + + plane->tiled = !!(val & SPRITE_TILED); + color_order = !!(val & SPRITE_RGB_ORDER_RGBX); + yuv_order = (val & SPRITE_YUV_BYTE_ORDER_MASK) >> + _SPRITE_YUV_ORDER_SHIFT; + + fmt = (val & SPRITE_PIXFORMAT_MASK) >> _SPRITE_FMT_SHIFT; + if (!sprite_pixel_formats[fmt].bpp) { + gvt_vgpu_err("Non-supported pixel format (0x%x)\n", fmt); + return -EINVAL; + } + plane->hw_format = fmt; + plane->bpp = sprite_pixel_formats[fmt].bpp; + drm_format = sprite_pixel_formats[fmt].drm_format; + + /* Order of RGB values in an RGBxxx buffer may be ordered RGB or + * BGR depending on the state of the color_order field + */ + if (!color_order) { + if (drm_format == DRM_FORMAT_XRGB2101010) + drm_format = DRM_FORMAT_XBGR2101010; + else if (drm_format == DRM_FORMAT_XRGB8888) + drm_format = DRM_FORMAT_XBGR8888; + } + + if (drm_format == DRM_FORMAT_YUV422) { + switch (yuv_order) { + case 0: + drm_format = DRM_FORMAT_YUYV; + break; + case 1: + drm_format = DRM_FORMAT_UYVY; + break; + case 2: + drm_format = DRM_FORMAT_YVYU; + break; + case 3: + drm_format = DRM_FORMAT_VYUY; + break; + default: + /* yuv_order has only 2 bits */ + break; + } + } + + plane->drm_format = drm_format; + + plane->base = vgpu_vreg_t(vgpu, SPRSURF(pipe)) & I915_GTT_PAGE_MASK; + if (!intel_gvt_ggtt_validate_range(vgpu, plane->base, 0)) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->base_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm, plane->base); + if (plane->base_gpa == INTEL_GVT_INVALID_ADDR) { + gvt_vgpu_err("invalid gma address: %lx\n", + (unsigned long)plane->base); + return -EINVAL; + } + + plane->stride = vgpu_vreg_t(vgpu, SPRSTRIDE(pipe)) & + _SPRITE_STRIDE_MASK; + + val = vgpu_vreg_t(vgpu, SPRSIZE(pipe)); + plane->height = (val & _SPRITE_SIZE_HEIGHT_MASK) >> + _SPRITE_SIZE_HEIGHT_SHIFT; + plane->width = (val & _SPRITE_SIZE_WIDTH_MASK) >> + _SPRITE_SIZE_WIDTH_SHIFT; + plane->height += 1; /* raw height is one minus the real value */ + plane->width += 1; /* raw width is one minus the real value */ + + val = vgpu_vreg_t(vgpu, SPRPOS(pipe)); + plane->x_pos = (val & _SPRITE_POS_X_MASK) >> _SPRITE_POS_X_SHIFT; + plane->y_pos = (val & _SPRITE_POS_Y_MASK) >> _SPRITE_POS_Y_SHIFT; + + val = vgpu_vreg_t(vgpu, SPROFFSET(pipe)); + plane->x_offset = (val & _SPRITE_OFFSET_START_X_MASK) >> + _SPRITE_OFFSET_START_X_SHIFT; + plane->y_offset = (val & _SPRITE_OFFSET_START_Y_MASK) >> + _SPRITE_OFFSET_START_Y_SHIFT; + + return 0; +} diff --git a/drivers/gpu/drm/i915/gvt/fb_decoder.h b/drivers/gpu/drm/i915/gvt/fb_decoder.h new file mode 100644 index 000000000000..cb055f3c81a2 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/fb_decoder.h @@ -0,0 +1,169 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Bing Niu <bing.niu@intel.com> + * Xu Han <xu.han@intel.com> + * Ping Gao <ping.a.gao@intel.com> + * Xiaoguang Chen <xiaoguang.chen@intel.com> + * Yang Liu <yang2.liu@intel.com> + * Tina Zhang <tina.zhang@intel.com> + * + */ + +#ifndef _GVT_FB_DECODER_H_ +#define _GVT_FB_DECODER_H_ + +#define _PLANE_CTL_FORMAT_SHIFT 24 +#define _PLANE_CTL_TILED_SHIFT 10 +#define _PIPE_V_SRCSZ_SHIFT 0 +#define _PIPE_V_SRCSZ_MASK (0xfff << _PIPE_V_SRCSZ_SHIFT) +#define _PIPE_H_SRCSZ_SHIFT 16 +#define _PIPE_H_SRCSZ_MASK (0x1fff << _PIPE_H_SRCSZ_SHIFT) + +#define _PRI_PLANE_FMT_SHIFT 26 +#define _PRI_PLANE_STRIDE_MASK (0x3ff << 6) +#define _PRI_PLANE_X_OFF_SHIFT 0 +#define _PRI_PLANE_X_OFF_MASK (0x1fff << _PRI_PLANE_X_OFF_SHIFT) +#define _PRI_PLANE_Y_OFF_SHIFT 16 +#define _PRI_PLANE_Y_OFF_MASK (0xfff << _PRI_PLANE_Y_OFF_SHIFT) + +#define _CURSOR_MODE 0x3f +#define _CURSOR_ALPHA_FORCE_SHIFT 8 +#define _CURSOR_ALPHA_FORCE_MASK (0x3 << _CURSOR_ALPHA_FORCE_SHIFT) +#define _CURSOR_ALPHA_PLANE_SHIFT 10 +#define _CURSOR_ALPHA_PLANE_MASK (0x3 << _CURSOR_ALPHA_PLANE_SHIFT) +#define _CURSOR_POS_X_SHIFT 0 +#define _CURSOR_POS_X_MASK (0x1fff << _CURSOR_POS_X_SHIFT) +#define _CURSOR_SIGN_X_SHIFT 15 +#define _CURSOR_SIGN_X_MASK (1 << _CURSOR_SIGN_X_SHIFT) +#define _CURSOR_POS_Y_SHIFT 16 +#define _CURSOR_POS_Y_MASK (0xfff << _CURSOR_POS_Y_SHIFT) +#define _CURSOR_SIGN_Y_SHIFT 31 +#define _CURSOR_SIGN_Y_MASK (1 << _CURSOR_SIGN_Y_SHIFT) + +#define _SPRITE_FMT_SHIFT 25 +#define _SPRITE_COLOR_ORDER_SHIFT 20 +#define _SPRITE_YUV_ORDER_SHIFT 16 +#define _SPRITE_STRIDE_SHIFT 6 +#define _SPRITE_STRIDE_MASK (0x1ff << _SPRITE_STRIDE_SHIFT) +#define _SPRITE_SIZE_WIDTH_SHIFT 0 +#define _SPRITE_SIZE_HEIGHT_SHIFT 16 +#define _SPRITE_SIZE_WIDTH_MASK (0x1fff << _SPRITE_SIZE_WIDTH_SHIFT) +#define _SPRITE_SIZE_HEIGHT_MASK (0xfff << _SPRITE_SIZE_HEIGHT_SHIFT) +#define _SPRITE_POS_X_SHIFT 0 +#define _SPRITE_POS_Y_SHIFT 16 +#define _SPRITE_POS_X_MASK (0x1fff << _SPRITE_POS_X_SHIFT) +#define _SPRITE_POS_Y_MASK (0xfff << _SPRITE_POS_Y_SHIFT) +#define _SPRITE_OFFSET_START_X_SHIFT 0 +#define _SPRITE_OFFSET_START_Y_SHIFT 16 +#define _SPRITE_OFFSET_START_X_MASK (0x1fff << _SPRITE_OFFSET_START_X_SHIFT) +#define _SPRITE_OFFSET_START_Y_MASK (0xfff << _SPRITE_OFFSET_START_Y_SHIFT) + +enum GVT_FB_EVENT { + FB_MODE_SET_START = 1, + FB_MODE_SET_END, + FB_DISPLAY_FLIP, +}; + 
+enum DDI_PORT { + DDI_PORT_NONE = 0, + DDI_PORT_B = 1, + DDI_PORT_C = 2, + DDI_PORT_D = 3, + DDI_PORT_E = 4 +}; + +struct intel_gvt; + +/* color space conversion and gamma correction are not included */ +struct intel_vgpu_primary_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the PRI_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* framebuffer base in graphics memory */ + u64 base_gpa; + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_sprite_plane_format { + u8 enabled; /* plane is enabled */ + u8 tiled; /* X-tiled */ + u8 bpp; /* bits per pixel */ + u32 hw_format; /* format field in the SPR_CTL register */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* sprite base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u32 x_offset; /* in pixels */ + u32 y_offset; /* in lines */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 stride; /* in bytes */ +}; + +struct intel_vgpu_cursor_plane_format { + u8 enabled; + u8 mode; /* cursor mode select */ + u8 bpp; /* bits per pixel */ + u32 drm_format; /* format in DRM definition */ + u32 base; /* cursor base in graphics memory */ + u64 base_gpa; + u32 x_pos; /* in pixels */ + u32 y_pos; /* in lines */ + u8 x_sign; /* X Position Sign */ + u8 y_sign; /* Y Position Sign */ + u32 width; /* in pixels */ + u32 height; /* in lines */ + u32 x_hot; /* in pixels */ + u32 y_hot; /* in pixels */ +}; + +struct intel_vgpu_pipe_format { + struct intel_vgpu_primary_plane_format primary; + struct intel_vgpu_sprite_plane_format sprite; + struct intel_vgpu_cursor_plane_format cursor; + enum DDI_PORT ddi_port; /* the DDI port that pipe is connected to */ +}; + +struct intel_vgpu_fb_format { + struct intel_vgpu_pipe_format pipes[I915_MAX_PIPES]; +}; + +int intel_vgpu_decode_primary_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_primary_plane_format *plane); +int intel_vgpu_decode_cursor_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_cursor_plane_format *plane); +int intel_vgpu_decode_sprite_plane(struct intel_vgpu *vgpu, + struct intel_vgpu_sprite_plane_format *plane); + +#endif diff --git a/drivers/gpu/drm/i915/gvt/gtt.c b/drivers/gpu/drm/i915/gvt/gtt.c index 71a0f2b87b3a..c4f752eeadcc 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.c +++ b/drivers/gpu/drm/i915/gvt/gtt.c @@ -1968,6 +1968,39 @@ int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, return ret; } +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes) +{ + struct intel_gvt *gvt = vgpu->gvt; + int ret = 0; + + if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { + struct intel_vgpu_page_track *t; + + mutex_lock(&gvt->lock); + + t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); + if (t) { + if (unlikely(vgpu->failsafe)) { + /* remove write protection to prevent furture traps */ + intel_vgpu_clean_page_track(vgpu, t); + } else { + ret = t->handler(t, pa, p_data, bytes); + if (ret) { + gvt_err("guest page write error %d, " + "gfn 0x%lx, pa 0x%llx, " + "var 0x%x, len %d\n", + ret, t->gfn, pa, + *(u32 *)p_data, bytes); + } + } + } + mutex_unlock(&gvt->lock); + } + return ret; +} + + static int alloc_scratch_pages(struct intel_vgpu *vgpu, intel_gvt_gtt_type_t type) { @@ -2244,7 +2277,7 @@ struct 
intel_vgpu_mm *intel_vgpu_find_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) @@ -2279,7 +2312,7 @@ int intel_vgpu_g2v_create_ppgtt_mm(struct intel_vgpu *vgpu, int intel_vgpu_g2v_destroy_ppgtt_mm(struct intel_vgpu *vgpu, int page_table_level) { - u64 *pdp = (u64 *)&vgpu_vreg64(vgpu, vgtif_reg(pdp[0])); + u64 *pdp = (u64 *)&vgpu_vreg64_t(vgpu, vgtif_reg(pdp[0])); struct intel_vgpu_mm *mm; if (WARN_ON((page_table_level != 4) && (page_table_level != 3))) diff --git a/drivers/gpu/drm/i915/gvt/gtt.h b/drivers/gpu/drm/i915/gvt/gtt.h index f98c1c19b4cb..4cc13b5934f1 100644 --- a/drivers/gpu/drm/i915/gvt/gtt.h +++ b/drivers/gpu/drm/i915/gvt/gtt.h @@ -308,4 +308,7 @@ int intel_vgpu_emulate_gtt_mmio_read(struct intel_vgpu *vgpu, int intel_vgpu_emulate_gtt_mmio_write(struct intel_vgpu *vgpu, unsigned int off, void *p_data, unsigned int bytes); +int intel_vgpu_write_protect_handler(struct intel_vgpu *vgpu, u64 pa, + void *p_data, unsigned int bytes); + #endif /* _GVT_GTT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/gvt.c b/drivers/gpu/drm/i915/gvt/gvt.c index 3a74a408a966..fac54f32d33f 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.c +++ b/drivers/gpu/drm/i915/gvt/gvt.c @@ -181,6 +181,9 @@ static const struct intel_gvt_ops intel_gvt_ops = { .vgpu_deactivate = intel_gvt_deactivate_vgpu, .gvt_find_vgpu_type = intel_gvt_find_vgpu_type, .get_gvt_attrs = intel_get_gvt_attrs, + .vgpu_query_plane = intel_vgpu_query_plane, + .vgpu_get_dmabuf = intel_vgpu_get_dmabuf, + .write_protect_handler = intel_vgpu_write_protect_handler, }; /** @@ -384,6 +387,8 @@ int intel_gvt_init_device(struct drm_i915_private *dev_priv) if (ret) goto out_clean_idr; + intel_gvt_init_engine_mmio_context(gvt); + ret = intel_gvt_load_firmware(gvt); if (ret) goto out_clean_mmio_info; diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h index 393066726993..7dc7a80213a8 100644 --- a/drivers/gpu/drm/i915/gvt/gvt.h +++ b/drivers/gpu/drm/i915/gvt/gvt.h @@ -44,8 +44,10 @@ #include "execlist.h" #include "scheduler.h" #include "sched_policy.h" -#include "render.h" +#include "mmio_context.h" #include "cmd_parser.h" +#include "fb_decoder.h" +#include "dmabuf.h" #define GVT_MAX_VGPU 8 @@ -123,7 +125,9 @@ struct intel_vgpu_irq { }; struct intel_vgpu_opregion { + bool mapped; void *va; + void *va_gopregion; u32 gfn[INTEL_GVT_OPREGION_PAGES]; }; @@ -206,8 +210,16 @@ struct intel_vgpu { struct kvm *kvm; struct work_struct release_work; atomic_t released; + struct vfio_device *vfio_device; } vdev; #endif + + struct list_head dmabuf_obj_list_head; + struct mutex dmabuf_lock; + struct idr object_idr; + + struct completion vblank_done; + }; /* validating GM healthy status*/ @@ -298,6 +310,8 @@ struct intel_gvt { wait_queue_head_t service_thread_wq; unsigned long service_request; + struct engine_mmio *engine_mmio_list; + struct dentry *debugfs_root; }; @@ -336,7 +350,7 @@ int intel_gvt_load_firmware(struct intel_gvt *gvt); /* Aperture/GM space definitions for GVT device */ #define gvt_aperture_sz(gvt) (gvt->dev_priv->ggtt.mappable_end) -#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.mappable_base) +#define gvt_aperture_pa_base(gvt) (gvt->dev_priv->ggtt.gmadr.start) #define gvt_ggtt_gm_sz(gvt) (gvt->dev_priv->ggtt.base.total) #define gvt_ggtt_sz(gvt) \ @@ 
-398,23 +412,20 @@ void intel_vgpu_free_resource(struct intel_vgpu *vgpu); void intel_vgpu_write_fence(struct intel_vgpu *vgpu, u32 fence, u64 value); -/* Macros for easily accessing vGPU virtual/shadow register */ -#define vgpu_vreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_vreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.vreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg(vgpu, reg) \ - (*(u32 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg8(vgpu, reg) \ - (*(u8 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg16(vgpu, reg) \ - (*(u16 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) -#define vgpu_sreg64(vgpu, reg) \ - (*(u64 *)(vgpu->mmio.sreg + INTEL_GVT_MMIO_OFFSET(reg))) +/* Macros for easily accessing vGPU virtual/shadow register. + Explicitly seperate use for typed MMIO reg or real offset.*/ +#define vgpu_vreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_vreg64_t(vgpu, reg) \ + (*(u64 *)(vgpu->mmio.vreg + i915_mmio_reg_offset(reg))) +#define vgpu_vreg64(vgpu, offset) \ + (*(u64 *)(vgpu->mmio.vreg + (offset))) +#define vgpu_sreg_t(vgpu, reg) \ + (*(u32 *)(vgpu->mmio.sreg + i915_mmio_reg_offset(reg))) +#define vgpu_sreg(vgpu, offset) \ + (*(u32 *)(vgpu->mmio.sreg + (offset))) #define for_each_active_vgpu(gvt, vgpu, id) \ idr_for_each_entry((&(gvt)->vgpu_idr), (vgpu), (id)) \ @@ -505,7 +516,8 @@ static inline u64 intel_vgpu_get_bar_gpa(struct intel_vgpu *vgpu, int bar) } void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu); -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa); +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu); +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa); int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci); void populate_pvinfo_page(struct intel_vgpu *vgpu); @@ -532,6 +544,10 @@ struct intel_gvt_ops { const char *name); bool (*get_gvt_attrs)(struct attribute ***type_attrs, struct attribute_group ***intel_vgpu_type_groups); + int (*vgpu_query_plane)(struct intel_vgpu *vgpu, void *); + int (*vgpu_get_dmabuf)(struct intel_vgpu *vgpu, unsigned int); + int (*write_protect_handler)(struct intel_vgpu *, u64, void *, + unsigned int); }; diff --git a/drivers/gpu/drm/i915/gvt/handlers.c b/drivers/gpu/drm/i915/gvt/handlers.c index 94fc04210bac..92d6468daeee 100644 --- a/drivers/gpu/drm/i915/gvt/handlers.c +++ b/drivers/gpu/drm/i915/gvt/handlers.c @@ -174,8 +174,10 @@ void enter_failsafe_mode(struct intel_vgpu *vgpu, int reason) break; case GVT_FAILSAFE_INSUFFICIENT_RESOURCE: pr_err("Graphics resource is not enough for the guest\n"); + break; case GVT_FAILSAFE_GUEST_ERR: pr_err("GVT Internal error for the guest\n"); + break; default: break; } @@ -341,13 +343,13 @@ static int pch_pp_control_mmio_write(struct intel_vgpu *vgpu, write_vreg(vgpu, offset, p_data, bytes); if (vgpu_vreg(vgpu, offset) & PANEL_POWER_ON) { - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_ON; - vgpu_vreg(vgpu, PCH_PP_STATUS) |= PP_SEQUENCE_STATE_ON_IDLE; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; - vgpu_vreg(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= PP_ON; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) |= 
PP_SEQUENCE_STATE_ON_IDLE; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_SEQUENCE_POWER_DOWN; + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~PP_CYCLE_DELAY_ACTIVE; } else - vgpu_vreg(vgpu, PCH_PP_STATUS) &= + vgpu_vreg_t(vgpu, PCH_PP_STATUS) &= ~(PP_ON | PP_SEQUENCE_POWER_DOWN | PP_CYCLE_DELAY_ACTIVE); return 0; @@ -501,7 +503,7 @@ static int ddi_buf_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, } else { vgpu_vreg(vgpu, offset) |= DDI_BUF_IS_IDLE; if (offset == i915_mmio_reg_offset(DDI_BUF_CTL(PORT_E))) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) &= ~DP_TP_STATUS_AUTOTRAIN_DONE; } return 0; @@ -519,9 +521,9 @@ static int fdi_rx_iir_mmio_write(struct intel_vgpu *vgpu, static int fdi_auto_training_started(struct intel_vgpu *vgpu) { - u32 ddi_buf_ctl = vgpu_vreg(vgpu, DDI_BUF_CTL(PORT_E)); + u32 ddi_buf_ctl = vgpu_vreg_t(vgpu, DDI_BUF_CTL(PORT_E)); u32 rx_ctl = vgpu_vreg(vgpu, _FDI_RXA_CTL); - u32 tx_ctl = vgpu_vreg(vgpu, DP_TP_CTL(PORT_E)); + u32 tx_ctl = vgpu_vreg_t(vgpu, DP_TP_CTL(PORT_E)); if ((ddi_buf_ctl & DDI_BUF_CTL_ENABLE) && (rx_ctl & FDI_RX_ENABLE) && @@ -562,12 +564,12 @@ static int check_fdi_rx_train_status(struct intel_vgpu *vgpu, fdi_tx_check_bits = FDI_TX_ENABLE | fdi_tx_train_bits; /* If imr bit has been masked */ - if (vgpu_vreg(vgpu, fdi_rx_imr) & fdi_iir_check_bits) + if (vgpu_vreg_t(vgpu, fdi_rx_imr) & fdi_iir_check_bits) return 0; - if (((vgpu_vreg(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) + if (((vgpu_vreg_t(vgpu, fdi_tx_ctl) & fdi_tx_check_bits) == fdi_tx_check_bits) - && ((vgpu_vreg(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) + && ((vgpu_vreg_t(vgpu, fdi_rx_ctl) & fdi_rx_check_bits) == fdi_rx_check_bits)) return 1; else @@ -624,17 +626,17 @@ static int update_fdi_rx_iir_status(struct intel_vgpu *vgpu, if (ret < 0) return ret; if (ret) - vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK; + vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_BIT_LOCK; ret = check_fdi_rx_train_status(vgpu, index, FDI_LINK_TRAIN_PATTERN2); if (ret < 0) return ret; if (ret) - vgpu_vreg(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; + vgpu_vreg_t(vgpu, fdi_rx_iir) |= FDI_RX_SYMBOL_LOCK; if (offset == _FDI_RXA_CTL) if (fdi_auto_training_started(vgpu)) - vgpu_vreg(vgpu, DP_TP_STATUS(PORT_E)) |= + vgpu_vreg_t(vgpu, DP_TP_STATUS(PORT_E)) |= DP_TP_STATUS_AUTOTRAIN_DONE; return 0; } @@ -655,7 +657,7 @@ static int dp_tp_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, data = (vgpu_vreg(vgpu, offset) & GENMASK(10, 8)) >> 8; if (data == 0x2) { status_reg = DP_TP_STATUS(index); - vgpu_vreg(vgpu, status_reg) |= (1 << 25); + vgpu_vreg_t(vgpu, status_reg) |= (1 << 25); } return 0; } @@ -719,7 +721,7 @@ static int pri_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -740,7 +742,7 @@ static int spr_surf_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, }; write_vreg(vgpu, offset, p_data, bytes); - vgpu_vreg(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); + vgpu_vreg_t(vgpu, surflive_reg) = vgpu_vreg(vgpu, offset); set_bit(flip_event[index], vgpu->irq.flip_done_event[index]); return 0; @@ -1062,9 +1064,9 @@ static void write_virtual_sbi_register(struct intel_vgpu *vgpu, static int sbi_data_mmio_read(struct intel_vgpu *vgpu, unsigned int offset, void *p_data, unsigned int bytes) { - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & 
SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRRD) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; vgpu_vreg(vgpu, offset) = read_virtual_sbi_register(vgpu, sbi_offset); @@ -1089,13 +1091,13 @@ static int sbi_ctl_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, vgpu_vreg(vgpu, offset) = data; - if (((vgpu_vreg(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> + if (((vgpu_vreg_t(vgpu, SBI_CTL_STAT) & SBI_OPCODE_MASK) >> SBI_OPCODE_SHIFT) == SBI_CMD_CRWR) { - unsigned int sbi_offset = (vgpu_vreg(vgpu, SBI_ADDR) & + unsigned int sbi_offset = (vgpu_vreg_t(vgpu, SBI_ADDR) & SBI_ADDR_OFFSET_MASK) >> SBI_ADDR_OFFSET_SHIFT; write_virtual_sbi_register(vgpu, sbi_offset, - vgpu_vreg(vgpu, SBI_DATA)); + vgpu_vreg_t(vgpu, SBI_DATA)); } return 0; } @@ -1341,7 +1343,7 @@ static int mailbox_write(struct intel_vgpu *vgpu, unsigned int offset, { u32 value = *(u32 *)p_data; u32 cmd = value & 0xff; - u32 *data0 = &vgpu_vreg(vgpu, GEN6_PCODE_DATA); + u32 *data0 = &vgpu_vreg_t(vgpu, GEN6_PCODE_DATA); switch (cmd) { case GEN9_PCODE_READ_MEM_LATENCY: @@ -1396,7 +1398,7 @@ static int hws_pga_write(struct intel_vgpu *vgpu, unsigned int offset, * update the VM CSB status correctly. Here listed registers can * support BDW, SKL or other platforms with same HWSP registers. */ - if (unlikely(ring_id < 0 || ring_id > I915_NUM_ENGINES)) { + if (unlikely(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) { gvt_vgpu_err("VM(%d) access unknown hardware status page register:0x%x\n", vgpu->id, offset); return -EINVAL; @@ -1471,7 +1473,7 @@ static int elsp_mmio_write(struct intel_vgpu *vgpu, unsigned int offset, u32 data = *(u32 *)p_data; int ret = 0; - if (WARN_ON(ring_id < 0 || ring_id > I915_NUM_ENGINES - 1)) + if (WARN_ON(ring_id < 0 || ring_id >= I915_NUM_ENGINES)) return -EINVAL; execlist = &vgpu->submission.execlist[ring_id]; @@ -1584,7 +1586,7 @@ static int ring_reset_ctl_write(struct intel_vgpu *vgpu, } #define MMIO_F(reg, s, f, am, rm, d, r, w) do { \ - ret = new_mmio_info(gvt, INTEL_GVT_MMIO_OFFSET(reg), \ + ret = new_mmio_info(gvt, i915_mmio_reg_offset(reg), \ f, s, am, rm, d, r, w); \ if (ret) \ return ret; \ @@ -1652,22 +1654,22 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_GM_RDR(BLT_HWS_PGA_GEN7, D_ALL, NULL, NULL); MMIO_GM_RDR(VEBOX_HWS_PGA_GEN7, D_ALL, NULL, NULL); -#define RING_REG(base) (base + 0x28) +#define RING_REG(base) _MMIO((base) + 0x28) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x134) +#define RING_REG(base) _MMIO((base) + 0x134) MMIO_RING_DFH(RING_REG, D_ALL, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x6c) +#define RING_REG(base) _MMIO((base) + 0x6c) MMIO_RING_DFH(RING_REG, D_ALL, 0, mmio_read_from_hw, NULL); #undef RING_REG MMIO_DH(GEN7_SC_INSTDONE, D_BDW_PLUS, mmio_read_from_hw, NULL); - MMIO_GM_RDR(0x2148, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x2148), D_ALL, NULL, NULL); MMIO_GM_RDR(CCID, D_ALL, NULL, NULL); - MMIO_GM_RDR(0x12198, D_ALL, NULL, NULL); + MMIO_GM_RDR(_MMIO(0x12198), D_ALL, NULL, NULL); MMIO_D(GEN7_CXT_SIZE, D_ALL); MMIO_RING_DFH(RING_TAIL, D_ALL, F_CMD_ACCESS, NULL, NULL); @@ -1677,7 +1679,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_RING_GM_RDR(RING_START, D_ALL, NULL, NULL); /* RING MODE */ -#define RING_REG(base) (base + 0x29c) +#define RING_REG(base) _MMIO((base) + 
0x29c) MMIO_RING_DFH(RING_REG, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, ring_mode_mmio_write); #undef RING_REG @@ -1696,37 +1698,37 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) NULL, NULL); MMIO_DFH(CACHE_MODE_1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(CACHE_MODE_0, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2124, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2124), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20dc, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20dc), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(_3D_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2088, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20e4, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2470, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2088), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20e4), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2470), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GAM_ECOCHK, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_COMMON_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(COMMON_SLICE_CHICKEN2, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x9030, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x20a0, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2420, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2430, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2434, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2438, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x243c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x7018, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x9030), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x20a0), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2420), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2430), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2434), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2438), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x243c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x7018), D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN3, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_HALF_SLICE_CHICKEN1, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* display */ - MMIO_F(0x60220, 0x20, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x602a0, D_ALL); + MMIO_F(_MMIO(0x60220), 0x20, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_D(_MMIO(0x602a0), D_ALL); - MMIO_D(0x65050, D_ALL); - MMIO_D(0x650b4, D_ALL); + MMIO_D(_MMIO(0x65050), D_ALL); + MMIO_D(_MMIO(0x650b4), D_ALL); - MMIO_D(0xc4040, D_ALL); + MMIO_D(_MMIO(0xc4040), D_ALL); MMIO_D(DERRMR, D_ALL); MMIO_D(PIPEDSL(PIPE_A), D_ALL); @@ -1766,14 +1768,14 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(CURBASE(PIPE_B), D_ALL); MMIO_D(CURBASE(PIPE_C), D_ALL); - MMIO_D(0x700ac, D_ALL); - MMIO_D(0x710ac, D_ALL); - MMIO_D(0x720ac, D_ALL); + MMIO_D(_MMIO(0x700ac), D_ALL); + MMIO_D(_MMIO(0x710ac), D_ALL); + MMIO_D(_MMIO(0x720ac), D_ALL); - MMIO_D(0x70090, D_ALL); - MMIO_D(0x70094, D_ALL); - MMIO_D(0x70098, D_ALL); - MMIO_D(0x7009c, D_ALL); + MMIO_D(_MMIO(0x70090), D_ALL); + MMIO_D(_MMIO(0x70094), D_ALL); + MMIO_D(_MMIO(0x70098), D_ALL); + MMIO_D(_MMIO(0x7009c), D_ALL); MMIO_D(DSPCNTR(PIPE_A), D_ALL); MMIO_D(DSPADDR(PIPE_A), D_ALL); @@ -1949,24 +1951,24 @@ static int init_generic_mmio_info(struct intel_gvt 
*gvt) MMIO_D(BLC_PWM_PCH_CTL1, D_ALL); MMIO_D(BLC_PWM_PCH_CTL2, D_ALL); - MMIO_D(0x48268, D_ALL); + MMIO_D(_MMIO(0x48268), D_ALL); MMIO_F(PCH_GMBUS0, 4 * 4, 0, 0, 0, D_ALL, gmbus_mmio_read, gmbus_mmio_write); MMIO_F(PCH_GPIOA, 6 * 4, F_UNALIGN, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0xe4f00, 0x28, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0xe4f00), 0x28, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(_PCH_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_PCH_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, + MMIO_F(_MMIO(_PCH_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_PRE_SKL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(PCH_ADPA, D_PRE_SKL, NULL, pch_adpa_mmio_write); - MMIO_DH(_PCH_TRANSACONF, D_ALL, NULL, transconf_mmio_write); - MMIO_DH(_PCH_TRANSBCONF, D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSACONF), D_ALL, NULL, transconf_mmio_write); + MMIO_DH(_MMIO(_PCH_TRANSBCONF), D_ALL, NULL, transconf_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_A), D_ALL, NULL, fdi_rx_iir_mmio_write); MMIO_DH(FDI_RX_IIR(PIPE_B), D_ALL, NULL, fdi_rx_iir_mmio_write); @@ -1978,30 +1980,30 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(FDI_RX_CTL(PIPE_B), D_ALL, NULL, update_fdi_rx_iir_status); MMIO_DH(FDI_RX_CTL(PIPE_C), D_ALL, NULL, update_fdi_rx_iir_status); - MMIO_D(_PCH_TRANS_HTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_A, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_A, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_A, D_ALL); - - MMIO_D(_PCH_TRANS_HTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_HBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_HSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VTOTAL_B, D_ALL); - MMIO_D(_PCH_TRANS_VBLANK_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNC_B, D_ALL); - MMIO_D(_PCH_TRANS_VSYNCSHIFT_B, D_ALL); - - MMIO_D(_PCH_TRANSA_DATA_M1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N1, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_M2, D_ALL); - MMIO_D(_PCH_TRANSA_DATA_N2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N1, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_M2, D_ALL); - MMIO_D(_PCH_TRANSA_LINK_N2, D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_A), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_A), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANS_HTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_HSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VTOTAL_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VBLANK_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNC_B), D_ALL); + MMIO_D(_MMIO(_PCH_TRANS_VSYNCSHIFT_B), D_ALL); + + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_DATA_N2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N1), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_M2), D_ALL); + MMIO_D(_MMIO(_PCH_TRANSA_LINK_N2), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_A), D_ALL); MMIO_D(TRANS_DP_CTL(PIPE_B), D_ALL); @@ -2019,38 +2021,38 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) 
MMIO_D(TVIDEO_DIP_DATA(PIPE_C), D_ALL); MMIO_D(TVIDEO_DIP_GCP(PIPE_C), D_ALL); - MMIO_D(_FDI_RXA_MISC, D_ALL); - MMIO_D(_FDI_RXB_MISC, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXA_TUSIZE2, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE1, D_ALL); - MMIO_D(_FDI_RXB_TUSIZE2, D_ALL); + MMIO_D(_MMIO(_FDI_RXA_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_MISC), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXA_TUSIZE2), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE1), D_ALL); + MMIO_D(_MMIO(_FDI_RXB_TUSIZE2), D_ALL); MMIO_DH(PCH_PP_CONTROL, D_ALL, NULL, pch_pp_control_mmio_write); MMIO_D(PCH_PP_DIVISOR, D_ALL); MMIO_D(PCH_PP_STATUS, D_ALL); MMIO_D(PCH_LVDS, D_ALL); - MMIO_D(_PCH_DPLL_A, D_ALL); - MMIO_D(_PCH_DPLL_B, D_ALL); - MMIO_D(_PCH_FPA0, D_ALL); - MMIO_D(_PCH_FPA1, D_ALL); - MMIO_D(_PCH_FPB0, D_ALL); - MMIO_D(_PCH_FPB1, D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_A), D_ALL); + MMIO_D(_MMIO(_PCH_DPLL_B), D_ALL); + MMIO_D(_MMIO(_PCH_FPA0), D_ALL); + MMIO_D(_MMIO(_PCH_FPA1), D_ALL); + MMIO_D(_MMIO(_PCH_FPB0), D_ALL); + MMIO_D(_MMIO(_PCH_FPB1), D_ALL); MMIO_D(PCH_DREF_CONTROL, D_ALL); MMIO_D(PCH_RAWCLK_FREQ, D_ALL); MMIO_D(PCH_DPLL_SEL, D_ALL); - MMIO_D(0x61208, D_ALL); - MMIO_D(0x6120c, D_ALL); + MMIO_D(_MMIO(0x61208), D_ALL); + MMIO_D(_MMIO(0x6120c), D_ALL); MMIO_D(PCH_PP_ON_DELAYS, D_ALL); MMIO_D(PCH_PP_OFF_DELAYS, D_ALL); - MMIO_DH(0xe651c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe661c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe671c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe681c, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6c04, D_ALL, dpy_reg_mmio_read, NULL); - MMIO_DH(0xe6e1c, D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe651c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe661c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe671c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe681c), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6c04), D_ALL, dpy_reg_mmio_read, NULL); + MMIO_DH(_MMIO(0xe6e1c), D_ALL, dpy_reg_mmio_read, NULL); MMIO_RO(PCH_PORT_HOTPLUG, D_ALL, 0, PORTA_HOTPLUG_STATUS_MASK @@ -2072,11 +2074,11 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(SOUTH_CHICKEN1, D_ALL); MMIO_DH(SOUTH_CHICKEN2, D_ALL, NULL, south_chicken2_mmio_write); - MMIO_D(_TRANSA_CHICKEN1, D_ALL); - MMIO_D(_TRANSB_CHICKEN1, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN1), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN1), D_ALL); MMIO_D(SOUTH_DSPCLK_GATE_D, D_ALL); - MMIO_D(_TRANSA_CHICKEN2, D_ALL); - MMIO_D(_TRANSB_CHICKEN2, D_ALL); + MMIO_D(_MMIO(_TRANSA_CHICKEN2), D_ALL); + MMIO_D(_MMIO(_TRANSB_CHICKEN2), D_ALL); MMIO_D(ILK_DPFC_CB_BASE, D_ALL); MMIO_D(ILK_DPFC_CONTROL, D_ALL); @@ -2142,24 +2144,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(PREC_PAL_DATA(PIPE_C), D_ALL); MMIO_F(PREC_PAL_GC_MAX(PIPE_C, 0), 4 * 3, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_D(0x60110, D_ALL); - MMIO_D(0x61110, D_ALL); - MMIO_F(0x70400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x71400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x72400, 0x40, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x70440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x71440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x72440, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7044c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7144c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); - MMIO_F(0x7244c, 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_D(_MMIO(0x60110), D_ALL); + MMIO_D(_MMIO(0x61110), D_ALL); + MMIO_F(_MMIO(0x70400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x71400), 0x40, 
0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x72400), 0x40, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x70440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x71440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x72440), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7044c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7144c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); + MMIO_F(_MMIO(0x7244c), 0xc, 0, 0, 0, D_PRE_SKL, NULL, NULL); MMIO_D(PIPE_WM_LINETIME(PIPE_A), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_B), D_ALL); MMIO_D(PIPE_WM_LINETIME(PIPE_C), D_ALL); MMIO_D(SPLL_CTL, D_ALL); - MMIO_D(_WRPLL_CTL1, D_ALL); - MMIO_D(_WRPLL_CTL2, D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL1), D_ALL); + MMIO_D(_MMIO(_WRPLL_CTL2), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_A), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_B), D_ALL); MMIO_D(PORT_CLK_SEL(PORT_C), D_ALL); @@ -2170,15 +2172,15 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(TRANS_CLK_SEL(TRANSCODER_C), D_ALL); MMIO_D(HSW_NDE_RSTWRN_OPT, D_ALL); - MMIO_D(0x46508, D_ALL); + MMIO_D(_MMIO(0x46508), D_ALL); - MMIO_D(0x49080, D_ALL); - MMIO_D(0x49180, D_ALL); - MMIO_D(0x49280, D_ALL); + MMIO_D(_MMIO(0x49080), D_ALL); + MMIO_D(_MMIO(0x49180), D_ALL); + MMIO_D(_MMIO(0x49280), D_ALL); - MMIO_F(0x49090, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x49190, 0x14, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x49290, 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49090), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49190), 0x14, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x49290), 0x14, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GAMMA_MODE(PIPE_A), D_ALL); MMIO_D(GAMMA_MODE(PIPE_B), D_ALL); @@ -2198,7 +2200,7 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(SBI_CTL_STAT, D_ALL, NULL, sbi_ctl_mmio_write); MMIO_D(PIXCLK_GATE, D_ALL); - MMIO_F(_DPA_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_ALL, NULL, + MMIO_F(_MMIO(_DPA_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_ALL, NULL, dp_aux_ch_ctl_mmio_write); MMIO_DH(DDI_BUF_CTL(PORT_A), D_ALL, NULL, ddi_buf_ctl_mmio_write); @@ -2219,24 +2221,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_DH(DP_TP_STATUS(PORT_D), D_ALL, NULL, dp_tp_status_mmio_write); MMIO_DH(DP_TP_STATUS(PORT_E), D_ALL, NULL, NULL); - MMIO_F(_DDI_BUF_TRANS_A, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64e60, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64eC0, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f20, 0x50, 0, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x64f80, 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(_DDI_BUF_TRANS_A), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64e60), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64eC0), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f20), 0x50, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x64f80), 0x50, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(HSW_AUD_CFG(PIPE_A), D_ALL); MMIO_D(HSW_AUD_PIN_ELD_CP_VLD, D_ALL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_A, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_B, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_C, D_ALL, NULL, NULL); - MMIO_DH(_TRANS_DDI_FUNC_CTL_EDP, D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_A), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_B), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_C), D_ALL, NULL, NULL); + MMIO_DH(_MMIO(_TRANS_DDI_FUNC_CTL_EDP), D_ALL, NULL, NULL); - MMIO_D(_TRANSA_MSA_MISC, D_ALL); - MMIO_D(_TRANSB_MSA_MISC, D_ALL); - MMIO_D(_TRANSC_MSA_MISC, D_ALL); - MMIO_D(_TRANS_EDP_MSA_MISC, D_ALL); + MMIO_D(_MMIO(_TRANSA_MSA_MISC), D_ALL); + 
MMIO_D(_MMIO(_TRANSB_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANSC_MSA_MISC), D_ALL); + MMIO_D(_MMIO(_TRANS_EDP_MSA_MISC), D_ALL); MMIO_DH(FORCEWAKE, D_ALL, NULL, NULL); MMIO_D(FORCEWAKE_ACK, D_ALL); @@ -2302,101 +2304,101 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN6_UCGCTL1, D_ALL); MMIO_D(GEN6_UCGCTL2, D_ALL); - MMIO_F(0x4f000, 0x90, 0, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x4f000), 0x90, 0, 0, 0, D_ALL, NULL, NULL); MMIO_D(GEN6_PCODE_DATA, D_ALL); - MMIO_D(0x13812c, D_ALL); + MMIO_D(_MMIO(0x13812c), D_ALL); MMIO_DH(GEN7_ERR_INT, D_ALL, NULL, NULL); MMIO_D(HSW_EDRAM_CAP, D_ALL); MMIO_D(HSW_IDICR, D_ALL); MMIO_DH(GFX_FLSH_CNTL_GEN6, D_ALL, NULL, NULL); - MMIO_D(0x3c, D_ALL); - MMIO_D(0x860, D_ALL); + MMIO_D(_MMIO(0x3c), D_ALL); + MMIO_D(_MMIO(0x860), D_ALL); MMIO_D(ECOSKPD, D_ALL); - MMIO_D(0x121d0, D_ALL); + MMIO_D(_MMIO(0x121d0), D_ALL); MMIO_D(GEN6_BLITTER_ECOSKPD, D_ALL); - MMIO_D(0x41d0, D_ALL); + MMIO_D(_MMIO(0x41d0), D_ALL); MMIO_D(GAC_ECO_BITS, D_ALL); - MMIO_D(0x6200, D_ALL); - MMIO_D(0x6204, D_ALL); - MMIO_D(0x6208, D_ALL); - MMIO_D(0x7118, D_ALL); - MMIO_D(0x7180, D_ALL); - MMIO_D(0x7408, D_ALL); - MMIO_D(0x7c00, D_ALL); + MMIO_D(_MMIO(0x6200), D_ALL); + MMIO_D(_MMIO(0x6204), D_ALL); + MMIO_D(_MMIO(0x6208), D_ALL); + MMIO_D(_MMIO(0x7118), D_ALL); + MMIO_D(_MMIO(0x7180), D_ALL); + MMIO_D(_MMIO(0x7408), D_ALL); + MMIO_D(_MMIO(0x7c00), D_ALL); MMIO_DH(GEN6_MBCTL, D_ALL, NULL, mbctl_write); - MMIO_D(0x911c, D_ALL); - MMIO_D(0x9120, D_ALL); + MMIO_D(_MMIO(0x911c), D_ALL); + MMIO_D(_MMIO(0x9120), D_ALL); MMIO_DFH(GEN7_UCGCTL4, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_D(GAB_CTL, D_ALL); - MMIO_D(0x48800, D_ALL); - MMIO_D(0xce044, D_ALL); - MMIO_D(0xe6500, D_ALL); - MMIO_D(0xe6504, D_ALL); - MMIO_D(0xe6600, D_ALL); - MMIO_D(0xe6604, D_ALL); - MMIO_D(0xe6700, D_ALL); - MMIO_D(0xe6704, D_ALL); - MMIO_D(0xe6800, D_ALL); - MMIO_D(0xe6804, D_ALL); + MMIO_D(_MMIO(0x48800), D_ALL); + MMIO_D(_MMIO(0xce044), D_ALL); + MMIO_D(_MMIO(0xe6500), D_ALL); + MMIO_D(_MMIO(0xe6504), D_ALL); + MMIO_D(_MMIO(0xe6600), D_ALL); + MMIO_D(_MMIO(0xe6604), D_ALL); + MMIO_D(_MMIO(0xe6700), D_ALL); + MMIO_D(_MMIO(0xe6704), D_ALL); + MMIO_D(_MMIO(0xe6800), D_ALL); + MMIO_D(_MMIO(0xe6804), D_ALL); MMIO_D(PCH_GMBUS4, D_ALL); MMIO_D(PCH_GMBUS5, D_ALL); - MMIO_D(0x902c, D_ALL); - MMIO_D(0xec008, D_ALL); - MMIO_D(0xec00c, D_ALL); - MMIO_D(0xec008 + 0x18, D_ALL); - MMIO_D(0xec00c + 0x18, D_ALL); - MMIO_D(0xec008 + 0x18 * 2, D_ALL); - MMIO_D(0xec00c + 0x18 * 2, D_ALL); - MMIO_D(0xec008 + 0x18 * 3, D_ALL); - MMIO_D(0xec00c + 0x18 * 3, D_ALL); - MMIO_D(0xec408, D_ALL); - MMIO_D(0xec40c, D_ALL); - MMIO_D(0xec408 + 0x18, D_ALL); - MMIO_D(0xec40c + 0x18, D_ALL); - MMIO_D(0xec408 + 0x18 * 2, D_ALL); - MMIO_D(0xec40c + 0x18 * 2, D_ALL); - MMIO_D(0xec408 + 0x18 * 3, D_ALL); - MMIO_D(0xec40c + 0x18 * 3, D_ALL); - MMIO_D(0xfc810, D_ALL); - MMIO_D(0xfc81c, D_ALL); - MMIO_D(0xfc828, D_ALL); - MMIO_D(0xfc834, D_ALL); - MMIO_D(0xfcc00, D_ALL); - MMIO_D(0xfcc0c, D_ALL); - MMIO_D(0xfcc18, D_ALL); - MMIO_D(0xfcc24, D_ALL); - MMIO_D(0xfd000, D_ALL); - MMIO_D(0xfd00c, D_ALL); - MMIO_D(0xfd018, D_ALL); - MMIO_D(0xfd024, D_ALL); - MMIO_D(0xfd034, D_ALL); + MMIO_D(_MMIO(0x902c), D_ALL); + MMIO_D(_MMIO(0xec008), D_ALL); + MMIO_D(_MMIO(0xec00c), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec008 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec00c + 0x18 * 3), D_ALL); + 
MMIO_D(_MMIO(0xec408), D_ALL); + MMIO_D(_MMIO(0xec40c), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 2), D_ALL); + MMIO_D(_MMIO(0xec408 + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xec40c + 0x18 * 3), D_ALL); + MMIO_D(_MMIO(0xfc810), D_ALL); + MMIO_D(_MMIO(0xfc81c), D_ALL); + MMIO_D(_MMIO(0xfc828), D_ALL); + MMIO_D(_MMIO(0xfc834), D_ALL); + MMIO_D(_MMIO(0xfcc00), D_ALL); + MMIO_D(_MMIO(0xfcc0c), D_ALL); + MMIO_D(_MMIO(0xfcc18), D_ALL); + MMIO_D(_MMIO(0xfcc24), D_ALL); + MMIO_D(_MMIO(0xfd000), D_ALL); + MMIO_D(_MMIO(0xfd00c), D_ALL); + MMIO_D(_MMIO(0xfd018), D_ALL); + MMIO_D(_MMIO(0xfd024), D_ALL); + MMIO_D(_MMIO(0xfd034), D_ALL); MMIO_DH(FPGA_DBG, D_ALL, NULL, fpga_dbg_mmio_write); - MMIO_D(0x2054, D_ALL); - MMIO_D(0x12054, D_ALL); - MMIO_D(0x22054, D_ALL); - MMIO_D(0x1a054, D_ALL); - - MMIO_D(0x44070, D_ALL); - MMIO_DFH(0x215c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12178, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1217c, D_ALL, F_CMD_ACCESS, NULL, NULL); - - MMIO_F(0x2290, 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); - MMIO_D(0x2b00, D_BDW_PLUS); - MMIO_D(0x2360, D_BDW_PLUS); - MMIO_F(0x5200, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5240, 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_F(0x5280, 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - - MMIO_DFH(0x1c17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1c178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0x2054), D_ALL); + MMIO_D(_MMIO(0x12054), D_ALL); + MMIO_D(_MMIO(0x22054), D_ALL); + MMIO_D(_MMIO(0x1a054), D_ALL); + + MMIO_D(_MMIO(0x44070), D_ALL); + MMIO_DFH(_MMIO(0x215c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12178), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1217c), D_ALL, F_CMD_ACCESS, NULL, NULL); + + MMIO_F(_MMIO(0x2290), 8, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x2b00), D_BDW_PLUS); + MMIO_D(_MMIO(0x2360), D_BDW_PLUS); + MMIO_F(_MMIO(0x5200), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5240), 32, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + MMIO_F(_MMIO(0x5280), 16, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); + + MMIO_DFH(_MMIO(0x1c17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1c178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(BCS_SWCTRL, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_F(HS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); @@ -2410,24 +2412,24 @@ static int init_generic_mmio_info(struct intel_gvt *gvt) MMIO_F(CL_PRIMITIVES_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_INVOCATION_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); MMIO_F(PS_DEPTH_COUNT, 8, F_CMD_ACCESS, 0, 0, D_ALL, NULL, NULL); - MMIO_DH(0x4260, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4264, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4268, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x426c, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DH(0x4270, D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); - MMIO_DFH(0x4094, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DH(_MMIO(0x4260), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4264), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + 
MMIO_DH(_MMIO(0x4268), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x426c), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DH(_MMIO(0x4270), D_BDW_PLUS, NULL, gvt_reg_tlb_control_handler); + MMIO_DFH(_MMIO(0x4094), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(ARB_MODE, D_ALL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_RING_GM_RDR(RING_BBADDR, D_ALL, NULL, NULL); - MMIO_DFH(0x2220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x12220, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22220, D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x12220), D_ALL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22220), D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_1, D_ALL, F_CMD_ACCESS, NULL, NULL); MMIO_RING_DFH(RING_SYNC_0, D_ALL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x22178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a178, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x1a17c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2217c, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x22178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a178), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x1a17c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2217c), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2501,40 +2503,40 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_RING_DFH(RING_ACTHD_UDW, D_BDW_PLUS, F_CMD_ACCESS, mmio_read_from_hw, NULL); -#define RING_REG(base) (base + 0xd0) +#define RING_REG(base) _MMIO((base) + 0xd0) MMIO_RING_F(RING_REG, 4, F_RO, 0, ~_MASKED_BIT_ENABLE(RESET_CTL_REQUEST_RESET), D_BDW_PLUS, NULL, ring_reset_ctl_write); #undef RING_REG -#define RING_REG(base) (base + 0x230) +#define RING_REG(base) _MMIO((base) + 0x230) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, 0, NULL, elsp_mmio_write); #undef RING_REG -#define RING_REG(base) (base + 0x234) +#define RING_REG(base) _MMIO((base) + 0x234) MMIO_RING_F(RING_REG, 8, F_RO | F_CMD_ACCESS, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x244) +#define RING_REG(base) _MMIO((base) + 0x244) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x370) +#define RING_REG(base) _MMIO((base) + 0x370) MMIO_RING_F(RING_REG, 48, F_RO, 0, ~0, D_BDW_PLUS, NULL, NULL); #undef RING_REG -#define RING_REG(base) (base + 0x3a0) +#define RING_REG(base) _MMIO((base) + 0x3a0) MMIO_RING_DFH(RING_REG, D_BDW_PLUS, F_MODE_MASK, NULL, NULL); #undef RING_REG MMIO_D(PIPEMISC(PIPE_A), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_B), D_BDW_PLUS); MMIO_D(PIPEMISC(PIPE_C), D_BDW_PLUS); - MMIO_D(0x1c1d0, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c1d0), D_BDW_PLUS); MMIO_D(GEN6_MBCUNIT_SNPCR, D_BDW_PLUS); MMIO_D(GEN7_MISCCPCTL, D_BDW_PLUS); - MMIO_D(0x1c054, D_BDW_PLUS); + MMIO_D(_MMIO(0x1c054), D_BDW_PLUS); MMIO_DH(GEN6_PCODE_MAILBOX, D_BDW_PLUS, NULL, mailbox_write); @@ -2543,7 +2545,7 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GAMTARBMODE, D_BDW_PLUS); -#define RING_REG(base) (base + 0x270) +#define RING_REG(base) _MMIO((base) + 0x270) MMIO_RING_F(RING_REG, 32, 0, 0, 0, D_BDW_PLUS, NULL, NULL); #undef RING_REG @@ -2556,10 +2558,10 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(CHICKEN_PIPESL_1(PIPE_C), D_BDW_PLUS); MMIO_D(WM_MISC, D_BDW); - MMIO_D(BDW_EDP_PSR_BASE, D_BDW); + MMIO_D(_MMIO(BDW_EDP_PSR_BASE), D_BDW); - MMIO_D(0x66c00, D_BDW_PLUS); - MMIO_D(0x66c04, D_BDW_PLUS); + 
MMIO_D(_MMIO(0x66c00), D_BDW_PLUS); + MMIO_D(_MMIO(0x66c04), D_BDW_PLUS); MMIO_D(HSW_GTT_CACHE_EN, D_BDW_PLUS); @@ -2567,54 +2569,54 @@ static int init_broadwell_mmio_info(struct intel_gvt *gvt) MMIO_D(GEN8_EU_DISABLE1, D_BDW_PLUS); MMIO_D(GEN8_EU_DISABLE2, D_BDW_PLUS); - MMIO_D(0xfdc, D_BDW_PLUS); + MMIO_D(_MMIO(0xfdc), D_BDW_PLUS); MMIO_DFH(GEN8_ROW_CHICKEN, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN7_ROW_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_UCGCTL6, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1f0, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb1c0, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1f0), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb1c0), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_L3SQCREG4, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb100, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xb10c, D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0xb110, D_BDW); + MMIO_DFH(_MMIO(0xb100), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xb10c), D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_D(_MMIO(0xb110), D_BDW); - MMIO_F(0x24d0, 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, + MMIO_F(_MMIO(0x24d0), 48, F_CMD_ACCESS, 0, 0, D_BDW_PLUS, NULL, force_nonpriv_write); - MMIO_D(0x44484, D_BDW_PLUS); - MMIO_D(0x4448c, D_BDW_PLUS); + MMIO_D(_MMIO(0x44484), D_BDW_PLUS); + MMIO_D(_MMIO(0x4448c), D_BDW_PLUS); - MMIO_DFH(0x83a4, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x83a4), D_BDW, F_CMD_ACCESS, NULL, NULL); MMIO_D(GEN8_L3_LRA_1_GPGPU, D_BDW_PLUS); - MMIO_DFH(0x8430, D_BDW, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x8430), D_BDW, F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x110000, D_BDW_PLUS); + MMIO_D(_MMIO(0x110000), D_BDW_PLUS); - MMIO_D(0x48400, D_BDW_PLUS); + MMIO_D(_MMIO(0x48400), D_BDW_PLUS); - MMIO_D(0x6e570, D_BDW_PLUS); - MMIO_D(0x65f10, D_BDW_PLUS); + MMIO_D(_MMIO(0x6e570), D_BDW_PLUS); + MMIO_D(_MMIO(0x65f10), D_BDW_PLUS); - MMIO_DFH(0xe194, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe188, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe194), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe188), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); MMIO_DFH(HALF_SLICE_CHICKEN2, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x2580, D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0x2248, D_BDW, F_CMD_ACCESS, NULL, NULL); - - MMIO_DFH(0xe220, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe230, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe240, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe260, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe270, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe280, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2a0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2b0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0xe2c0, D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x2580), D_BDW_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0x2248), D_BDW, F_CMD_ACCESS, NULL, NULL); + + MMIO_DFH(_MMIO(0xe220), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe230), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe240), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe260), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe270), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe280), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2a0), 
D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2b0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0xe2c0), D_BDW_PLUS, F_CMD_ACCESS, NULL, NULL); return 0; } @@ -2630,11 +2632,11 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(FORCEWAKE_MEDIA_GEN9, D_SKL_PLUS, NULL, mul_force_wake_write); MMIO_DH(FORCEWAKE_ACK_MEDIA_GEN9, D_SKL_PLUS, NULL, NULL); - MMIO_F(_DPB_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPB_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPC_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPC_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); - MMIO_F(_DPD_AUX_CH_CTL, 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, + MMIO_F(_MMIO(_DPD_AUX_CH_CTL), 6 * 4, 0, 0, 0, D_SKL_PLUS, NULL, dp_aux_ch_ctl_mmio_write); /* @@ -2645,26 +2647,26 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(HSW_PWR_WELL_CTL_DRIVER(SKL_DISP_PW_MISC_IO), D_SKL_PLUS, NULL, skl_power_well_ctl_write); - MMIO_D(0xa210, D_SKL_PLUS); + MMIO_D(_MMIO(0xa210), D_SKL_PLUS); MMIO_D(GEN9_MEDIA_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_D(GEN9_RENDER_PG_IDLE_HYSTERESIS, D_SKL_PLUS); MMIO_DFH(GEN9_GAMT_ECO_REG_RW_IA, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); - MMIO_DH(0x4ddc, D_SKL_PLUS, NULL, NULL); - MMIO_DH(0x42080, D_SKL_PLUS, NULL, NULL); - MMIO_D(0x45504, D_SKL_PLUS); - MMIO_D(0x45520, D_SKL_PLUS); - MMIO_D(0x46000, D_SKL_PLUS); - MMIO_DH(0x46010, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_DH(0x46014, D_SKL | D_KBL, NULL, skl_lcpll_write); - MMIO_D(0x6C040, D_SKL | D_KBL); - MMIO_D(0x6C048, D_SKL | D_KBL); - MMIO_D(0x6C050, D_SKL | D_KBL); - MMIO_D(0x6C044, D_SKL | D_KBL); - MMIO_D(0x6C04C, D_SKL | D_KBL); - MMIO_D(0x6C054, D_SKL | D_KBL); - MMIO_D(0x6c058, D_SKL | D_KBL); - MMIO_D(0x6c05c, D_SKL | D_KBL); - MMIO_DH(0X6c060, D_SKL | D_KBL, dpll_status_read, NULL); + MMIO_DH(_MMIO(0x4ddc), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(0x42080), D_SKL_PLUS, NULL, NULL); + MMIO_D(_MMIO(0x45504), D_SKL_PLUS); + MMIO_D(_MMIO(0x45520), D_SKL_PLUS); + MMIO_D(_MMIO(0x46000), D_SKL_PLUS); + MMIO_DH(_MMIO(0x46010), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_DH(_MMIO(0x46014), D_SKL | D_KBL, NULL, skl_lcpll_write); + MMIO_D(_MMIO(0x6C040), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C048), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C050), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C044), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C04C), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6C054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c058), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c05c), D_SKL | D_KBL); + MMIO_DH(_MMIO(0x6c060), D_SKL | D_KBL, dpll_status_read, NULL); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 0), D_SKL_PLUS, NULL, pf_write); MMIO_DH(SKL_PS_WIN_POS(PIPE_A, 1), D_SKL_PLUS, NULL, pf_write); @@ -2753,105 +2755,105 @@ static int init_skl_mmio_info(struct intel_gvt *gvt) MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); MMIO_DH(PLANE_NV12_BUF_CFG(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 
2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C0(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C0(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_A, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_A, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_B, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_B, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 1), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 2), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 3), D_SKL_PLUS, NULL, NULL); - MMIO_DH(_REG_701C4(PIPE_C, 4), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 1)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 2)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 3)), D_SKL_PLUS, NULL, NULL); + MMIO_DH(_MMIO(_REG_701C4(PIPE_C, 4)), D_SKL_PLUS, NULL, NULL); - MMIO_D(0x70380, D_SKL_PLUS); - MMIO_D(0x71380, D_SKL_PLUS); - MMIO_D(0x72380, D_SKL_PLUS); - MMIO_D(0x7039c, D_SKL_PLUS); + MMIO_D(_MMIO(0x70380), D_SKL_PLUS); + MMIO_D(_MMIO(0x71380), D_SKL_PLUS); + MMIO_D(_MMIO(0x72380), D_SKL_PLUS); + MMIO_D(_MMIO(0x7039c), D_SKL_PLUS); - MMIO_D(0x8f074, D_SKL | D_KBL); - MMIO_D(0x8f004, D_SKL | D_KBL); - MMIO_D(0x8f034, D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f074), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f004), D_SKL | D_KBL); + MMIO_D(_MMIO(0x8f034), D_SKL | D_KBL); - MMIO_D(0xb11c, D_SKL | D_KBL); + MMIO_D(_MMIO(0xb11c), D_SKL | D_KBL); - MMIO_D(0x51000, D_SKL | D_KBL); - MMIO_D(0x6c00c, D_SKL_PLUS); + MMIO_D(_MMIO(0x51000), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6c00c), D_SKL_PLUS); - MMIO_F(0xc800, 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_F(0xb020, 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xc800), 0x7f8, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); + MMIO_F(_MMIO(0xb020), 0x80, F_CMD_ACCESS, 0, 0, D_SKL | D_KBL, NULL, NULL); - MMIO_D(0xd08, D_SKL_PLUS); - MMIO_DFH(0x20e0, D_SKL_PLUS, F_MODE_MASK, NULL, NULL); - MMIO_DFH(0x20ec, D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); + 
MMIO_D(_MMIO(0xd08), D_SKL_PLUS); + MMIO_DFH(_MMIO(0x20e0), D_SKL_PLUS, F_MODE_MASK, NULL, NULL); + MMIO_DFH(_MMIO(0x20ec), D_SKL_PLUS, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); /* TRTT */ - MMIO_DFH(0x4de0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4de8, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4dec, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df0, D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); - MMIO_DFH(0x4df4, D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); - MMIO_DH(0x4dfc, D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); + MMIO_DFH(_MMIO(0x4de0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4de8), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4dec), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df0), D_SKL | D_KBL, F_CMD_ACCESS, NULL, NULL); + MMIO_DFH(_MMIO(0x4df4), D_SKL | D_KBL, F_CMD_ACCESS, NULL, gen9_trtte_write); + MMIO_DH(_MMIO(0x4dfc), D_SKL | D_KBL, NULL, gen9_trtt_chicken_write); - MMIO_D(0x45008, D_SKL | D_KBL); + MMIO_D(_MMIO(0x45008), D_SKL | D_KBL); - MMIO_D(0x46430, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46430), D_SKL | D_KBL); - MMIO_D(0x46520, D_SKL | D_KBL); + MMIO_D(_MMIO(0x46520), D_SKL | D_KBL); - MMIO_D(0xc403c, D_SKL | D_KBL); - MMIO_D(0xb004, D_SKL_PLUS); + MMIO_D(_MMIO(0xc403c), D_SKL | D_KBL); + MMIO_D(_MMIO(0xb004), D_SKL_PLUS); MMIO_DH(DMA_CTRL, D_SKL_PLUS, NULL, dma_ctrl_write); - MMIO_D(0x65900, D_SKL_PLUS); - MMIO_D(0x1082c0, D_SKL | D_KBL); - MMIO_D(0x4068, D_SKL | D_KBL); - MMIO_D(0x67054, D_SKL | D_KBL); - MMIO_D(0x6e560, D_SKL | D_KBL); - MMIO_D(0x6e554, D_SKL | D_KBL); - MMIO_D(0x2b20, D_SKL | D_KBL); - MMIO_D(0x65f00, D_SKL | D_KBL); - MMIO_D(0x65f08, D_SKL | D_KBL); - MMIO_D(0x320f0, D_SKL | D_KBL); - - MMIO_D(0x70034, D_SKL_PLUS); - MMIO_D(0x71034, D_SKL_PLUS); - MMIO_D(0x72034, D_SKL_PLUS); - - MMIO_D(_PLANE_KEYVAL_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYVAL_1(PIPE_C), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_A), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_B), D_SKL_PLUS); - MMIO_D(_PLANE_KEYMSK_1(PIPE_C), D_SKL_PLUS); - - MMIO_D(0x44500, D_SKL_PLUS); + MMIO_D(_MMIO(0x65900), D_SKL_PLUS); + MMIO_D(_MMIO(0x1082c0), D_SKL | D_KBL); + MMIO_D(_MMIO(0x4068), D_SKL | D_KBL); + MMIO_D(_MMIO(0x67054), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e560), D_SKL | D_KBL); + MMIO_D(_MMIO(0x6e554), D_SKL | D_KBL); + MMIO_D(_MMIO(0x2b20), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f00), D_SKL | D_KBL); + MMIO_D(_MMIO(0x65f08), D_SKL | D_KBL); + MMIO_D(_MMIO(0x320f0), D_SKL | D_KBL); + + MMIO_D(_MMIO(0x70034), D_SKL_PLUS); + MMIO_D(_MMIO(0x71034), D_SKL_PLUS); + MMIO_D(_MMIO(0x72034), D_SKL_PLUS); + + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_A)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_B)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYVAL_1(PIPE_C)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_A)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_B)), D_SKL_PLUS); + MMIO_D(_MMIO(_PLANE_KEYMSK_1(PIPE_C)), D_SKL_PLUS); + + MMIO_D(_MMIO(0x44500), D_SKL_PLUS); MMIO_DFH(GEN9_CSFE_CHICKEN1_RCS, D_SKL_PLUS, F_CMD_ACCESS, NULL, NULL); MMIO_DFH(GEN8_HDC_CHICKEN1, D_SKL | D_KBL, F_MODE_MASK | F_CMD_ACCESS, NULL, NULL); - MMIO_D(0x4ab8, D_KBL); - MMIO_D(0x2248, D_SKL_PLUS | D_KBL); + MMIO_D(_MMIO(0x4ab8), D_KBL); + MMIO_D(_MMIO(0x2248), D_SKL_PLUS | D_KBL); return 0; } @@ -2867,8 +2869,8 @@ static struct gvt_mmio_block 
*find_mmio_block(struct intel_gvt *gvt, for (i = 0; i < num; i++, block++) { if (!(device & block->device)) continue; - if (offset >= INTEL_GVT_MMIO_OFFSET(block->offset) && - offset < INTEL_GVT_MMIO_OFFSET(block->offset) + block->size) + if (offset >= i915_mmio_reg_offset(block->offset) && + offset < i915_mmio_reg_offset(block->offset) + block->size) return block; } return NULL; @@ -2980,8 +2982,8 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, for (i = 0; i < gvt->mmio.num_mmio_block; i++, block++) { for (j = 0; j < block->size; j += 4) { ret = handler(gvt, - INTEL_GVT_MMIO_OFFSET(block->offset) + j, - data); + i915_mmio_reg_offset(block->offset) + j, + data); if (ret) return ret; } diff --git a/drivers/gpu/drm/i915/gvt/hypercall.h b/drivers/gpu/drm/i915/gvt/hypercall.h index df7f33abd393..a1bd82feb827 100644 --- a/drivers/gpu/drm/i915/gvt/hypercall.h +++ b/drivers/gpu/drm/i915/gvt/hypercall.h @@ -55,6 +55,9 @@ struct intel_gvt_mpt { unsigned long mfn, unsigned int nr, bool map); int (*set_trap_area)(unsigned long handle, u64 start, u64 end, bool map); + int (*set_opregion)(void *vgpu); + int (*get_vfio_device)(void *vgpu); + void (*put_vfio_device)(void *vgpu); }; extern struct intel_gvt_mpt xengt_mpt; diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c index 110f07e8bcfb..45bab5a6290b 100644 --- a/drivers/gpu/drm/i915/gvt/kvmgt.c +++ b/drivers/gpu/drm/i915/gvt/kvmgt.c @@ -53,11 +53,23 @@ static const struct intel_gvt_ops *intel_gvt_ops; #define VFIO_PCI_INDEX_TO_OFFSET(index) ((u64)(index) << VFIO_PCI_OFFSET_SHIFT) #define VFIO_PCI_OFFSET_MASK (((u64)(1) << VFIO_PCI_OFFSET_SHIFT) - 1) +#define OPREGION_SIGNATURE "IntelGraphicsMem" + +struct vfio_region; +struct intel_vgpu_regops { + size_t (*rw)(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite); + void (*release)(struct intel_vgpu *vgpu, + struct vfio_region *region); +}; + struct vfio_region { u32 type; u32 subtype; size_t size; u32 flags; + const struct intel_vgpu_regops *ops; + void *data; }; struct kvmgt_pgfn { @@ -316,6 +328,108 @@ static void kvmgt_protect_table_del(struct kvmgt_guest_info *info, } } +static size_t intel_vgpu_reg_rw_opregion(struct intel_vgpu *vgpu, char *buf, + size_t count, loff_t *ppos, bool iswrite) +{ + unsigned int i = VFIO_PCI_OFFSET_TO_INDEX(*ppos) - + VFIO_PCI_NUM_REGIONS; + void *base = vgpu->vdev.region[i].data; + loff_t pos = *ppos & VFIO_PCI_OFFSET_MASK; + + if (pos >= vgpu->vdev.region[i].size || iswrite) { + gvt_vgpu_err("invalid op or offset for Intel vgpu OpRegion\n"); + return -EINVAL; + } + count = min(count, (size_t)(vgpu->vdev.region[i].size - pos)); + memcpy(buf, base + pos, count); + + return count; +} + +static void intel_vgpu_reg_release_opregion(struct intel_vgpu *vgpu, + struct vfio_region *region) +{ +} + +static const struct intel_vgpu_regops intel_vgpu_regops_opregion = { + .rw = intel_vgpu_reg_rw_opregion, + .release = intel_vgpu_reg_release_opregion, +}; + +static int intel_vgpu_register_reg(struct intel_vgpu *vgpu, + unsigned int type, unsigned int subtype, + const struct intel_vgpu_regops *ops, + size_t size, u32 flags, void *data) +{ + struct vfio_region *region; + + region = krealloc(vgpu->vdev.region, + (vgpu->vdev.num_regions + 1) * sizeof(*region), + GFP_KERNEL); + if (!region) + return -ENOMEM; + + vgpu->vdev.region = region; + vgpu->vdev.region[vgpu->vdev.num_regions].type = type; + vgpu->vdev.region[vgpu->vdev.num_regions].subtype = subtype; + vgpu->vdev.region[vgpu->vdev.num_regions].ops = ops; + 
vgpu->vdev.region[vgpu->vdev.num_regions].size = size; + vgpu->vdev.region[vgpu->vdev.num_regions].flags = flags; + vgpu->vdev.region[vgpu->vdev.num_regions].data = data; + vgpu->vdev.num_regions++; + return 0; +} + +static int kvmgt_get_vfio_device(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + + vgpu->vdev.vfio_device = vfio_device_get_from_dev( + mdev_dev(vgpu->vdev.mdev)); + if (!vgpu->vdev.vfio_device) { + gvt_vgpu_err("failed to get vfio device\n"); + return -ENODEV; + } + return 0; +} + + +static int kvmgt_set_opregion(void *p_vgpu) +{ + struct intel_vgpu *vgpu = (struct intel_vgpu *)p_vgpu; + void *base; + int ret; + + /* Each vgpu has its own opregion, although VFIO would create another + * one later. This one is used to expose opregion to VFIO. And the + * other one created by VFIO later, is used by guest actually. + */ + base = vgpu_opregion(vgpu)->va; + if (!base) + return -ENOMEM; + + if (memcmp(base, OPREGION_SIGNATURE, 16)) { + memunmap(base); + return -EINVAL; + } + + ret = intel_vgpu_register_reg(vgpu, + PCI_VENDOR_ID_INTEL | VFIO_REGION_TYPE_PCI_VENDOR_TYPE, + VFIO_REGION_SUBTYPE_INTEL_IGD_OPREGION, + &intel_vgpu_regops_opregion, OPREGION_SIZE, + VFIO_REGION_INFO_FLAG_READ, base); + + return ret; +} + +static void kvmgt_put_vfio_device(void *vgpu) +{ + if (WARN_ON(!((struct intel_vgpu *)vgpu)->vdev.vfio_device)) + return; + + vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device); +} + static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev) { struct intel_vgpu *vgpu = NULL; @@ -546,7 +660,7 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, int ret = -EINVAL; - if (index >= VFIO_PCI_NUM_REGIONS) { + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) { gvt_vgpu_err("invalid index: %u\n", index); return -EINVAL; } @@ -574,8 +688,14 @@ static ssize_t intel_vgpu_rw(struct mdev_device *mdev, char *buf, case VFIO_PCI_BAR5_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: case VFIO_PCI_ROM_REGION_INDEX: + break; default: - gvt_vgpu_err("unsupported region: %u\n", index); + if (index >= VFIO_PCI_NUM_REGIONS + vgpu->vdev.num_regions) + return -EINVAL; + + index -= VFIO_PCI_NUM_REGIONS; + return vgpu->vdev.region[index].ops->rw(vgpu, buf, count, + ppos, is_write); } return ret == 0 ? count : ret; @@ -838,7 +958,8 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, info.flags = VFIO_DEVICE_FLAGS_PCI; info.flags |= VFIO_DEVICE_FLAGS_RESET; - info.num_regions = VFIO_PCI_NUM_REGIONS; + info.num_regions = VFIO_PCI_NUM_REGIONS + + vgpu->vdev.num_regions; info.num_irqs = VFIO_PCI_NUM_IRQS; return copy_to_user((void __user *)arg, &info, minsz) ? @@ -908,13 +1029,17 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, case VFIO_PCI_BAR3_REGION_INDEX ... 
VFIO_PCI_BAR5_REGION_INDEX: info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); info.size = 0; - info.flags = 0; + gvt_dbg_core("get region info bar:%d\n", info.index); break; case VFIO_PCI_ROM_REGION_INDEX: case VFIO_PCI_VGA_REGION_INDEX: + info.offset = VFIO_PCI_INDEX_TO_OFFSET(info.index); + info.size = 0; + info.flags = 0; + gvt_dbg_core("get region info index:%d\n", info.index); break; default: @@ -959,6 +1084,7 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } if (caps.size) { + info.flags |= VFIO_REGION_INFO_FLAG_CAPS; if (info.argsz < sizeof(info) + caps.size) { info.argsz = sizeof(info) + caps.size; info.cap_offset = 0; @@ -1045,6 +1171,33 @@ static long intel_vgpu_ioctl(struct mdev_device *mdev, unsigned int cmd, } else if (cmd == VFIO_DEVICE_RESET) { intel_gvt_ops->vgpu_reset(vgpu); return 0; + } else if (cmd == VFIO_DEVICE_QUERY_GFX_PLANE) { + struct vfio_device_gfx_plane_info dmabuf; + int ret = 0; + + minsz = offsetofend(struct vfio_device_gfx_plane_info, + dmabuf_id); + if (copy_from_user(&dmabuf, (void __user *)arg, minsz)) + return -EFAULT; + if (dmabuf.argsz < minsz) + return -EINVAL; + + ret = intel_gvt_ops->vgpu_query_plane(vgpu, &dmabuf); + if (ret != 0) + return ret; + + return copy_to_user((void __user *)arg, &dmabuf, minsz) ? + -EFAULT : 0; + } else if (cmd == VFIO_DEVICE_GET_GFX_DMABUF) { + __u32 dmabuf_id; + __s32 dmabuf_fd; + + if (get_user(dmabuf_id, (__u32 __user *)arg)) + return -EFAULT; + + dmabuf_fd = intel_gvt_ops->vgpu_get_dmabuf(vgpu, dmabuf_id); + return dmabuf_fd; + } return 0; @@ -1207,8 +1360,8 @@ static void kvmgt_page_track_write(struct kvm_vcpu *vcpu, gpa_t gpa, struct kvmgt_guest_info, track_node); if (kvmgt_gfn_is_write_protected(info, gpa_to_gfn(gpa))) - intel_gvt_ops->emulate_mmio_write(info->vgpu, gpa, - (void *)val, len); + intel_gvt_ops->write_protect_handler(info->vgpu, gpa, + (void *)val, len); } static void kvmgt_page_track_flush_slot(struct kvm *kvm, @@ -1286,6 +1439,9 @@ static int kvmgt_guest_init(struct mdev_device *mdev) kvmgt_protect_table_init(info); gvt_cache_init(vgpu); + mutex_init(&vgpu->dmabuf_lock); + init_completion(&vgpu->vblank_done); + info->track_node.track_write = kvmgt_page_track_write; info->track_node.track_flush_slot = kvmgt_page_track_flush_slot; kvm_page_track_register_notifier(kvm, &info->track_node); @@ -1426,6 +1582,9 @@ struct intel_gvt_mpt kvmgt_mpt = { .read_gpa = kvmgt_read_gpa, .write_gpa = kvmgt_write_gpa, .gfn_to_mfn = kvmgt_gfn_to_pfn, + .set_opregion = kvmgt_set_opregion, + .get_vfio_device = kvmgt_get_vfio_device, + .put_vfio_device = kvmgt_put_vfio_device, }; EXPORT_SYMBOL_GPL(kvmgt_mpt); diff --git a/drivers/gpu/drm/i915/gvt/mmio.c b/drivers/gpu/drm/i915/gvt/mmio.c index 4ea0feb5f04d..562b5ad857a4 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.c +++ b/drivers/gpu/drm/i915/gvt/mmio.c @@ -117,25 +117,6 @@ static void failsafe_emulate_mmio_rw(struct intel_vgpu *vgpu, uint64_t pa, else memcpy(pt, p_data, bytes); - } else if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { - struct intel_vgpu_page_track *t; - - /* Since we enter the failsafe mode early during guest boot, - * guest may not have chance to set up its ppgtt table, so - * there should not be any wp pages for guest. Keep the wp - * related code here in case we need to handle it in furture. 
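A rough userspace sketch of how the two new ioctls handled in intel_vgpu_ioctl() above are meant to be driven; this is not part of the patch, and the vfio_device_gfx_plane_info field names other than argsz and dmabuf_id are assumptions taken from the uapi headers of this kernel generation, so check <linux/vfio.h> before relying on them:

/*
 * Query the guest's primary plane, then turn the returned dmabuf_id into a
 * dma-buf fd.  Error handling is kept minimal on purpose.
 */
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/vfio.h>

static int get_primary_plane_dmabuf(int device_fd)
{
	struct vfio_device_gfx_plane_info plane;
	int dmabuf_fd;

	memset(&plane, 0, sizeof(plane));
	plane.argsz = sizeof(plane);
	plane.flags = VFIO_GFX_PLANE_TYPE_DMABUF;  /* ask for a dma-buf, not a region */
	plane.drm_plane_type = 1;                  /* DRM_PLANE_TYPE_PRIMARY */

	if (ioctl(device_fd, VFIO_DEVICE_QUERY_GFX_PLANE, &plane) < 0) {
		perror("VFIO_DEVICE_QUERY_GFX_PLANE");
		return -1;
	}

	printf("plane %ux%u fmt 0x%x stride %u dmabuf_id %u\n",
	       plane.width, plane.height, plane.drm_format,
	       plane.stride, plane.dmabuf_id);

	/* GET_GFX_DMABUF consumes the id and returns the fd as the ioctl result. */
	dmabuf_fd = ioctl(device_fd, VFIO_DEVICE_GET_GFX_DMABUF, &plane.dmabuf_id);
	if (dmabuf_fd < 0)
		perror("VFIO_DEVICE_GET_GFX_DMABUF");

	return dmabuf_fd;
}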
- */ - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - /* remove write protection to prevent furture traps */ - intel_vgpu_clean_page_track(vgpu, t); - if (read) - intel_gvt_hypervisor_read_gpa(vgpu, pa, - p_data, bytes); - else - intel_gvt_hypervisor_write_gpa(vgpu, pa, - p_data, bytes); - } } mutex_unlock(&gvt->lock); } @@ -157,7 +138,6 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, unsigned int offset = 0; int ret = -EINVAL; - if (vgpu->failsafe) { failsafe_emulate_mmio_rw(vgpu, pa, p_data, bytes, true); return 0; @@ -166,26 +146,7 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, if (vgpu_gpa_is_aperture(vgpu, pa)) { ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, true); - mutex_unlock(&gvt->lock); - return ret; - } - - if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { - struct intel_vgpu_page_track *t; - - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, - p_data, bytes); - if (ret) { - gvt_vgpu_err("guest page read error %d, " - "gfn 0x%lx, pa 0x%llx, var 0x%x, len %d\n", - ret, t->gfn, pa, *(u32 *)p_data, - bytes); - } - mutex_unlock(&gvt->lock); - return ret; - } + goto out; } offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -205,14 +166,12 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_read_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON(!reg_is_mmio(gvt, offset + bytes - 1))) @@ -228,11 +187,13 @@ int intel_vgpu_emulate_mmio_read(struct intel_vgpu *vgpu, uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; + err: gvt_vgpu_err("fail to emulate MMIO read %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } @@ -263,26 +224,7 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, if (vgpu_gpa_is_aperture(vgpu, pa)) { ret = vgpu_aperture_rw(vgpu, pa, p_data, bytes, false); - mutex_unlock(&gvt->lock); - return ret; - } - - if (atomic_read(&vgpu->gtt.n_tracked_guest_page)) { - struct intel_vgpu_page_track *t; - - t = intel_vgpu_find_tracked_page(vgpu, pa >> PAGE_SHIFT); - if (t) { - ret = t->handler(t, pa, p_data, bytes); - if (ret) { - gvt_err("guest page write error %d, " - "gfn 0x%lx, pa 0x%llx, " - "var 0x%x, len %d\n", - ret, t->gfn, pa, - *(u32 *)p_data, bytes); - } - mutex_unlock(&gvt->lock); - return ret; - } + goto out; } offset = intel_vgpu_gpa_to_mmio_offset(vgpu, pa); @@ -302,14 +244,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, p_data, bytes); if (ret) goto err; - mutex_unlock(&gvt->lock); - return ret; + goto out; } if (WARN_ON_ONCE(!reg_is_mmio(gvt, offset))) { ret = intel_gvt_hypervisor_write_gpa(vgpu, pa, p_data, bytes); - mutex_unlock(&gvt->lock); - return ret; + goto out; } ret = intel_vgpu_mmio_reg_rw(vgpu, offset, p_data, bytes, false); @@ -317,11 +257,12 @@ int intel_vgpu_emulate_mmio_write(struct intel_vgpu *vgpu, uint64_t pa, goto err; intel_gvt_mmio_set_accessed(gvt, offset); - mutex_unlock(&gvt->lock); - return 0; + ret = 0; + goto out; err: gvt_vgpu_err("fail to emulate MMIO write %08x len %d\n", offset, bytes); +out: mutex_unlock(&gvt->lock); return ret; } @@ -342,10 +283,10 @@ void intel_vgpu_reset_mmio(struct intel_vgpu 
*vgpu, bool dmlr) memcpy(vgpu->mmio.vreg, mmio, info->mmio_size); memcpy(vgpu->mmio.sreg, mmio, info->mmio_size); - vgpu_vreg(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; + vgpu_vreg_t(vgpu, GEN6_GT_THREAD_STATUS_REG) = 0; /* set the bit 0:2(Core C-State ) to C0 */ - vgpu_vreg(vgpu, GEN6_GT_CORE_STATUS) = 0; + vgpu_vreg_t(vgpu, GEN6_GT_CORE_STATUS) = 0; vgpu->mmio.disable_warn_untrack = false; } else { diff --git a/drivers/gpu/drm/i915/gvt/mmio.h b/drivers/gpu/drm/i915/gvt/mmio.h index 62709ac351cd..71b620875943 100644 --- a/drivers/gpu/drm/i915/gvt/mmio.h +++ b/drivers/gpu/drm/i915/gvt/mmio.h @@ -76,13 +76,6 @@ int intel_gvt_for_each_tracked_mmio(struct intel_gvt *gvt, int (*handler)(struct intel_gvt *gvt, u32 offset, void *data), void *data); - -#define INTEL_GVT_MMIO_OFFSET(reg) ({ \ - typeof(reg) __reg = reg; \ - u32 *offset = (u32 *)&__reg; \ - *offset; \ -}) - int intel_vgpu_init_mmio(struct intel_vgpu *vgpu); void intel_vgpu_reset_mmio(struct intel_vgpu *vgpu, bool dmlr); void intel_vgpu_clean_mmio(struct intel_vgpu *vgpu); diff --git a/drivers/gpu/drm/i915/gvt/mmio_context.c b/drivers/gpu/drm/i915/gvt/mmio_context.c new file mode 100644 index 000000000000..74834395dd89 --- /dev/null +++ b/drivers/gpu/drm/i915/gvt/mmio_context.c @@ -0,0 +1,412 @@ +/* + * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eddie Dong <eddie.dong@intel.com> + * Kevin Tian <kevin.tian@intel.com> + * + * Contributors: + * Zhi Wang <zhi.a.wang@intel.com> + * Changbin Du <changbin.du@intel.com> + * Zhenyu Wang <zhenyuw@linux.intel.com> + * Tina Zhang <tina.zhang@intel.com> + * Bing Niu <bing.niu@intel.com> + * + */ + +#include "i915_drv.h" +#include "gvt.h" +#include "trace.h" + +/** + * Defined in Intel Open Source PRM. + * Ref: https://01.org/linuxgraphics/documentation/hardware-specification-prms + */ +#define TRVATTL3PTRDW(i) _MMIO(0x4de0 + (i)*4) +#define TRNULLDETCT _MMIO(0x4de8) +#define TRINVTILEDETCT _MMIO(0x4dec) +#define TRVADR _MMIO(0x4df0) +#define TRTTE _MMIO(0x4df4) +#define RING_EXCC(base) _MMIO((base) + 0x28) +#define RING_GFX_MODE(base) _MMIO((base) + 0x29c) +#define VF_GUARDBAND _MMIO(0x83a4) + +/* Raw offset is appened to each line for convenience. 
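The hunks above (and the vgpu.c hunk later in this patch) replace the INTEL_GVT_MMIO_OFFSET() cast trick with the typed i915_reg_t accessors. Below is a minimal sketch of what those helpers boil down to, assuming vgpu_vreg_t() is a thin wrapper that unwraps the typed register before indexing the vGPU's virtual register file; the real definitions live in i915_reg.h and gvt.h, not here:

#include <stdint.h>

/* i915_reg_t wraps the raw offset so a bare integer can no longer be passed
 * where a register is expected. */
typedef struct { uint32_t reg; } i915_reg_t;
#define _MMIO(r) ((const i915_reg_t){ .reg = (r) })

static inline uint32_t i915_mmio_reg_offset(i915_reg_t reg)
{
	return reg.reg;
}

/* Stand-in for the vGPU's virtual register file (vgpu->mmio.vreg). */
struct fake_vgpu_mmio {
	uint8_t *vreg;
};

/* Assumed shape of the accessor that replaces vgpu_vreg(vgpu, raw_offset). */
#define vgpu_vreg_t(mmio, reg) \
	(*(uint32_t *)((mmio)->vreg + i915_mmio_reg_offset(reg)))

With the typed form, vgpu_vreg_t(mmio, GEN6_GT_CORE_STATUS) still works, while passing a bare offset such as vgpu_vreg_t(mmio, 0x138) stops compiling, which is the point of the conversion.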
*/ +static struct engine_mmio gen8_engine_mmio_list[] __cacheline_aligned = { + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + { /* Terminated */ } +}; + +static struct engine_mmio gen9_engine_mmio_list[] __cacheline_aligned = { + {RCS, GFX_MODE_GEN7, 0xffff, false}, /* 0x229c */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, HWSTAM, 0x0, false}, /* 0x2098 */ + {RCS, INSTPM, 0xffff, true}, /* 0x20c0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 0), 0, false}, /* 0x24d0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 1), 0, false}, /* 0x24d4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 2), 0, false}, /* 0x24d8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 3), 0, false}, /* 0x24dc */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 4), 0, false}, /* 0x24e0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 5), 0, false}, /* 0x24e4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 6), 0, false}, /* 0x24e8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 7), 0, false}, /* 0x24ec */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 8), 0, false}, /* 0x24f0 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 9), 0, false}, /* 0x24f4 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 10), 0, false}, /* 0x24f8 */ + {RCS, RING_FORCE_TO_NONPRIV(RENDER_RING_BASE, 11), 0, false}, /* 0x24fc */ + {RCS, CACHE_MODE_1, 0xffff, true}, /* 0x7004 */ + {RCS, GEN7_GT_MODE, 0xffff, true}, /* 0x7008 */ + {RCS, CACHE_MODE_0_GEN7, 0xffff, true}, /* 0x7000 */ + {RCS, GEN7_COMMON_SLICE_CHICKEN1, 0xffff, true}, /* 0x7010 */ + {RCS, HDC_CHICKEN0, 0xffff, true}, /* 0x7300 */ + {RCS, VF_GUARDBAND, 0xffff, true}, /* 0x83a4 */ + + {RCS, GEN8_PRIVATE_PAT_LO, 0, false}, /* 0x40e0 */ + {RCS, GEN8_PRIVATE_PAT_HI, 0, false}, /* 0x40e4 */ + {RCS, GEN8_CS_CHICKEN1, 0xffff, true}, /* 0x2580 */ + {RCS, 
COMMON_SLICE_CHICKEN2, 0xffff, true}, /* 0x7014 */ + {RCS, GEN9_CS_DEBUG_MODE1, 0xffff, false}, /* 0x20ec */ + {RCS, GEN8_L3SQCREG4, 0, false}, /* 0xb118 */ + {RCS, GEN7_HALF_SLICE_CHICKEN1, 0xffff, true}, /* 0xe100 */ + {RCS, HALF_SLICE_CHICKEN2, 0xffff, true}, /* 0xe180 */ + {RCS, HALF_SLICE_CHICKEN3, 0xffff, true}, /* 0xe184 */ + {RCS, GEN9_HALF_SLICE_CHICKEN5, 0xffff, true}, /* 0xe188 */ + {RCS, GEN9_HALF_SLICE_CHICKEN7, 0xffff, true}, /* 0xe194 */ + {RCS, TRVATTL3PTRDW(0), 0, false}, /* 0x4de0 */ + {RCS, TRVATTL3PTRDW(1), 0, false}, /* 0x4de4 */ + {RCS, TRNULLDETCT, 0, false}, /* 0x4de8 */ + {RCS, TRINVTILEDETCT, 0, false}, /* 0x4dec */ + {RCS, TRVADR, 0, false}, /* 0x4df0 */ + {RCS, TRTTE, 0, false}, /* 0x4df4 */ + + {BCS, RING_GFX_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2229c */ + {BCS, RING_MI_MODE(BLT_RING_BASE), 0xffff, false}, /* 0x2209c */ + {BCS, RING_INSTPM(BLT_RING_BASE), 0xffff, false}, /* 0x220c0 */ + {BCS, RING_HWSTAM(BLT_RING_BASE), 0x0, false}, /* 0x22098 */ + {BCS, RING_EXCC(BLT_RING_BASE), 0x0, false}, /* 0x22028 */ + + {VCS2, RING_EXCC(GEN8_BSD2_RING_BASE), 0xffff, false}, /* 0x1c028 */ + + {VECS, RING_EXCC(VEBOX_RING_BASE), 0xffff, false}, /* 0x1a028 */ + + {RCS, GEN8_HDC_CHICKEN1, 0xffff, true}, /* 0x7304 */ + {RCS, GEN9_CTX_PREEMPT_REG, 0x0, false}, /* 0x2248 */ + {RCS, GEN7_UCGCTL4, 0x0, false}, /* 0x940c */ + {RCS, GAMT_CHKN_BIT_REG, 0x0, false}, /* 0x4ab8 */ + + {RCS, GEN9_GAMT_ECO_REG_RW_IA, 0x0, false}, /* 0x4ab0 */ + {RCS, GEN9_CSFE_CHICKEN1_RCS, 0x0, false}, /* 0x20d4 */ + + {RCS, GEN8_GARBCNTL, 0x0, false}, /* 0xb004 */ + {RCS, GEN7_FF_THREAD_MODE, 0x0, false}, /* 0x20a0 */ + {RCS, FF_SLICE_CS_CHICKEN2, 0xffff, false}, /* 0x20e4 */ + { /* Terminated */ } +}; + +static struct { + bool initialized; + u32 control_table[I915_NUM_ENGINES][64]; + u32 l3cc_table[32]; +} gen9_render_mocs; + +static void load_render_mocs(struct drm_i915_private *dev_priv) +{ + i915_reg_t offset; + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int ring_id, i; + + for (ring_id = 0; ring_id < I915_NUM_ENGINES; ring_id++) { + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + gen9_render_mocs.control_table[ring_id][i] = + I915_READ_FW(offset); + offset.reg += 4; + } + } + + offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + gen9_render_mocs.l3cc_table[i] = + I915_READ_FW(offset); + offset.reg += 4; + } + gen9_render_mocs.initialized = true; +} + +static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) +{ + struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; + struct intel_vgpu_submission *s = &vgpu->submission; + enum forcewake_domains fw; + i915_reg_t reg; + u32 regs[] = { + [RCS] = 0x4260, + [VCS] = 0x4264, + [VCS2] = 0x4268, + [BCS] = 0x426c, + [VECS] = 0x4270, + }; + + if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) + return; + + if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending)) + return; + + reg = _MMIO(regs[ring_id]); + + /* WaForceWakeRenderDuringMmioTLBInvalidate:skl + * we need to put a forcewake when invalidating RCS TLB caches, + * otherwise device can go to RC6 state and interrupt invalidation + * process + */ + fw = intel_uncore_forcewake_for_reg(dev_priv, reg, + FW_REG_READ | FW_REG_WRITE); + if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) + fw |= FORCEWAKE_RENDER; + + intel_uncore_forcewake_get(dev_priv, fw); + + I915_WRITE_FW(reg, 0x1); + + if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) + gvt_vgpu_err("timeout in invalidate ring (%d) 
tlb\n", ring_id); + else + vgpu_vreg_t(vgpu, reg) = 0; + + intel_uncore_forcewake_put(dev_priv, fw); + + gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); +} + +static void switch_mocs(struct intel_vgpu *pre, struct intel_vgpu *next, + int ring_id) +{ + struct drm_i915_private *dev_priv; + i915_reg_t offset, l3_offset; + u32 old_v, new_v; + + u32 regs[] = { + [RCS] = 0xc800, + [VCS] = 0xc900, + [VCS2] = 0xca00, + [BCS] = 0xcc00, + [VECS] = 0xcb00, + }; + int i; + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) + return; + + if (!pre && !gen9_render_mocs.initialized) + load_render_mocs(dev_priv); + + offset.reg = regs[ring_id]; + for (i = 0; i < 64; i++) { + if (pre) + old_v = vgpu_vreg_t(pre, offset); + else + old_v = gen9_render_mocs.control_table[ring_id][i]; + if (next) + new_v = vgpu_vreg_t(next, offset); + else + new_v = gen9_render_mocs.control_table[ring_id][i]; + + if (old_v != new_v) + I915_WRITE_FW(offset, new_v); + + offset.reg += 4; + } + + if (ring_id == RCS) { + l3_offset.reg = 0xb020; + for (i = 0; i < 32; i++) { + if (pre) + old_v = vgpu_vreg_t(pre, l3_offset); + else + old_v = gen9_render_mocs.l3cc_table[i]; + if (next) + new_v = vgpu_vreg_t(next, l3_offset); + else + new_v = gen9_render_mocs.l3cc_table[i]; + + if (old_v != new_v) + I915_WRITE_FW(l3_offset, new_v); + + l3_offset.reg += 4; + } + } +} + +#define CTX_CONTEXT_CONTROL_VAL 0x03 + +/* Switch ring mmio values (context). */ +static void switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, + int ring_id) +{ + struct drm_i915_private *dev_priv; + struct intel_vgpu_submission *s; + u32 *reg_state, ctx_ctrl; + u32 inhibit_mask = + _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); + struct engine_mmio *mmio; + u32 old_v, new_v; + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) + switch_mocs(pre, next, ring_id); + + mmio = dev_priv->gvt->engine_mmio_list; + while (i915_mmio_reg_offset((mmio++)->reg)) { + if (mmio->ring_id != ring_id) + continue; + // save + if (pre) { + vgpu_vreg_t(pre, mmio->reg) = I915_READ_FW(mmio->reg); + if (mmio->mask) + vgpu_vreg_t(pre, mmio->reg) &= + ~(mmio->mask << 16); + old_v = vgpu_vreg_t(pre, mmio->reg); + } else + old_v = mmio->value = I915_READ_FW(mmio->reg); + + // restore + if (next) { + s = &next->submission; + reg_state = + s->shadow_ctx->engine[ring_id].lrc_reg_state; + ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; + /* + * if it is an inhibit context, load in_context mmio + * into HW by mmio write. If it is not, skip this mmio + * write. + */ + if (mmio->in_context && + (ctx_ctrl & inhibit_mask) != inhibit_mask) + continue; + + if (mmio->mask) + new_v = vgpu_vreg_t(next, mmio->reg) | + (mmio->mask << 16); + else + new_v = vgpu_vreg_t(next, mmio->reg); + } else { + if (mmio->in_context) + continue; + if (mmio->mask) + new_v = mmio->value | (mmio->mask << 16); + else + new_v = mmio->value; + } + + I915_WRITE_FW(mmio->reg, new_v); + + trace_render_mmio(pre ? pre->id : 0, + next ? next->id : 0, + "switch", + i915_mmio_reg_offset(mmio->reg), + old_v, new_v); + } + + if (next) + handle_tlb_pending_event(next, ring_id); +} + +/** + * intel_gvt_switch_render_mmio - switch mmio context of specific engine + * @pre: the last vGPU that own the engine + * @next: the vGPU to switch to + * @ring_id: specify the engine + * + * If pre is null indicates that host own the engine. If next is null + * indicates that we are switching to host workload. 
+ */ +void intel_gvt_switch_mmio(struct intel_vgpu *pre, + struct intel_vgpu *next, int ring_id) +{ + struct drm_i915_private *dev_priv; + + if (WARN_ON(!pre && !next)) + return; + + gvt_dbg_render("switch ring %d from %s to %s\n", ring_id, + pre ? "vGPU" : "host", next ? "vGPU" : "HOST"); + + dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; + + /** + * We are using raw mmio access wrapper to improve the + * performace for batch mmio read/write, so we need + * handle forcewake mannually. + */ + intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); + switch_mmio(pre, next, ring_id); + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); +} + +/** + * intel_gvt_init_engine_mmio_context - Initiate the engine mmio list + * @gvt: GVT device + * + */ +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt) +{ + if (IS_SKYLAKE(gvt->dev_priv) || IS_KABYLAKE(gvt->dev_priv)) + gvt->engine_mmio_list = gen9_engine_mmio_list; + else + gvt->engine_mmio_list = gen8_engine_mmio_list; +} diff --git a/drivers/gpu/drm/i915/gvt/render.h b/drivers/gpu/drm/i915/gvt/mmio_context.h index 91db1d39d28f..ca2c6a745673 100644 --- a/drivers/gpu/drm/i915/gvt/render.h +++ b/drivers/gpu/drm/i915/gvt/mmio_context.h @@ -36,8 +36,17 @@ #ifndef __GVT_RENDER_H__ #define __GVT_RENDER_H__ +struct engine_mmio { + int ring_id; + i915_reg_t reg; + u32 mask; + bool in_context; + u32 value; +}; + void intel_gvt_switch_mmio(struct intel_vgpu *pre, struct intel_vgpu *next, int ring_id); +void intel_gvt_init_engine_mmio_context(struct intel_gvt *gvt); #endif diff --git a/drivers/gpu/drm/i915/gvt/mpt.h b/drivers/gpu/drm/i915/gvt/mpt.h index c436e20ea59e..ca8005a6d5fa 100644 --- a/drivers/gpu/drm/i915/gvt/mpt.h +++ b/drivers/gpu/drm/i915/gvt/mpt.h @@ -294,4 +294,49 @@ static inline int intel_gvt_hypervisor_set_trap_area( return intel_gvt_host.mpt->set_trap_area(vgpu->handle, start, end, map); } +/** + * intel_gvt_hypervisor_set_opregion - Set opregion for guest + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_set_opregion(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->set_opregion) + return 0; + + return intel_gvt_host.mpt->set_opregion(vgpu); +} + +/** + * intel_gvt_hypervisor_get_vfio_device - increase vfio device ref count + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. + */ +static inline int intel_gvt_hypervisor_get_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->get_vfio_device) + return 0; + + return intel_gvt_host.mpt->get_vfio_device(vgpu); +} + +/** + * intel_gvt_hypervisor_put_vfio_device - decrease vfio device ref count + * @vgpu: a vGPU + * + * Returns: + * Zero on success, negative error code if failed. 
+ */ +static inline void intel_gvt_hypervisor_put_vfio_device(struct intel_vgpu *vgpu) +{ + if (!intel_gvt_host.mpt->put_vfio_device) + return; + + intel_gvt_host.mpt->put_vfio_device(vgpu); +} + #endif /* _GVT_MPT_H_ */ diff --git a/drivers/gpu/drm/i915/gvt/opregion.c b/drivers/gpu/drm/i915/gvt/opregion.c index 80720e59723a..8420d1fc3ddb 100644 --- a/drivers/gpu/drm/i915/gvt/opregion.c +++ b/drivers/gpu/drm/i915/gvt/opregion.c @@ -213,11 +213,20 @@ static void virt_vbt_generation(struct vbt *v) v->driver_features.lvds_config = BDB_DRIVER_FEATURE_NO_LVDS; } -static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) +/** + * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion + * @vgpu: a vGPU + * @gpa: guest physical address of opregion + * + * Returns: + * Zero on success, negative error code if failed. + */ +int intel_vgpu_init_opregion(struct intel_vgpu *vgpu) { u8 *buf; struct opregion_header *header; struct vbt v; + const char opregion_signature[16] = OPREGION_SIGNATURE; gvt_dbg_core("init vgpu%d opregion\n", vgpu->id); vgpu_opregion(vgpu)->va = (void *)__get_free_pages(GFP_KERNEL | @@ -231,8 +240,8 @@ static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) /* emulated opregion with VBT mailbox only */ buf = (u8 *)vgpu_opregion(vgpu)->va; header = (struct opregion_header *)buf; - memcpy(header->signature, OPREGION_SIGNATURE, - sizeof(OPREGION_SIGNATURE)); + memcpy(header->signature, opregion_signature, + sizeof(opregion_signature)); header->size = 0x8; header->opregion_ver = 0x02000000; header->mboxes = MBOX_VBT; @@ -250,25 +259,6 @@ static int alloc_and_init_virt_opregion(struct intel_vgpu *vgpu) return 0; } -static int init_vgpu_opregion(struct intel_vgpu *vgpu, u32 gpa) -{ - int i, ret; - - if (WARN((vgpu_opregion(vgpu)->va), - "vgpu%d: opregion has been initialized already.\n", - vgpu->id)) - return -EINVAL; - - ret = alloc_and_init_virt_opregion(vgpu); - if (ret < 0) - return ret; - - for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) - vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; - - return 0; -} - static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) { u64 mfn; @@ -290,59 +280,91 @@ static int map_vgpu_opregion(struct intel_vgpu *vgpu, bool map) return ret; } } + + vgpu_opregion(vgpu)->mapped = map; + return 0; } /** - * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion + * intel_vgpu_opregion_base_write_handler - Opregion base register write handler + * * @vgpu: a vGPU + * @gpa: guest physical address of opregion * + * Returns: + * Zero on success, negative error code if failed. 
*/ -void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) +int intel_vgpu_opregion_base_write_handler(struct intel_vgpu *vgpu, u32 gpa) { - gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); - if (!vgpu_opregion(vgpu)->va) - return; + int i, ret = 0; + unsigned long pfn; - if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { - map_vgpu_opregion(vgpu, false); - free_pages((unsigned long)vgpu_opregion(vgpu)->va, - get_order(INTEL_GVT_OPREGION_SIZE)); + gvt_dbg_core("emulate opregion from kernel\n"); + + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_KVM: + pfn = intel_gvt_hypervisor_gfn_to_mfn(vgpu, gpa >> PAGE_SHIFT); + vgpu_opregion(vgpu)->va_gopregion = memremap(pfn << PAGE_SHIFT, + INTEL_GVT_OPREGION_SIZE, + MEMREMAP_WB); + if (!vgpu_opregion(vgpu)->va_gopregion) { + gvt_vgpu_err("failed to map guest opregion\n"); + ret = -EFAULT; + } + vgpu_opregion(vgpu)->mapped = true; + break; + case INTEL_GVT_HYPERVISOR_XEN: + /** + * Wins guest on Xengt will write this register twice: xen + * hvmloader and windows graphic driver. + */ + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + + for (i = 0; i < INTEL_GVT_OPREGION_PAGES; i++) + vgpu_opregion(vgpu)->gfn[i] = (gpa >> PAGE_SHIFT) + i; - vgpu_opregion(vgpu)->va = NULL; + ret = map_vgpu_opregion(vgpu, true); + break; + default: + ret = -EINVAL; + gvt_vgpu_err("not supported hypervisor\n"); } + + return ret; } /** - * intel_vgpu_init_opregion - initialize the stuff used to emulate opregion + * intel_vgpu_clean_opregion - clean the stuff used to emulate opregion * @vgpu: a vGPU - * @gpa: guest physical address of opregion * - * Returns: - * Zero on success, negative error code if failed. */ -int intel_vgpu_init_opregion(struct intel_vgpu *vgpu, u32 gpa) +void intel_vgpu_clean_opregion(struct intel_vgpu *vgpu) { - int ret; + gvt_dbg_core("vgpu%d: clean vgpu opregion\n", vgpu->id); - gvt_dbg_core("vgpu%d: init vgpu opregion\n", vgpu->id); + if (!vgpu_opregion(vgpu)->va) + return; if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_XEN) { - gvt_dbg_core("emulate opregion from kernel\n"); - - ret = init_vgpu_opregion(vgpu, gpa); - if (ret) - return ret; - - ret = map_vgpu_opregion(vgpu, true); - if (ret) - return ret; + if (vgpu_opregion(vgpu)->mapped) + map_vgpu_opregion(vgpu, false); + } else if (intel_gvt_host.hypervisor_type == INTEL_GVT_HYPERVISOR_KVM) { + if (vgpu_opregion(vgpu)->mapped) { + memunmap(vgpu_opregion(vgpu)->va_gopregion); + vgpu_opregion(vgpu)->va_gopregion = NULL; + } } + free_pages((unsigned long)vgpu_opregion(vgpu)->va, + get_order(INTEL_GVT_OPREGION_SIZE)); + + vgpu_opregion(vgpu)->va = NULL; - return 0; } + #define GVT_OPREGION_FUNC(scic) \ ({ \ u32 __ret; \ @@ -461,8 +483,21 @@ int intel_vgpu_emulate_opregion_request(struct intel_vgpu *vgpu, u32 swsci) u32 *scic, *parm; u32 func, subfunc; - scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC; - parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; + switch (intel_gvt_host.hypervisor_type) { + case INTEL_GVT_HYPERVISOR_XEN: + scic = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_SCIC; + parm = vgpu_opregion(vgpu)->va + INTEL_GVT_OPREGION_PARM; + break; + case INTEL_GVT_HYPERVISOR_KVM: + scic = vgpu_opregion(vgpu)->va_gopregion + + INTEL_GVT_OPREGION_SCIC; + parm = vgpu_opregion(vgpu)->va_gopregion + + INTEL_GVT_OPREGION_PARM; + break; + default: + gvt_vgpu_err("not supported hypervisor\n"); + return -EINVAL; + } if (!(swsci & SWSCI_SCI_SELECT)) { gvt_vgpu_err("requesting SMI service\n"); diff --git 
a/drivers/gpu/drm/i915/gvt/render.c b/drivers/gpu/drm/i915/gvt/render.c deleted file mode 100644 index dac12c25f349..000000000000 --- a/drivers/gpu/drm/i915/gvt/render.c +++ /dev/null @@ -1,406 +0,0 @@ -/* - * Copyright(c) 2011-2016 Intel Corporation. All rights reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - * - * Authors: - * Eddie Dong <eddie.dong@intel.com> - * Kevin Tian <kevin.tian@intel.com> - * - * Contributors: - * Zhi Wang <zhi.a.wang@intel.com> - * Changbin Du <changbin.du@intel.com> - * Zhenyu Wang <zhenyuw@linux.intel.com> - * Tina Zhang <tina.zhang@intel.com> - * Bing Niu <bing.niu@intel.com> - * - */ - -#include "i915_drv.h" -#include "gvt.h" -#include "trace.h" - -struct render_mmio { - int ring_id; - i915_reg_t reg; - u32 mask; - bool in_context; - u32 value; -}; - -static struct render_mmio gen8_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, _MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, - - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, -}; - -static struct render_mmio gen9_render_mmio_list[] __cacheline_aligned = { - {RCS, _MMIO(0x229c), 0xffff, false}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x2098), 0x0, false}, - {RCS, _MMIO(0x20c0), 0xffff, true}, - {RCS, _MMIO(0x24d0), 0, false}, - {RCS, _MMIO(0x24d4), 0, false}, - {RCS, _MMIO(0x24d8), 0, false}, - {RCS, _MMIO(0x24dc), 0, false}, - {RCS, _MMIO(0x24e0), 0, false}, - {RCS, _MMIO(0x24e4), 0, false}, - {RCS, _MMIO(0x24e8), 0, false}, - {RCS, _MMIO(0x24ec), 0, false}, - {RCS, _MMIO(0x24f0), 0, false}, - {RCS, _MMIO(0x24f4), 0, false}, - {RCS, _MMIO(0x24f8), 0, false}, - {RCS, 
_MMIO(0x24fc), 0, false}, - {RCS, _MMIO(0x7004), 0xffff, true}, - {RCS, _MMIO(0x7008), 0xffff, true}, - {RCS, _MMIO(0x7000), 0xffff, true}, - {RCS, _MMIO(0x7010), 0xffff, true}, - {RCS, _MMIO(0x7300), 0xffff, true}, - {RCS, _MMIO(0x83a4), 0xffff, true}, - - {RCS, _MMIO(0x40e0), 0, false}, - {RCS, _MMIO(0x40e4), 0, false}, - {RCS, _MMIO(0x2580), 0xffff, true}, - {RCS, _MMIO(0x7014), 0xffff, true}, - {RCS, _MMIO(0x20ec), 0xffff, false}, - {RCS, _MMIO(0xb118), 0, false}, - {RCS, _MMIO(0xe100), 0xffff, true}, - {RCS, _MMIO(0xe180), 0xffff, true}, - {RCS, _MMIO(0xe184), 0xffff, true}, - {RCS, _MMIO(0xe188), 0xffff, true}, - {RCS, _MMIO(0xe194), 0xffff, true}, - {RCS, _MMIO(0x4de0), 0, false}, - {RCS, _MMIO(0x4de4), 0, false}, - {RCS, _MMIO(0x4de8), 0, false}, - {RCS, _MMIO(0x4dec), 0, false}, - {RCS, _MMIO(0x4df0), 0, false}, - {RCS, _MMIO(0x4df4), 0, false}, - - {BCS, _MMIO(0x2229c), 0xffff, false}, - {BCS, _MMIO(0x2209c), 0xffff, false}, - {BCS, _MMIO(0x220c0), 0xffff, false}, - {BCS, _MMIO(0x22098), 0x0, false}, - {BCS, _MMIO(0x22028), 0x0, false}, - - {VCS2, _MMIO(0x1c028), 0xffff, false}, - - {VECS, _MMIO(0x1a028), 0xffff, false}, - - {RCS, _MMIO(0x7304), 0xffff, true}, - {RCS, _MMIO(0x2248), 0x0, false}, - {RCS, _MMIO(0x940c), 0x0, false}, - {RCS, _MMIO(0x4ab8), 0x0, false}, - - {RCS, _MMIO(0x4ab0), 0x0, false}, - {RCS, _MMIO(0x20d4), 0x0, false}, - - {RCS, _MMIO(0xb004), 0x0, false}, - {RCS, _MMIO(0x20a0), 0x0, false}, - {RCS, _MMIO(0x20e4), 0xffff, false}, -}; - -static u32 gen9_render_mocs[I915_NUM_ENGINES][64]; -static u32 gen9_render_mocs_L3[32]; - -static void handle_tlb_pending_event(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; - enum forcewake_domains fw; - i915_reg_t reg; - u32 regs[] = { - [RCS] = 0x4260, - [VCS] = 0x4264, - [VCS2] = 0x4268, - [BCS] = 0x426c, - [VECS] = 0x4270, - }; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - if (!test_and_clear_bit(ring_id, (void *)s->tlb_handle_pending)) - return; - - reg = _MMIO(regs[ring_id]); - - /* WaForceWakeRenderDuringMmioTLBInvalidate:skl - * we need to put a forcewake when invalidating RCS TLB caches, - * otherwise device can go to RC6 state and interrupt invalidation - * process - */ - fw = intel_uncore_forcewake_for_reg(dev_priv, reg, - FW_REG_READ | FW_REG_WRITE); - if (ring_id == RCS && (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv))) - fw |= FORCEWAKE_RENDER; - - intel_uncore_forcewake_get(dev_priv, fw); - - I915_WRITE_FW(reg, 0x1); - - if (wait_for_atomic((I915_READ_FW(reg) == 0), 50)) - gvt_vgpu_err("timeout in invalidate ring (%d) tlb\n", ring_id); - else - vgpu_vreg(vgpu, regs[ring_id]) = 0; - - intel_uncore_forcewake_put(dev_priv, fw); - - gvt_dbg_core("invalidate TLB for ring %d\n", ring_id); -} - -static void load_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - gen9_render_mocs[ring_id][i] = I915_READ_FW(offset); - I915_WRITE_FW(offset, vgpu_vreg(vgpu, offset)); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - gen9_render_mocs_L3[i] = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, vgpu_vreg(vgpu, l3_offset)); - 
l3_offset.reg += 4; - } - } -} - -static void restore_mocs(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - i915_reg_t offset, l3_offset; - u32 regs[] = { - [RCS] = 0xc800, - [VCS] = 0xc900, - [VCS2] = 0xca00, - [BCS] = 0xcc00, - [VECS] = 0xcb00, - }; - int i; - - if (WARN_ON(ring_id >= ARRAY_SIZE(regs))) - return; - - offset.reg = regs[ring_id]; - for (i = 0; i < 64; i++) { - vgpu_vreg(vgpu, offset) = I915_READ_FW(offset); - I915_WRITE_FW(offset, gen9_render_mocs[ring_id][i]); - offset.reg += 4; - } - - if (ring_id == RCS) { - l3_offset.reg = 0xb020; - for (i = 0; i < 32; i++) { - vgpu_vreg(vgpu, l3_offset) = I915_READ_FW(l3_offset); - I915_WRITE_FW(l3_offset, gen9_render_mocs_L3[i]); - l3_offset.reg += 4; - } - } -} - -#define CTX_CONTEXT_CONTROL_VAL 0x03 - -/* Switch ring mmio values (context) from host to a vgpu. */ -static void switch_mmio_to_vgpu(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct intel_vgpu_submission *s = &vgpu->submission; - u32 *reg_state = s->shadow_ctx->engine[ring_id].lrc_reg_state; - u32 ctx_ctrl = reg_state[CTX_CONTEXT_CONTROL_VAL]; - u32 inhibit_mask = - _MASKED_BIT_ENABLE(CTX_CTRL_ENGINE_CTX_RESTORE_INHIBIT); - i915_reg_t last_reg = _MMIO(0); - struct render_mmio *mmio; - u32 v; - int i, array_size; - - if (IS_SKYLAKE(vgpu->gvt->dev_priv) - || IS_KABYLAKE(vgpu->gvt->dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); - load_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - - for (i = 0; i < array_size; i++, mmio++) { - if (mmio->ring_id != ring_id) - continue; - - mmio->value = I915_READ_FW(mmio->reg); - - /* - * if it is an inhibit context, load in_context mmio - * into HW by mmio write. If it is not, skip this mmio - * write. - */ - if (mmio->in_context && - (ctx_ctrl & inhibit_mask) != inhibit_mask) - continue; - - if (mmio->mask) - v = vgpu_vreg(vgpu, mmio->reg) | (mmio->mask << 16); - else - v = vgpu_vreg(vgpu, mmio->reg); - - I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; - - trace_render_mmio(vgpu->id, "load", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); - } - - /* Make sure the swiched MMIOs has taken effect. */ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) - I915_READ_FW(last_reg); - - handle_tlb_pending_event(vgpu, ring_id); -} - -/* Switch ring mmio values (context) from vgpu to host. */ -static void switch_mmio_to_host(struct intel_vgpu *vgpu, int ring_id) -{ - struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv; - struct render_mmio *mmio; - i915_reg_t last_reg = _MMIO(0); - u32 v; - int i, array_size; - - if (IS_SKYLAKE(dev_priv) || IS_KABYLAKE(dev_priv)) { - mmio = gen9_render_mmio_list; - array_size = ARRAY_SIZE(gen9_render_mmio_list); - restore_mocs(vgpu, ring_id); - } else { - mmio = gen8_render_mmio_list; - array_size = ARRAY_SIZE(gen8_render_mmio_list); - } - - for (i = 0; i < array_size; i++, mmio++) { - if (mmio->ring_id != ring_id) - continue; - - vgpu_vreg(vgpu, mmio->reg) = I915_READ_FW(mmio->reg); - - if (mmio->mask) { - vgpu_vreg(vgpu, mmio->reg) &= ~(mmio->mask << 16); - v = mmio->value | (mmio->mask << 16); - } else - v = mmio->value; - - if (mmio->in_context) - continue; - - I915_WRITE_FW(mmio->reg, v); - last_reg = mmio->reg; - - trace_render_mmio(vgpu->id, "restore", - i915_mmio_reg_offset(mmio->reg), - mmio->value, v); - } - - /* Make sure the swiched MMIOs has taken effect. 
*/ - if (likely(INTEL_GVT_MMIO_OFFSET(last_reg))) - I915_READ_FW(last_reg); -} - -/** - * intel_gvt_switch_render_mmio - switch mmio context of specific engine - * @pre: the last vGPU that own the engine - * @next: the vGPU to switch to - * @ring_id: specify the engine - * - * If pre is null indicates that host own the engine. If next is null - * indicates that we are switching to host workload. - */ -void intel_gvt_switch_mmio(struct intel_vgpu *pre, - struct intel_vgpu *next, int ring_id) -{ - struct drm_i915_private *dev_priv; - - if (WARN_ON(!pre && !next)) - return; - - gvt_dbg_render("switch ring %d from %s to %s\n", ring_id, - pre ? "vGPU" : "host", next ? "vGPU" : "HOST"); - - dev_priv = pre ? pre->gvt->dev_priv : next->gvt->dev_priv; - - /** - * We are using raw mmio access wrapper to improve the - * performace for batch mmio read/write, so we need - * handle forcewake mannually. - */ - intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); - - /** - * TODO: Optimize for vGPU to vGPU switch by merging - * switch_mmio_to_host() and switch_mmio_to_vgpu(). - */ - if (pre) - switch_mmio_to_host(pre, ring_id); - - if (next) - switch_mmio_to_vgpu(next, ring_id); - - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); -} diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.c b/drivers/gpu/drm/i915/gvt/sched_policy.c index 03532dfc0cd5..eea1a2f92099 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.c +++ b/drivers/gpu/drm/i915/gvt/sched_policy.c @@ -372,6 +372,11 @@ void intel_vgpu_start_schedule(struct intel_vgpu *vgpu) vgpu->gvt->scheduler.sched_ops->start_schedule(vgpu); } +void intel_gvt_kick_schedule(struct intel_gvt *gvt) +{ + intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED); +} + void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu) { struct intel_gvt_workload_scheduler *scheduler = diff --git a/drivers/gpu/drm/i915/gvt/sched_policy.h b/drivers/gpu/drm/i915/gvt/sched_policy.h index ba00a5f7455f..7b59e3e88b8b 100644 --- a/drivers/gpu/drm/i915/gvt/sched_policy.h +++ b/drivers/gpu/drm/i915/gvt/sched_policy.h @@ -57,4 +57,6 @@ void intel_vgpu_start_schedule(struct intel_vgpu *vgpu); void intel_vgpu_stop_schedule(struct intel_vgpu *vgpu); +void intel_gvt_kick_schedule(struct intel_gvt *gvt); + #endif diff --git a/drivers/gpu/drm/i915/gvt/scheduler.c b/drivers/gpu/drm/i915/gvt/scheduler.c index d6177a0baeec..0056638b0c16 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.c +++ b/drivers/gpu/drm/i915/gvt/scheduler.c @@ -189,10 +189,12 @@ static int shadow_context_status_change(struct notifier_block *nb, atomic_set(&workload->shadow_ctx_active, 1); break; case INTEL_CONTEXT_SCHEDULE_OUT: - case INTEL_CONTEXT_SCHEDULE_PREEMPTED: save_ring_hw_state(workload->vgpu, ring_id); atomic_set(&workload->shadow_ctx_active, 0); break; + case INTEL_CONTEXT_SCHEDULE_PREEMPTED: + save_ring_hw_state(workload->vgpu, ring_id); + break; default: WARN_ON(1); return NOTIFY_OK; @@ -246,7 +248,7 @@ static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload) return 0; } -void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) +static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx) { if (!wa_ctx->indirect_ctx.obj) return; @@ -1037,6 +1039,9 @@ int intel_vgpu_setup_submission(struct intel_vgpu *vgpu) if (IS_ERR(s->shadow_ctx)) return PTR_ERR(s->shadow_ctx); + if (HAS_LOGICAL_RING_PREEMPTION(vgpu->gvt->dev_priv)) + s->shadow_ctx->priority = INT_MAX; + bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES); s->workloads = kmem_cache_create("gvt-g_vgpu_workload", @@ 
-1329,3 +1334,15 @@ intel_vgpu_create_workload(struct intel_vgpu *vgpu, int ring_id, return workload; } + +/** + * intel_vgpu_queue_workload - Qeue a vGPU workload + * @workload: the workload to queue in + */ +void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload) +{ + list_add_tail(&workload->list, + workload_q_head(workload->vgpu, workload->ring_id)); + intel_gvt_kick_schedule(workload->vgpu->gvt); + wake_up(&workload->vgpu->gvt->scheduler.waitq[workload->ring_id]); +} diff --git a/drivers/gpu/drm/i915/gvt/scheduler.h b/drivers/gpu/drm/i915/gvt/scheduler.h index e4a9f9acd4a9..3de77dfa7c59 100644 --- a/drivers/gpu/drm/i915/gvt/scheduler.h +++ b/drivers/gpu/drm/i915/gvt/scheduler.h @@ -125,12 +125,7 @@ struct intel_vgpu_shadow_bb { #define workload_q_head(vgpu, ring_id) \ (&(vgpu->submission.workload_q_head[ring_id])) -#define queue_workload(workload) do { \ - list_add_tail(&workload->list, \ - workload_q_head(workload->vgpu, workload->ring_id)); \ - wake_up(&workload->vgpu->gvt-> \ - scheduler.waitq[workload->ring_id]); \ -} while (0) +void intel_vgpu_queue_workload(struct intel_vgpu_workload *workload); int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt); diff --git a/drivers/gpu/drm/i915/gvt/trace.h b/drivers/gpu/drm/i915/gvt/trace.h index 8c150381d9a4..7a2511538f34 100644 --- a/drivers/gpu/drm/i915/gvt/trace.h +++ b/drivers/gpu/drm/i915/gvt/trace.h @@ -330,13 +330,14 @@ TRACE_EVENT(inject_msi, ); TRACE_EVENT(render_mmio, - TP_PROTO(int id, char *action, unsigned int reg, + TP_PROTO(int old_id, int new_id, char *action, unsigned int reg, unsigned int old_val, unsigned int new_val), - TP_ARGS(id, action, reg, new_val, old_val), + TP_ARGS(old_id, new_id, action, reg, new_val, old_val), TP_STRUCT__entry( - __field(int, id) + __field(int, old_id) + __field(int, new_id) __array(char, buf, GVT_TEMP_STR_LEN) __field(unsigned int, reg) __field(unsigned int, old_val) @@ -344,15 +345,17 @@ TRACE_EVENT(render_mmio, ), TP_fast_assign( - __entry->id = id; + __entry->old_id = old_id; + __entry->new_id = new_id; snprintf(__entry->buf, GVT_TEMP_STR_LEN, "%s", action); __entry->reg = reg; __entry->old_val = old_val; __entry->new_val = new_val; ), - TP_printk("VM%u %s reg %x, old %08x new %08x\n", - __entry->id, __entry->buf, __entry->reg, + TP_printk("VM%u -> VM%u %s reg %x, old %08x new %08x\n", + __entry->old_id, __entry->new_id, + __entry->buf, __entry->reg, __entry->old_val, __entry->new_val) ); diff --git a/drivers/gpu/drm/i915/gvt/vgpu.c b/drivers/gpu/drm/i915/gvt/vgpu.c index c6b82d1ba7de..4688619f6a1c 100644 --- a/drivers/gpu/drm/i915/gvt/vgpu.c +++ b/drivers/gpu/drm/i915/gvt/vgpu.c @@ -38,25 +38,25 @@ void populate_pvinfo_page(struct intel_vgpu *vgpu) { /* setup the ballooning information */ - vgpu_vreg64(vgpu, vgtif_reg(magic)) = VGT_MAGIC; - vgpu_vreg(vgpu, vgtif_reg(version_major)) = 1; - vgpu_vreg(vgpu, vgtif_reg(version_minor)) = 0; - vgpu_vreg(vgpu, vgtif_reg(display_ready)) = 0; - vgpu_vreg(vgpu, vgtif_reg(vgt_id)) = vgpu->id; + vgpu_vreg64_t(vgpu, vgtif_reg(magic)) = VGT_MAGIC; + vgpu_vreg_t(vgpu, vgtif_reg(version_major)) = 1; + vgpu_vreg_t(vgpu, vgtif_reg(version_minor)) = 0; + vgpu_vreg_t(vgpu, vgtif_reg(display_ready)) = 0; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_id)) = vgpu->id; - vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; - vgpu_vreg(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) = VGT_CAPS_FULL_48BIT_PPGTT; + vgpu_vreg_t(vgpu, vgtif_reg(vgt_caps)) |= VGT_CAPS_HWSP_EMULATION; - vgpu_vreg(vgpu, 
vgtif_reg(avail_rs.mappable_gmadr.base)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.base)) = vgpu_aperture_gmadr_base(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.mappable_gmadr.size)) = vgpu_aperture_sz(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.base)) = vgpu_hidden_gmadr_base(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) = + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.nonmappable_gmadr.size)) = vgpu_hidden_sz(vgpu); - vgpu_vreg(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu); + vgpu_vreg_t(vgpu, vgtif_reg(avail_rs.fence_num)) = vgpu_fence_sz(vgpu); gvt_dbg_core("Populate PVINFO PAGE for vGPU %d\n", vgpu->id); gvt_dbg_core("aperture base [GMADR] 0x%llx size 0x%llx\n", @@ -236,6 +236,7 @@ void intel_gvt_deactivate_vgpu(struct intel_vgpu *vgpu) } intel_vgpu_stop_schedule(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); mutex_unlock(&gvt->lock); } @@ -265,6 +266,7 @@ void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu) intel_gvt_hypervisor_detach_vgpu(vgpu); intel_vgpu_free_resource(vgpu); intel_vgpu_clean_mmio(vgpu); + intel_vgpu_dmabuf_cleanup(vgpu); vfree(vgpu); intel_gvt_update_vgpu_types(gvt); @@ -349,7 +351,8 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, vgpu->handle = param->handle; vgpu->gvt = gvt; vgpu->sched_ctl.weight = param->weight; - + INIT_LIST_HEAD(&vgpu->dmabuf_obj_list_head); + idr_init(&vgpu->object_idr); intel_vgpu_init_cfg_space(vgpu, param->primary); ret = intel_vgpu_init_mmio(vgpu); @@ -370,10 +373,14 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_detach_hypervisor_vgpu; - ret = intel_vgpu_init_display(vgpu, param->resolution); + ret = intel_vgpu_init_opregion(vgpu); if (ret) goto out_clean_gtt; + ret = intel_vgpu_init_display(vgpu, param->resolution); + if (ret) + goto out_clean_opregion; + ret = intel_vgpu_setup_submission(vgpu); if (ret) goto out_clean_display; @@ -386,6 +393,10 @@ static struct intel_vgpu *__intel_gvt_create_vgpu(struct intel_gvt *gvt, if (ret) goto out_clean_sched_policy; + ret = intel_gvt_hypervisor_set_opregion(vgpu); + if (ret) + goto out_clean_sched_policy; + mutex_unlock(&gvt->lock); return vgpu; @@ -396,6 +407,8 @@ out_clean_submission: intel_vgpu_clean_submission(vgpu); out_clean_display: intel_vgpu_clean_display(vgpu); +out_clean_opregion: + intel_vgpu_clean_opregion(vgpu); out_clean_gtt: intel_vgpu_clean_gtt(vgpu); out_detach_hypervisor_vgpu: diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c index 28294470ae31..e968aeae1d84 100644 --- a/drivers/gpu/drm/i915/i915_debugfs.c +++ b/drivers/gpu/drm/i915/i915_debugfs.c @@ -37,40 +37,21 @@ static inline struct drm_i915_private *node_to_i915(struct drm_info_node *node) return to_i915(node->minor->dev); } -static __always_inline void seq_print_param(struct seq_file *m, - const char *name, - const char *type, - const void *x) -{ - if (!__builtin_strcmp(type, "bool")) - seq_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x)); - else if (!__builtin_strcmp(type, "int")) - seq_printf(m, "i915.%s=%d\n", name, *(const int *)x); - else if (!__builtin_strcmp(type, "unsigned int")) - seq_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x); - else if (!__builtin_strcmp(type, "char *")) - seq_printf(m, "i915.%s=%s\n", name, *(const char **)x); - else - BUILD_BUG(); -} - static int i915_capabilities(struct 
seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_device_info *info = INTEL_INFO(dev_priv); + struct drm_printer p = drm_seq_file_printer(m); seq_printf(m, "gen: %d\n", INTEL_GEN(dev_priv)); seq_printf(m, "platform: %s\n", intel_platform_name(info->platform)); seq_printf(m, "pch: %d\n", INTEL_PCH_TYPE(dev_priv)); -#define PRINT_FLAG(x) seq_printf(m, #x ": %s\n", yesno(info->x)) - DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); -#undef PRINT_FLAG + intel_device_info_dump_flags(info, &p); + intel_device_info_dump_runtime(info, &p); kernel_param_lock(THIS_MODULE); -#define PRINT_PARAM(T, x, ...) seq_print_param(m, #x, #T, &i915_modparams.x); - I915_PARAMS_FOR_EACH(PRINT_PARAM); -#undef PRINT_PARAM + i915_params_dump(&i915_modparams, &p); kernel_param_unlock(THIS_MODULE); return 0; @@ -111,8 +92,8 @@ static u64 i915_gem_obj_total_ggtt_size(struct drm_i915_gem_object *obj) u64 size = 0; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (i915_vma_is_ggtt(vma) && drm_mm_node_allocated(&vma->node)) + for_each_ggtt_vma(vma, obj) { + if (drm_mm_node_allocated(&vma->node)) size += vma->node.size; } @@ -522,8 +503,8 @@ static int i915_gem_object_info(struct seq_file *m, void *data) seq_printf(m, "%u display objects (globally pinned), %llu bytes\n", dpy_count, dpy_size); - seq_printf(m, "%llu [%llu] gtt total\n", - ggtt->base.total, ggtt->mappable_end); + seq_printf(m, "%llu [%pa] gtt total\n", + ggtt->base.total, &ggtt->mappable_end); seq_printf(m, "Supported page sizes: %s\n", stringify_page_sizes(INTEL_INFO(dev_priv)->page_sizes, buf, sizeof(buf))); @@ -664,38 +645,6 @@ static int i915_gem_batch_pool_info(struct seq_file *m, void *data) return 0; } -static void i915_ring_seqno_info(struct seq_file *m, - struct intel_engine_cs *engine) -{ - struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct rb_node *rb; - - seq_printf(m, "Current sequence (%s): %x\n", - engine->name, intel_engine_get_seqno(engine)); - - spin_lock_irq(&b->rb_lock); - for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) { - struct intel_wait *w = rb_entry(rb, typeof(*w), node); - - seq_printf(m, "Waiting (%s): %s [%d] on %x\n", - engine->name, w->tsk->comm, w->tsk->pid, w->seqno); - } - spin_unlock_irq(&b->rb_lock); -} - -static int i915_gem_seqno_info(struct seq_file *m, void *data) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - i915_ring_seqno_info(m, engine); - - return 0; -} - - static int i915_interrupt_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); @@ -896,13 +845,12 @@ static int i915_interrupt_info(struct seq_file *m, void *data) seq_printf(m, "Graphics Interrupt mask: %08x\n", I915_READ(GTIMR)); } - for_each_engine(engine, dev_priv, id) { - if (INTEL_GEN(dev_priv) >= 6) { + if (INTEL_GEN(dev_priv) >= 6) { + for_each_engine(engine, dev_priv, id) { seq_printf(m, "Graphics Interrupt mask (%s): %08x\n", engine->name, I915_READ_IMR(engine)); } - i915_ring_seqno_info(m, engine); } intel_runtime_pm_put(dev_priv); @@ -1634,20 +1582,23 @@ static int i915_frontbuffer_tracking(struct seq_file *m, void *unused) static int i915_fbc_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); + struct intel_fbc *fbc = &dev_priv->fbc; - if (!HAS_FBC(dev_priv)) { - seq_puts(m, "FBC unsupported on this chipset\n"); - return 0; - } + if 
(!HAS_FBC(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); - mutex_lock(&dev_priv->fbc.lock); + mutex_lock(&fbc->lock); if (intel_fbc_is_active(dev_priv)) seq_puts(m, "FBC enabled\n"); else - seq_printf(m, "FBC disabled: %s\n", - dev_priv->fbc.no_fbc_reason); + seq_printf(m, "FBC disabled: %s\n", fbc->no_fbc_reason); + + if (fbc->work.scheduled) + seq_printf(m, "FBC worker scheduled on vblank %u, now %llu\n", + fbc->work.scheduled_vblank, + drm_crtc_vblank_count(&fbc->crtc->base)); if (intel_fbc_is_active(dev_priv)) { u32 mask; @@ -1667,7 +1618,7 @@ static int i915_fbc_status(struct seq_file *m, void *unused) seq_printf(m, "Compressing: %s\n", yesno(mask)); } - mutex_unlock(&dev_priv->fbc.lock); + mutex_unlock(&fbc->lock); intel_runtime_pm_put(dev_priv); return 0; @@ -1714,10 +1665,8 @@ static int i915_ips_status(struct seq_file *m, void *unused) { struct drm_i915_private *dev_priv = node_to_i915(m->private); - if (!HAS_IPS(dev_priv)) { - seq_puts(m, "not supported\n"); - return 0; - } + if (!HAS_IPS(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -1803,10 +1752,8 @@ static int i915_ring_freq_table(struct seq_file *m, void *unused) int gpu_freq, ia_freq; unsigned int max_gpu_freq, min_gpu_freq; - if (!HAS_LLC(dev_priv)) { - seq_puts(m, "unsupported on this chipset\n"); - return 0; - } + if (!HAS_LLC(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -2286,8 +2233,8 @@ static int i915_huc_load_status_info(struct seq_file *m, void *data) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct drm_printer p; - if (!HAS_HUC_UCODE(dev_priv)) - return 0; + if (!HAS_HUC(dev_priv)) + return -ENODEV; p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->huc.fw, &p); @@ -2305,8 +2252,8 @@ static int i915_guc_load_status_info(struct seq_file *m, void *data) struct drm_printer p; u32 tmp, i; - if (!HAS_GUC_UCODE(dev_priv)) - return 0; + if (!HAS_GUC(dev_priv)) + return -ENODEV; p = drm_seq_file_printer(m); intel_uc_fw_dump(&dev_priv->guc.fw, &p); @@ -2379,29 +2326,16 @@ static void i915_guc_client_info(struct seq_file *m, seq_printf(m, "\tTotal: %llu\n", tot); } -static bool check_guc_submission(struct seq_file *m) -{ - struct drm_i915_private *dev_priv = node_to_i915(m->private); - const struct intel_guc *guc = &dev_priv->guc; - - if (!guc->execbuf_client) { - seq_printf(m, "GuC submission %s\n", - HAS_GUC_SCHED(dev_priv) ? 
- "disabled" : - "not supported"); - return false; - } - - return true; -} - static int i915_guc_info(struct seq_file *m, void *data) { struct drm_i915_private *dev_priv = node_to_i915(m->private); const struct intel_guc *guc = &dev_priv->guc; - if (!check_guc_submission(m)) - return 0; + if (!USES_GUC_SUBMISSION(dev_priv)) + return -ENODEV; + + GEM_BUG_ON(!guc->execbuf_client); + GEM_BUG_ON(!guc->preempt_client); seq_printf(m, "Doorbell map:\n"); seq_printf(m, "\t%*pb\n", GUC_NUM_DOORBELLS, guc->doorbell_bitmap); @@ -2428,8 +2362,8 @@ static int i915_guc_stage_pool(struct seq_file *m, void *data) unsigned int tmp; int index; - if (!check_guc_submission(m)) - return 0; + if (!USES_GUC_SUBMISSION(dev_priv)) + return -ENODEV; for (index = 0; index < GUC_MAX_STAGE_DESCRIPTORS; index++, desc++) { struct intel_engine_cs *engine; @@ -2482,6 +2416,9 @@ static int i915_guc_log_dump(struct seq_file *m, void *data) u32 *log; int i = 0; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (dump_load_err) obj = dev_priv->guc.load_err_log; else if (dev_priv->guc.log.vma) @@ -2513,6 +2450,9 @@ static int i915_guc_log_control_get(void *data, u64 *val) { struct drm_i915_private *dev_priv = data; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2526,6 +2466,9 @@ static int i915_guc_log_control_set(void *data, u64 val) struct drm_i915_private *dev_priv = data; int ret; + if (!HAS_GUC(dev_priv)) + return -ENODEV; + if (!dev_priv->guc.log.vma) return -EINVAL; @@ -2576,10 +2519,8 @@ static int i915_edp_psr_status(struct seq_file *m, void *data) enum pipe pipe; bool enabled = false; - if (!HAS_PSR(dev_priv)) { - seq_puts(m, "PSR not supported\n"); - return 0; - } + if (!HAS_PSR(dev_priv)) + return -ENODEV; intel_runtime_pm_get(dev_priv); @@ -2818,10 +2759,8 @@ static int i915_dmc_info(struct seq_file *m, void *unused) struct drm_i915_private *dev_priv = node_to_i915(m->private); struct intel_csr *csr; - if (!HAS_CSR(dev_priv)) { - seq_puts(m, "not supported\n"); - return 0; - } + if (!HAS_CSR(dev_priv)) + return -ENODEV; csr = &dev_priv->csr; @@ -3213,7 +3152,7 @@ static int i915_engine_info(struct seq_file *m, void *unused) p = drm_seq_file_printer(m); for_each_engine(engine, dev_priv, id) - intel_engine_dump(engine, &p); + intel_engine_dump(engine, &p, "%s\n", engine->name); intel_runtime_pm_put(dev_priv); @@ -3357,7 +3296,7 @@ static int i915_ddb_info(struct seq_file *m, void *unused) int plane; if (INTEL_GEN(dev_priv) < 9) - return 0; + return -ENODEV; drm_modeset_lock_all(dev); @@ -4672,7 +4611,6 @@ static const struct drm_info_list i915_debugfs_list[] = { {"i915_gem_objects", i915_gem_object_info, 0}, {"i915_gem_gtt", i915_gem_gtt_info, 0}, {"i915_gem_stolen", i915_gem_stolen_list_info }, - {"i915_gem_seqno", i915_gem_seqno_info, 0}, {"i915_gem_fence_regs", i915_gem_fence_regs_info, 0}, {"i915_gem_interrupt", i915_interrupt_info, 0}, {"i915_gem_batch_pool", i915_gem_batch_pool_info, 0}, diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c index 7faf20aff25a..6c8da9d20c33 100644 --- a/drivers/gpu/drm/i915/i915_drv.c +++ b/drivers/gpu/drm/i915/i915_drv.c @@ -617,10 +617,12 @@ static void i915_gem_fini(struct drm_i915_private *dev_priv) mutex_lock(&dev_priv->drm.struct_mutex); intel_uc_fini_hw(dev_priv); + intel_uc_fini(dev_priv); i915_gem_cleanup_engines(dev_priv); i915_gem_contexts_fini(dev_priv); mutex_unlock(&dev_priv->drm.struct_mutex); + intel_uc_fini_wq(dev_priv); i915_gem_cleanup_userptr(dev_priv); 
i915_gem_drain_freed_objects(dev_priv); @@ -726,7 +728,7 @@ static int i915_kick_out_firmware_fb(struct drm_i915_private *dev_priv) if (!ap) return -ENOMEM; - ap->ranges[0].base = ggtt->mappable_base; + ap->ranges[0].base = ggtt->gmadr.start; ap->ranges[0].size = ggtt->mappable_end; primary = @@ -929,8 +931,6 @@ static int i915_driver_init_early(struct drm_i915_private *dev_priv, intel_display_crc_init(dev_priv); - intel_device_info_dump(dev_priv); - intel_detect_preproduction_hw(dev_priv); return 0; @@ -1082,7 +1082,7 @@ static int i915_driver_init_hw(struct drm_i915_private *dev_priv) if (i915_inject_load_failure()) return -ENODEV; - intel_device_info_runtime_init(dev_priv); + intel_device_info_runtime_init(mkwrite_device_info(dev_priv)); intel_sanitize_options(dev_priv); @@ -1292,6 +1292,21 @@ static void i915_driver_unregister(struct drm_i915_private *dev_priv) i915_gem_shrinker_unregister(dev_priv); } +static void i915_welcome_messages(struct drm_i915_private *dev_priv) +{ + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer("i915 device info:"); + + intel_device_info_dump(&dev_priv->info, &p); + intel_device_info_dump_runtime(&dev_priv->info, &p); + } + + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) + DRM_INFO("DRM_I915_DEBUG enabled\n"); + if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) + DRM_INFO("DRM_I915_DEBUG_GEM enabled\n"); +} + /** * i915_driver_load - setup chip and create an initial config * @pdev: PCI device @@ -1377,13 +1392,10 @@ int i915_driver_load(struct pci_dev *pdev, const struct pci_device_id *ent) intel_init_ipc(dev_priv); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG)) - DRM_INFO("DRM_I915_DEBUG enabled\n"); - if (IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)) - DRM_INFO("DRM_I915_DEBUG_GEM enabled\n"); - intel_runtime_pm_put(dev_priv); + i915_welcome_messages(dev_priv); + return 0; out_cleanup_hw: @@ -1897,13 +1909,16 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) disable_irq(i915->drm.irq); ret = i915_gem_reset_prepare(i915); if (ret) { - DRM_ERROR("GPU recovery failed\n"); + dev_err(i915->drm.dev, "GPU recovery failed\n"); intel_gpu_reset(i915, ALL_ENGINES); - goto error; + goto taint; } if (!intel_has_gpu_reset(i915)) { - DRM_DEBUG_DRIVER("GPU reset disabled\n"); + if (i915_modparams.reset) + dev_err(i915->drm.dev, "GPU reset not supported\n"); + else + DRM_DEBUG_DRIVER("GPU reset disabled\n"); goto error; } @@ -1916,12 +1931,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) } if (ret) { dev_err(i915->drm.dev, "Failed to reset chip\n"); - goto error; + goto taint; } - i915_gem_reset(i915); - intel_overlay_reset(i915); - /* Ok, now get things going again... */ /* @@ -1934,6 +1946,9 @@ void i915_reset(struct drm_i915_private *i915, unsigned int flags) goto error; } + i915_gem_reset(i915); + intel_overlay_reset(i915); + /* * Next we need to restore the context, but we don't use those * yet either... @@ -1959,6 +1974,20 @@ wakeup: wake_up_bit(&error->flags, I915_RESET_HANDOFF); return; +taint: + /* + * History tells us that if we cannot reset the GPU now, we + * never will. This then impacts everything that is run + * subsequently. On failing the reset, we mark the driver + * as wedged, preventing further execution on the GPU. + * We also want to go one step further and add a taint to the + * kernel so that any subsequent faults can be traced back to + * this failure. This is important for CI, where if the + * GPU/driver fails we would like to reboot and restart testing + * rather than continue on into oblivion. 
For everyone else, + * the system should still plod along, but they have been warned! + */ + add_taint(TAINT_WARN, LOCKDEP_STILL_OK); error: i915_gem_set_wedged(i915); i915_gem_retire_requests(i915); @@ -1992,19 +2021,19 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags) GEM_BUG_ON(!test_bit(I915_RESET_ENGINE + engine->id, &error->flags)); + active_request = i915_gem_reset_prepare_engine(engine); + if (IS_ERR_OR_NULL(active_request)) { + /* Either the previous reset failed, or we pardon the reset. */ + ret = PTR_ERR(active_request); + goto out; + } + if (!(flags & I915_RESET_QUIET)) { dev_notice(engine->i915->drm.dev, "Resetting %s after gpu hang\n", engine->name); } error->reset_engine_count[engine->id]++; - active_request = i915_gem_reset_prepare_engine(engine); - if (IS_ERR(active_request)) { - DRM_DEBUG_DRIVER("Previous reset failed, promote to full reset\n"); - ret = PTR_ERR(active_request); - goto out; - } - if (!engine->i915->guc.execbuf_client) ret = intel_gt_reset_engine(engine->i915, engine); else diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 594fd14e66c5..caebd5825279 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -56,12 +56,15 @@ #include "i915_reg.h" #include "i915_utils.h" -#include "intel_uncore.h" #include "intel_bios.h" +#include "intel_device_info.h" +#include "intel_display.h" #include "intel_dpll_mgr.h" -#include "intel_uc.h" #include "intel_lrc.h" +#include "intel_opregion.h" #include "intel_ringbuffer.h" +#include "intel_uncore.h" +#include "intel_uc.h" #include "i915_gem.h" #include "i915_gem_context.h" @@ -80,8 +83,8 @@ #define DRIVER_NAME "i915" #define DRIVER_DESC "Intel Graphics" -#define DRIVER_DATE "20171201" -#define DRIVER_TIMESTAMP 1512176839 +#define DRIVER_DATE "20171222" +#define DRIVER_TIMESTAMP 1513971710 /* Use I915_STATE_WARN(x) and I915_STATE_WARN_ON() (rather than WARN() and * WARN_ON()) for hw state sanity checks to check for unexpected conditions @@ -243,173 +246,6 @@ static inline uint_fixed_16_16_t add_fixed16_u32(uint_fixed_16_16_t add1, return clamp_u64_to_fixed16(interm_sum); } -static inline const char *yesno(bool v) -{ - return v ? "yes" : "no"; -} - -static inline const char *onoff(bool v) -{ - return v ? "on" : "off"; -} - -static inline const char *enableddisabled(bool v) -{ - return v ? "enabled" : "disabled"; -} - -enum pipe { - INVALID_PIPE = -1, - PIPE_A = 0, - PIPE_B, - PIPE_C, - _PIPE_EDP, - I915_MAX_PIPES = _PIPE_EDP -}; -#define pipe_name(p) ((p) + 'A') - -enum transcoder { - TRANSCODER_A = 0, - TRANSCODER_B, - TRANSCODER_C, - TRANSCODER_EDP, - TRANSCODER_DSI_A, - TRANSCODER_DSI_C, - I915_MAX_TRANSCODERS -}; - -static inline const char *transcoder_name(enum transcoder transcoder) -{ - switch (transcoder) { - case TRANSCODER_A: - return "A"; - case TRANSCODER_B: - return "B"; - case TRANSCODER_C: - return "C"; - case TRANSCODER_EDP: - return "EDP"; - case TRANSCODER_DSI_A: - return "DSI A"; - case TRANSCODER_DSI_C: - return "DSI C"; - default: - return "<invalid>"; - } -} - -static inline bool transcoder_is_dsi(enum transcoder transcoder) -{ - return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; -} - -/* - * Global legacy plane identifier. Valid only for primary/sprite - * planes on pre-g4x, and only for primary planes on g4x-bdw. 
- */ -enum i9xx_plane_id { - PLANE_A, - PLANE_B, - PLANE_C, -}; -#define plane_name(p) ((p) + 'A') - -#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') - -/* - * Per-pipe plane identifier. - * I915_MAX_PLANES in the enum below is the maximum (across all platforms) - * number of planes per CRTC. Not all platforms really have this many planes, - * which means some arrays of size I915_MAX_PLANES may have unused entries - * between the topmost sprite plane and the cursor plane. - * - * This is expected to be passed to various register macros - * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. - */ -enum plane_id { - PLANE_PRIMARY, - PLANE_SPRITE0, - PLANE_SPRITE1, - PLANE_SPRITE2, - PLANE_CURSOR, - I915_MAX_PLANES, -}; - -#define for_each_plane_id_on_crtc(__crtc, __p) \ - for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ - for_each_if ((__crtc)->plane_ids_mask & BIT(__p)) - -enum port { - PORT_NONE = -1, - PORT_A = 0, - PORT_B, - PORT_C, - PORT_D, - PORT_E, - I915_MAX_PORTS -}; -#define port_name(p) ((p) + 'A') - -#define I915_NUM_PHYS_VLV 2 - -enum dpio_channel { - DPIO_CH0, - DPIO_CH1 -}; - -enum dpio_phy { - DPIO_PHY0, - DPIO_PHY1, - DPIO_PHY2, -}; - -enum intel_display_power_domain { - POWER_DOMAIN_PIPE_A, - POWER_DOMAIN_PIPE_B, - POWER_DOMAIN_PIPE_C, - POWER_DOMAIN_PIPE_A_PANEL_FITTER, - POWER_DOMAIN_PIPE_B_PANEL_FITTER, - POWER_DOMAIN_PIPE_C_PANEL_FITTER, - POWER_DOMAIN_TRANSCODER_A, - POWER_DOMAIN_TRANSCODER_B, - POWER_DOMAIN_TRANSCODER_C, - POWER_DOMAIN_TRANSCODER_EDP, - POWER_DOMAIN_TRANSCODER_DSI_A, - POWER_DOMAIN_TRANSCODER_DSI_C, - POWER_DOMAIN_PORT_DDI_A_LANES, - POWER_DOMAIN_PORT_DDI_B_LANES, - POWER_DOMAIN_PORT_DDI_C_LANES, - POWER_DOMAIN_PORT_DDI_D_LANES, - POWER_DOMAIN_PORT_DDI_E_LANES, - POWER_DOMAIN_PORT_DDI_A_IO, - POWER_DOMAIN_PORT_DDI_B_IO, - POWER_DOMAIN_PORT_DDI_C_IO, - POWER_DOMAIN_PORT_DDI_D_IO, - POWER_DOMAIN_PORT_DDI_E_IO, - POWER_DOMAIN_PORT_DSI, - POWER_DOMAIN_PORT_CRT, - POWER_DOMAIN_PORT_OTHER, - POWER_DOMAIN_VGA, - POWER_DOMAIN_AUDIO, - POWER_DOMAIN_PLLS, - POWER_DOMAIN_AUX_A, - POWER_DOMAIN_AUX_B, - POWER_DOMAIN_AUX_C, - POWER_DOMAIN_AUX_D, - POWER_DOMAIN_GMBUS, - POWER_DOMAIN_MODESET, - POWER_DOMAIN_INIT, - - POWER_DOMAIN_NUM, -}; - -#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) -#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ - ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) -#define POWER_DOMAIN_TRANSCODER(tran) \ - ((tran) == TRANSCODER_EDP ? 
POWER_DOMAIN_TRANSCODER_EDP : \ - (tran) + POWER_DOMAIN_TRANSCODER_A) - enum hpd_pin { HPD_NONE = 0, HPD_TV = HPD_NONE, /* TV is known to be unreliable */ @@ -471,121 +307,6 @@ struct i915_hotplug { I915_GEM_DOMAIN_INSTRUCTION | \ I915_GEM_DOMAIN_VERTEX) -#define for_each_pipe(__dev_priv, __p) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) -#define for_each_pipe_masked(__dev_priv, __p, __mask) \ - for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ - for_each_if ((__mask) & (1 << (__p))) -#define for_each_universal_plane(__dev_priv, __pipe, __p) \ - for ((__p) = 0; \ - (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ - (__p)++) -#define for_each_sprite(__dev_priv, __p, __s) \ - for ((__s) = 0; \ - (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ - (__s)++) - -#define for_each_port_masked(__port, __ports_mask) \ - for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ - for_each_if ((__ports_mask) & (1 << (__port))) - -#define for_each_crtc(dev, crtc) \ - list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) - -#define for_each_intel_plane(dev, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) - -#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((plane_mask) & \ - (1 << drm_plane_index(&intel_plane->base))) - -#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ - list_for_each_entry(intel_plane, \ - &(dev)->mode_config.plane_list, \ - base.head) \ - for_each_if ((intel_plane)->pipe == (intel_crtc)->pipe) - -#define for_each_intel_crtc(dev, intel_crtc) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) - -#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ - list_for_each_entry(intel_crtc, \ - &(dev)->mode_config.crtc_list, \ - base.head) \ - for_each_if ((crtc_mask) & (1 << drm_crtc_index(&intel_crtc->base))) - -#define for_each_intel_encoder(dev, intel_encoder) \ - list_for_each_entry(intel_encoder, \ - &(dev)->mode_config.encoder_list, \ - base.head) - -#define for_each_intel_connector_iter(intel_connector, iter) \ - while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) - -#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ - list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ - for_each_if ((intel_encoder)->base.crtc == (__crtc)) - -#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ - list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ - for_each_if ((intel_connector)->base.encoder == (__encoder)) - -#define for_each_power_domain(domain, mask) \ - for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ - for_each_if (BIT_ULL(domain) & (mask)) - -#define for_each_power_well(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ - (__power_well) - (__dev_priv)->power_domains.power_wells < \ - (__dev_priv)->power_domains.power_well_count; \ - (__power_well)++) - -#define for_each_power_well_rev(__dev_priv, __power_well) \ - for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ - (__dev_priv)->power_domains.power_well_count - 1; \ - (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ - (__power_well)--) - -#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ - 
for_each_power_well(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ - for_each_power_well_rev(__dev_priv, __power_well) \ - for_each_if ((__power_well)->domains & (__domain_mask)) - -#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_total_plane && \ - ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ - (__i)++) \ - for_each_if (plane) - -#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_crtc && \ - ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ - (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ - (__i)++) \ - for_each_if (crtc) - -#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ - for ((__i) = 0; \ - (__i) < (__state)->base.dev->mode_config.num_total_plane && \ - ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ - (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \ - (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ - (__i)++) \ - for_each_if (plane) - struct drm_i915_private; struct i915_mm_struct; struct i915_mmu_object; @@ -622,20 +343,6 @@ struct drm_i915_file_private { atomic_t context_bans; }; -/* Used by dp and fdi links */ -struct intel_link_m_n { - uint32_t tu; - uint32_t gmch_m; - uint32_t gmch_n; - uint32_t link_m; - uint32_t link_n; -}; - -void intel_link_compute_m_n(int bpp, int nlanes, - int pixel_clock, int link_clock, - struct intel_link_m_n *m_n, - bool reduce_m_n); - /* Interface history: * * 1.1: Original. 
@@ -650,27 +357,6 @@ void intel_link_compute_m_n(int bpp, int nlanes, #define DRIVER_MINOR 6 #define DRIVER_PATCHLEVEL 0 -struct opregion_header; -struct opregion_acpi; -struct opregion_swsci; -struct opregion_asle; - -struct intel_opregion { - struct opregion_header *header; - struct opregion_acpi *acpi; - struct opregion_swsci *swsci; - u32 swsci_gbda_sub_functions; - u32 swsci_sbcb_sub_functions; - struct opregion_asle *asle; - void *rvda; - void *vbt_firmware; - const void *vbt; - u32 vbt_size; - u32 *lid_state; - struct work_struct asle_work; -}; -#define OPREGION_SIZE (8*1024) - struct intel_overlay; struct intel_overlay_error_state; @@ -763,137 +449,6 @@ struct intel_csr { uint32_t allowed_dc_mask; }; -#define DEV_INFO_FOR_EACH_FLAG(func) \ - func(is_mobile); \ - func(is_lp); \ - func(is_alpha_support); \ - /* Keep has_* in alphabetical order */ \ - func(has_64bit_reloc); \ - func(has_aliasing_ppgtt); \ - func(has_csr); \ - func(has_ddi); \ - func(has_dp_mst); \ - func(has_reset_engine); \ - func(has_fbc); \ - func(has_fpga_dbg); \ - func(has_full_ppgtt); \ - func(has_full_48bit_ppgtt); \ - func(has_gmch_display); \ - func(has_guc); \ - func(has_guc_ct); \ - func(has_hotplug); \ - func(has_l3_dpf); \ - func(has_llc); \ - func(has_logical_ring_contexts); \ - func(has_logical_ring_preemption); \ - func(has_overlay); \ - func(has_pooled_eu); \ - func(has_psr); \ - func(has_rc6); \ - func(has_rc6p); \ - func(has_resource_streamer); \ - func(has_runtime_pm); \ - func(has_snoop); \ - func(unfenced_needs_alignment); \ - func(cursor_needs_physical); \ - func(hws_needs_physical); \ - func(overlay_needs_physical); \ - func(supports_tv); \ - func(has_ipc); - -struct sseu_dev_info { - u8 slice_mask; - u8 subslice_mask; - u8 eu_total; - u8 eu_per_subslice; - u8 min_eu_in_pool; - /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
*/ - u8 subslice_7eu[3]; - u8 has_slice_pg:1; - u8 has_subslice_pg:1; - u8 has_eu_pg:1; -}; - -static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) -{ - return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); -} - -/* Keep in gen based order, and chronological order within a gen */ -enum intel_platform { - INTEL_PLATFORM_UNINITIALIZED = 0, - INTEL_I830, - INTEL_I845G, - INTEL_I85X, - INTEL_I865G, - INTEL_I915G, - INTEL_I915GM, - INTEL_I945G, - INTEL_I945GM, - INTEL_G33, - INTEL_PINEVIEW, - INTEL_I965G, - INTEL_I965GM, - INTEL_G45, - INTEL_GM45, - INTEL_IRONLAKE, - INTEL_SANDYBRIDGE, - INTEL_IVYBRIDGE, - INTEL_VALLEYVIEW, - INTEL_HASWELL, - INTEL_BROADWELL, - INTEL_CHERRYVIEW, - INTEL_SKYLAKE, - INTEL_BROXTON, - INTEL_KABYLAKE, - INTEL_GEMINILAKE, - INTEL_COFFEELAKE, - INTEL_CANNONLAKE, - INTEL_MAX_PLATFORMS -}; - -struct intel_device_info { - u16 device_id; - u16 gen_mask; - - u8 gen; - u8 gt; /* GT number, 0 if undefined */ - u8 num_rings; - u8 ring_mask; /* Rings supported by the HW */ - - enum intel_platform platform; - u32 platform_mask; - - u32 display_mmio_offset; - - u8 num_pipes; - u8 num_sprites[I915_MAX_PIPES]; - u8 num_scalers[I915_MAX_PIPES]; - - unsigned int page_sizes; /* page sizes supported by the HW */ - -#define DEFINE_FLAG(name) u8 name:1 - DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); -#undef DEFINE_FLAG - u16 ddb_size; /* in blocks */ - - /* Register offsets for the various display pipes and transcoders */ - int pipe_offsets[I915_MAX_TRANSCODERS]; - int trans_offsets[I915_MAX_TRANSCODERS]; - int palette_offsets[I915_MAX_PIPES]; - int cursor_offsets[I915_MAX_PIPES]; - - /* Slice/subslice/EU info */ - struct sseu_dev_info sseu; - - u32 cs_timestamp_frequency_khz; - - struct color_luts { - u16 degamma_lut_size; - u16 gamma_lut_size; - } color; -}; - struct intel_display_error_state; struct i915_gpu_state { @@ -947,6 +502,7 @@ struct i915_gpu_state { struct drm_i915_error_engine { int engine_id; /* Software tracked state */ + bool idle; bool waiting; int num_waiters; unsigned long hangcheck_timestamp; @@ -1537,9 +1093,6 @@ struct i915_gem_mm { */ struct pagevec wc_stash; - /** Usable portion of the GTT for GEM */ - dma_addr_t stolen_base; /* limited to low memory (32-bit) */ - /** * tmpfs instance used for shmem backed objects */ @@ -1588,6 +1141,8 @@ struct drm_i915_error_state_buf { loff_t pos; }; +#define I915_IDLE_ENGINES_TIMEOUT (200) /* in ms */ + #define I915_RESET_TIMEOUT (10 * HZ) /* 10s */ #define I915_FENCE_TIMEOUT (10 * HZ) /* 10s */ @@ -2253,6 +1808,30 @@ struct drm_i915_private { const struct intel_device_info info; + /** + * Data Stolen Memory - aka "i915 stolen memory" gives us the start and + * end of stolen which we can optionally use to create GEM objects + * backed by stolen memory. Note that stolen_usable_size tells us + * exactly how much of this we are actually allowed to use, given that + * some portion of it is in fact reserved for use by hardware functions. + */ + struct resource dsm; + /** + * Reserved portion of Data Stolen Memory + */ + struct resource dsm_reserved; + + /* + * Stolen memory is segmented in hardware with different portions + * offlimits to certain functions. + * + * The drm_mm is initialised to the total accessible range, as found + * from the PCI config. On Broadwell+, this is further restricted to + * avoid the first page! The upper end of stolen memory is reserved for + * hardware functions and similarly removed from the accessible range. 
+ */ + resource_size_t stolen_usable_size; /* Total size minus reserved ranges */ + void __iomem *regs; struct intel_uncore uncore; @@ -2381,6 +1960,9 @@ struct drm_i915_private { */ struct workqueue_struct *wq; + /* ordered wq for modesets */ + struct workqueue_struct *modeset_wq; + /* Display functions */ struct drm_i915_display_funcs display; @@ -3234,8 +2816,16 @@ intel_info(const struct drm_i915_private *dev_priv) #define HAS_GUC_CT(dev_priv) ((dev_priv)->info.has_guc_ct) #define HAS_GUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) #define HAS_GUC_SCHED(dev_priv) (HAS_GUC(dev_priv)) + +/* For now, anything with a GuC has also HuC */ +#define HAS_HUC(dev_priv) (HAS_GUC(dev_priv)) #define HAS_HUC_UCODE(dev_priv) (HAS_GUC(dev_priv)) +/* Having a GuC is not the same as using a GuC */ +#define USES_GUC(dev_priv) intel_uc_is_using_guc() +#define USES_GUC_SUBMISSION(dev_priv) intel_uc_is_using_guc_submission() +#define USES_HUC(dev_priv) intel_uc_is_using_huc() + #define HAS_RESOURCE_STREAMER(dev_priv) ((dev_priv)->info.has_resource_streamer) #define HAS_POOLED_EU(dev_priv) ((dev_priv)->info.has_pooled_eu) @@ -3879,6 +3469,8 @@ int __must_check i915_gem_evict_for_node(struct i915_address_space *vm, unsigned int flags); int i915_gem_evict_vm(struct i915_address_space *vm); +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv); + /* belongs in i915_gem_gtt.h */ static inline void i915_gem_chipset_flush(struct drm_i915_private *dev_priv) { @@ -3900,12 +3492,13 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv); void i915_gem_cleanup_stolen(struct drm_device *dev); struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size); +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size); struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size); + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size); /* i915_gem_internal.c */ struct drm_i915_gem_object * @@ -4076,41 +3669,6 @@ bool intel_bios_is_port_hpd_inverted(struct drm_i915_private *dev_priv, bool intel_bios_is_lspcon_present(struct drm_i915_private *dev_priv, enum port port); - -/* intel_opregion.c */ -#ifdef CONFIG_ACPI -extern int intel_opregion_setup(struct drm_i915_private *dev_priv); -extern void intel_opregion_register(struct drm_i915_private *dev_priv); -extern void intel_opregion_unregister(struct drm_i915_private *dev_priv); -extern void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); -extern int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, - bool enable); -extern int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, - pci_power_t state); -extern int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); -#else -static inline int intel_opregion_setup(struct drm_i915_private *dev) { return 0; } -static inline void intel_opregion_register(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) { } -static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) -{ -} -static inline int -intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) -{ - return 0; -} -static inline int -intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) -{ - return 0; -} -static 
inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) -{ - return -ENODEV; -} -#endif - /* intel_acpi.c */ #ifdef CONFIG_ACPI extern void intel_register_dsm_handler(void); @@ -4127,10 +3685,6 @@ mkwrite_device_info(struct drm_i915_private *dev_priv) return (struct intel_device_info *)&dev_priv->info; } -const char *intel_platform_name(enum intel_platform platform); -void intel_device_info_runtime_init(struct drm_i915_private *dev_priv); -void intel_device_info_dump(struct drm_i915_private *dev_priv); - /* modesetting */ extern void intel_modeset_init_hw(struct drm_device *dev); extern int intel_modeset_init(struct drm_device *dev); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index e083f242b8dc..ba9f67c256f4 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -330,17 +330,10 @@ int i915_gem_object_unbind(struct drm_i915_gem_object *obj) * must wait for all rendering to complete to the object (as unbinding * must anyway), and retire the requests. */ - ret = i915_gem_object_wait(obj, - I915_WAIT_INTERRUPTIBLE | - I915_WAIT_LOCKED | - I915_WAIT_ALL, - MAX_SCHEDULE_TIMEOUT, - NULL); + ret = i915_gem_object_set_to_cpu_domain(obj, false); if (ret) return ret; - i915_gem_retire_requests(to_i915(obj->base.dev)); - while ((vma = list_first_entry_or_null(&obj->vma_list, struct i915_vma, obj_link))) { @@ -673,17 +666,13 @@ fb_write_origin(struct drm_i915_gem_object *obj, unsigned int domain) obj->frontbuffer_ggtt_origin : ORIGIN_CPU); } -static void -flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +void i915_gem_flush_ggtt_writes(struct drm_i915_private *dev_priv) { - struct drm_i915_private *dev_priv = to_i915(obj->base.dev); - - if (!(obj->base.write_domain & flush_domains)) - return; - - /* No actual flushing is required for the GTT write domain. Writes - * to it "immediately" go to main memory as far as we know, so there's - * no chipset flush. It also doesn't land in render cache. + /* + * No actual flushing is required for the GTT write domain for reads + * from the GTT domain. Writes to it "immediately" go to main memory + * as far as we know, so there's no chipset flush. It also doesn't + * land in the GPU render cache. * * However, we do have to enforce the order so that all writes through * the GTT land before any writes to the device, such as updates to @@ -694,22 +683,43 @@ flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) * timing. This issue has only been observed when switching quickly * between GTT writes and CPU reads from inside the kernel on recent hw, * and it appears to only affect discrete GTT blocks (i.e. on LLC - * system agents we cannot reproduce this behaviour). + * system agents we cannot reproduce this behaviour, until Cannonlake + * that was!). 
*/ + wmb(); + intel_runtime_pm_get(dev_priv); + spin_lock_irq(&dev_priv->uncore.lock); + + POSTING_READ_FW(RING_HEAD(RENDER_RING_BASE)); + + spin_unlock_irq(&dev_priv->uncore.lock); + intel_runtime_pm_put(dev_priv); +} + +static void +flush_write_domain(struct drm_i915_gem_object *obj, unsigned int flush_domains) +{ + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + + if (!(obj->base.write_domain & flush_domains)) + return; + switch (obj->base.write_domain) { case I915_GEM_DOMAIN_GTT: - if (!HAS_LLC(dev_priv)) { - intel_runtime_pm_get(dev_priv); - spin_lock_irq(&dev_priv->uncore.lock); - POSTING_READ_FW(RING_HEAD(dev_priv->engine[RCS]->mmio_base)); - spin_unlock_irq(&dev_priv->uncore.lock); - intel_runtime_pm_put(dev_priv); - } + i915_gem_flush_ggtt_writes(dev_priv); intel_fb_obj_flush(obj, fb_write_origin(obj, I915_GEM_DOMAIN_GTT)); + + for_each_ggtt_vma(vma, obj) { + if (vma->iomap) + continue; + + i915_vma_unset_ggtt_write(vma); + } break; case I915_GEM_DOMAIN_CPU: @@ -1106,7 +1116,7 @@ i915_gem_gtt_pread(struct drm_i915_gem_object *obj, page_base += offset & PAGE_MASK; } - if (gtt_user_read(&ggtt->mappable, page_base, page_offset, + if (gtt_user_read(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1314,7 +1324,7 @@ i915_gem_gtt_pwrite_fast(struct drm_i915_gem_object *obj, * If the object is non-shmem backed, we retry again with the * path that handles page fault. */ - if (ggtt_write(&ggtt->mappable, page_base, page_offset, + if (ggtt_write(&ggtt->iomap, page_base, page_offset, user_data, page_length)) { ret = -EFAULT; break; @@ -1556,10 +1566,7 @@ static void i915_gem_object_bump_inactive_ggtt(struct drm_i915_gem_object *obj) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) { if (i915_vma_is_active(vma)) continue; @@ -1960,9 +1967,9 @@ int i915_gem_fault(struct vm_fault *vmf) /* Finally, remap it using the new GTT offset */ ret = remap_io_mapping(area, area->vm_start + (vma->ggtt_view.partial.offset << PAGE_SHIFT), - (ggtt->mappable_base + vma->node.start) >> PAGE_SHIFT, + (ggtt->gmadr.start + vma->node.start) >> PAGE_SHIFT, min_t(u64, vma->size, area->vm_end - area->vm_start), - &ggtt->mappable); + &ggtt->iomap); if (ret) goto err_fence; @@ -1972,6 +1979,8 @@ int i915_gem_fault(struct vm_fault *vmf) list_add(&obj->userfault_link, &dev_priv->mm.userfault_list); GEM_BUG_ON(!obj->userfault_count); + i915_vma_set_ggtt_write(vma); + err_fence: i915_vma_unpin_fence(vma); err_unpin: @@ -2036,12 +2045,8 @@ static void __i915_gem_object_release_mmap(struct drm_i915_gem_object *obj) drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->anon_inode->i_mapping); - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (!i915_vma_is_ggtt(vma)) - break; - + for_each_ggtt_vma(vma, obj) i915_vma_unset_userfault(vma); - } } /** @@ -2591,7 +2596,7 @@ static int ____i915_gem_object_get_pages(struct drm_i915_gem_object *obj) } err = obj->ops->get_pages(obj); - GEM_BUG_ON(!err && IS_ERR_OR_NULL(obj->mm.pages)); + GEM_BUG_ON(!err && !i915_gem_object_has_pages(obj)); return err; } @@ -3084,7 +3089,12 @@ i915_gem_reset_request(struct intel_engine_cs *engine, void i915_gem_reset_engine(struct intel_engine_cs *engine, struct drm_i915_gem_request *request) { - engine->irq_posted = 0; + /* + * Make sure this write is visible before we re-enable the interrupt + * handlers on another CPU, as 
tasklet_enable() resolves to just + a compiler barrier which is insufficient for our purpose here. + */ + smp_store_mb(engine->irq_posted, 0); if (request) request = i915_gem_reset_request(engine, request); @@ -3114,6 +3124,25 @@ void i915_gem_reset(struct drm_i915_private *dev_priv) ctx = fetch_and_zero(&engine->last_retired_context); if (ctx) engine->context_unpin(engine, ctx); + + /* + * Ostensibly, we always want a context loaded for powersaving, + * so if the engine is idle after the reset, send a request + * to load our scratch kernel_context. + * + * More mysteriously, if we leave the engine idle after a reset, + * the next userspace batch may hang, with what appears to be + * an incoherent read by the CS (presumably stale TLB). An + * empty request appears sufficient to paper over the glitch. + */ + if (list_empty(&engine->timeline->requests)) { + struct drm_i915_gem_request *rq; + + rq = i915_gem_request_alloc(engine, + dev_priv->kernel_context); + if (!IS_ERR(rq)) + __i915_add_request(rq, false); + } } i915_gem_restore_fences(dev_priv); @@ -3328,7 +3357,7 @@ i915_gem_idle_work_handler(struct work_struct *work) * Wait for last execlists context complete, but bail out in case a * new request is submitted. */ - end = ktime_add_ms(ktime_get(), 200); + end = ktime_add_ms(ktime_get(), I915_IDLE_ENGINES_TIMEOUT); do { if (new_requests_since_last_retire(dev_priv)) return; @@ -3381,6 +3410,9 @@ i915_gem_idle_work_handler(struct work_struct *work) if (INTEL_GEN(dev_priv) >= 6) gen6_rps_idle(dev_priv); + + intel_display_power_put(dev_priv, POWER_DOMAIN_GT_IRQ); + intel_runtime_pm_put(dev_priv); out_unlock: mutex_unlock(&dev_priv->drm.struct_mutex); @@ -3525,8 +3557,19 @@ static int wait_for_timeline(struct i915_gem_timeline *tl, unsigned int flags) static int wait_for_engines(struct drm_i915_private *i915) { - if (wait_for(intel_engines_are_idle(i915), 50)) { - DRM_ERROR("Failed to idle engines, declaring wedged!\n"); + if (wait_for(intel_engines_are_idle(i915), I915_IDLE_ENGINES_TIMEOUT)) { + dev_err(i915->drm.dev, + "Failed to idle engines, declaring wedged!\n"); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, i915, id) + intel_engine_dump(engine, &p, + "%s", engine->name); + } + i915_gem_set_wedged(i915); return -EIO; } @@ -3552,9 +3595,7 @@ int i915_gem_wait_for_idle(struct drm_i915_private *i915, unsigned int flags) if (ret) return ret; } - i915_gem_retire_requests(i915); - GEM_BUG_ON(i915->gt.active_requests); ret = wait_for_engines(i915); } else { @@ -3753,7 +3794,8 @@ restart: return -EBUSY; } - if (i915_gem_valid_gtt_space(vma, cache_level)) + if (!i915_vma_is_closed(vma) && + i915_gem_valid_gtt_space(vma, cache_level)) continue; ret = i915_vma_unbind(vma); @@ -3806,7 +3848,7 @@ restart: * dropped the fence as all snoopable access is * supposed to be linear. */ - list_for_each_entry(vma, &obj->vma_list, obj_link) { + for_each_ggtt_vma(vma, obj) { ret = i915_vma_put_fence(vma); if (ret) return ret; @@ -4847,7 +4889,8 @@ void i915_gem_resume(struct drm_i915_private *i915) i915_gem_restore_gtt_mappings(i915); i915_gem_restore_fences(i915); - /* As we didn't flush the kernel context before suspend, we cannot + /* + * As we didn't flush the kernel context before suspend, we cannot * guarantee that the context image is complete. So let's just reset * it and start again. 
*/ @@ -4868,8 +4911,10 @@ out_unlock: return; err_wedged: - DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); - i915_gem_set_wedged(i915); + if (!i915_terminally_wedged(&i915->gpu_error)) { + DRM_ERROR("failed to re-initialize GPU, declaring wedged!\n"); + i915_gem_set_wedged(i915); + } goto out_unlock; } @@ -5142,6 +5187,10 @@ int i915_gem_init(struct drm_i915_private *dev_priv) if (ret) return ret; + ret = intel_uc_init_wq(dev_priv); + if (ret) + return ret; + /* This is just a security blanket to placate dragons. * On some systems, we very sporadically observe that the first TLBs * used by the CS may be stale, despite us poking the TLB reset. If @@ -5152,22 +5201,32 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_uncore_forcewake_get(dev_priv, FORCEWAKE_ALL); ret = i915_gem_init_ggtt(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_unlock; + } ret = i915_gem_contexts_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_ggtt; + } ret = intel_engines_init(dev_priv); - if (ret) - goto out_unlock; + if (ret) { + GEM_BUG_ON(ret == -EIO); + goto err_context; + } intel_init_gt_powersave(dev_priv); + ret = intel_uc_init(dev_priv); + if (ret) + goto err_pm; + ret = i915_gem_init_hw(dev_priv); if (ret) - goto out_unlock; + goto err_uc_init; /* * Despite its name intel_init_clock_gating applies both display @@ -5181,9 +5240,55 @@ int i915_gem_init(struct drm_i915_private *dev_priv) intel_init_clock_gating(dev_priv); ret = __intel_engines_record_defaults(dev_priv); -out_unlock: + if (ret) + goto err_init_hw; + + if (i915_inject_load_failure()) { + ret = -ENODEV; + goto err_init_hw; + } + + if (i915_inject_load_failure()) { + ret = -EIO; + goto err_init_hw; + } + + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + return 0; + + /* + * Unwinding is complicated by the fact that we want to handle -EIO to mean + * disable GPU submission but keep KMS alive. We want to mark the + * HW as irreversibly wedged, but keep enough state around that the + * driver doesn't explode during runtime. + */ +err_init_hw: + i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED); + i915_gem_contexts_lost(dev_priv); + intel_uc_fini_hw(dev_priv); +err_uc_init: + intel_uc_fini(dev_priv); +err_pm: + if (ret != -EIO) { + intel_cleanup_gt_powersave(dev_priv); + i915_gem_cleanup_engines(dev_priv); + } +err_context: + if (ret != -EIO) + i915_gem_contexts_fini(dev_priv); +err_ggtt: +err_unlock: + intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); + mutex_unlock(&dev_priv->drm.struct_mutex); + + if (ret != -EIO) + i915_gem_cleanup_userptr(dev_priv); + if (ret == -EIO) { - /* Allow engine initialisation to fail by marking the GPU as + /* + * Allow engine initialisation to fail by marking the GPU as * wedged. But we only want to do this where the GPU is angry, * for all other failure, such as an allocation failure, bail. 
*/ @@ -5193,9 +5298,8 @@ out_unlock: } ret = 0; } - intel_uncore_forcewake_put(dev_priv, FORCEWAKE_ALL); - mutex_unlock(&dev_priv->drm.struct_mutex); + i915_gem_drain_freed_objects(dev_priv); return ret; } diff --git a/drivers/gpu/drm/i915/i915_gem_clflush.c b/drivers/gpu/drm/i915/i915_gem_clflush.c index f663cd919795..b9b53ac14176 100644 --- a/drivers/gpu/drm/i915/i915_gem_clflush.c +++ b/drivers/gpu/drm/i915/i915_gem_clflush.c @@ -167,7 +167,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj, i915_sw_fence_await_reservation(&clflush->wait, obj->resv, NULL, true, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); reservation_object_lock(obj->resv, NULL); reservation_object_add_excl_fence(obj->resv, &clflush->dma); diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c index ce3139e5ec4c..648e7536ff51 100644 --- a/drivers/gpu/drm/i915/i915_gem_context.c +++ b/drivers/gpu/drm/i915/i915_gem_context.c @@ -316,7 +316,7 @@ __create_hw_context(struct drm_i915_private *dev_priv, * present or not in use we still need a small bias as ring wraparound * at offset 0 sometimes hangs. No idea why. */ - if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) + if (USES_GUC(dev_priv)) ctx->ggtt_offset_bias = GUC_WOPCM_TOP; else ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; @@ -409,7 +409,7 @@ i915_gem_context_create_gvt(struct drm_device *dev) i915_gem_context_set_closed(ctx); /* not user accessible */ i915_gem_context_clear_bannable(ctx); i915_gem_context_set_force_single_submission(ctx); - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(to_i915(dev))) ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */ GEM_BUG_ON(i915_gem_context_is_kernel(ctx)); @@ -617,7 +617,7 @@ int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv) if (prev) i915_sw_fence_await_sw_fence_gfp(&req->submit, &prev->submit, - GFP_KERNEL); + I915_FENCE_GFP); } /* diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c index 70ccd63cbf8e..4401068ff468 100644 --- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c +++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c @@ -1012,7 +1012,7 @@ static void *reloc_iomap(struct drm_i915_gem_object *obj, offset += page << PAGE_SHIFT; } - vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->mappable, + vaddr = (void __force *)io_mapping_map_atomic_wc(&ggtt->iomap, offset); cache->page = page; cache->vaddr = (unsigned long)vaddr; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c index f3c35e826321..c5f393870532 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/i915_gem_gtt.c @@ -2912,7 +2912,7 @@ void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv) mutex_unlock(&dev_priv->drm.struct_mutex); arch_phys_wc_del(ggtt->mtrr); - io_mapping_fini(&ggtt->mappable); + io_mapping_fini(&ggtt->iomap); } static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl) @@ -2949,50 +2949,6 @@ static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl) return 0; } -static size_t gen6_get_stolen_size(u16 snb_gmch_ctl) -{ - snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT; - snb_gmch_ctl &= SNB_GMCH_GMS_MASK; - return (size_t)snb_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl) -{ - bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - bdw_gmch_ctl &= BDW_GMCH_GMS_MASK; - return (size_t)bdw_gmch_ctl << 25; /* 32 MB units */ -} - -static size_t chv_get_stolen_size(u16 gmch_ctrl) -{ - gmch_ctrl >>= 
SNB_GMCH_GMS_SHIFT; - gmch_ctrl &= SNB_GMCH_GMS_MASK; - - /* - * 0x0 to 0x10: 32MB increments starting at 0MB - * 0x11 to 0x16: 4MB increments starting at 8MB - * 0x17 to 0x1d: 4MB increments start at 36MB - */ - if (gmch_ctrl < 0x11) - return (size_t)gmch_ctrl << 25; - else if (gmch_ctrl < 0x17) - return (size_t)(gmch_ctrl - 0x11 + 2) << 22; - else - return (size_t)(gmch_ctrl - 0x17 + 9) << 22; -} - -static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl) -{ - gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT; - gen9_gmch_ctl &= BDW_GMCH_GMS_MASK; - - if (gen9_gmch_ctl < 0xf0) - return (size_t)gen9_gmch_ctl << 25; /* 32 MB units */ - else - /* 4MB increments starting at 0xf0 for 4MB */ - return (size_t)(gen9_gmch_ctl - 0xf0 + 1) << 22; -} - static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size) { struct drm_i915_private *dev_priv = ggtt->base.i915; @@ -3332,8 +3288,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) int err; /* TODO: We're not aware of mappable constraints on gen8 yet */ - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); err = pci_set_dma_mask(pdev, DMA_BIT_MASK(39)); if (!err) @@ -3344,13 +3302,10 @@ static int gen8_gmch_probe(struct i915_ggtt *ggtt) pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); if (INTEL_GEN(dev_priv) >= 9) { - ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl); size = gen8_get_total_gtt_size(snb_gmch_ctl); } else if (IS_CHERRYVIEW(dev_priv)) { - ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl); size = chv_get_total_gtt_size(snb_gmch_ctl); } else { - ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl); size = gen8_get_total_gtt_size(snb_gmch_ctl); } @@ -3390,14 +3345,16 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) u16 snb_gmch_ctl; int err; - ggtt->mappable_base = pci_resource_start(pdev, 2); - ggtt->mappable_end = pci_resource_len(pdev, 2); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(pci_resource_start(pdev, 2), + pci_resource_len(pdev, 2)); + ggtt->mappable_end = resource_size(&ggtt->gmadr); /* 64/512MB is the current min/max we actually know of, but this is just * a coarse sanity check. 
*/ if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) { - DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end); + DRM_ERROR("Unknown GMADR size (%pa)\n", &ggtt->mappable_end); return -ENXIO; } @@ -3408,8 +3365,6 @@ static int gen6_gmch_probe(struct i915_ggtt *ggtt) DRM_ERROR("Can't set DMA mask/consistent mask (%d)\n", err); pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl); - ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl); - size = gen6_get_total_gtt_size(snb_gmch_ctl); ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT; @@ -3446,6 +3401,7 @@ static void i915_gmch_remove(struct i915_address_space *vm) static int i915_gmch_probe(struct i915_ggtt *ggtt) { struct drm_i915_private *dev_priv = ggtt->base.i915; + phys_addr_t gmadr_base; int ret; ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL); @@ -3455,10 +3411,13 @@ static int i915_gmch_probe(struct i915_ggtt *ggtt) } intel_gtt_get(&ggtt->base.total, - &ggtt->stolen_size, - &ggtt->mappable_base, + &gmadr_base, &ggtt->mappable_end); + ggtt->gmadr = + (struct resource) DEFINE_RES_MEM(gmadr_base, + ggtt->mappable_end); + ggtt->do_idle_maps = needs_idle_maps(dev_priv); ggtt->base.insert_page = i915_ggtt_insert_page; ggtt->base.insert_entries = i915_ggtt_insert_entries; @@ -3503,9 +3462,9 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) * currently don't have any bits spare to pass in this upper * restriction! */ - if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading) { + if (USES_GUC(dev_priv)) { ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP); - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if ((ggtt->base.total - 1) >> 32) { @@ -3513,21 +3472,21 @@ int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv) " of address space! Found %lldM!\n", ggtt->base.total >> 20); ggtt->base.total = 1ULL << 32; - ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total); + ggtt->mappable_end = min_t(u64, ggtt->mappable_end, ggtt->base.total); } if (ggtt->mappable_end > ggtt->base.total) { DRM_ERROR("mappable aperture extends past end of GGTT," - " aperture=%llx, total=%llx\n", - ggtt->mappable_end, ggtt->base.total); + " aperture=%pa, total=%llx\n", + &ggtt->mappable_end, ggtt->base.total); ggtt->mappable_end = ggtt->base.total; } /* GMADR is the PCI mmio aperture into the global GTT. 
*/ - DRM_INFO("Memory usable by graphics device = %lluM\n", - ggtt->base.total >> 20); - DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20); - DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20); + DRM_DEBUG_DRIVER("GGTT size = %lluM\n", ggtt->base.total >> 20); + DRM_DEBUG_DRIVER("GMADR size = %lluM\n", (u64)ggtt->mappable_end >> 20); + DRM_DEBUG_DRIVER("DSM size = %lluM\n", + (u64)resource_size(&intel_graphics_stolen_res) >> 20); if (intel_vtd_active()) DRM_INFO("VT-d active for gfx access\n"); @@ -3556,14 +3515,14 @@ int i915_ggtt_init_hw(struct drm_i915_private *dev_priv) ggtt->base.mm.color_adjust = i915_gtt_color_adjust; mutex_unlock(&dev_priv->drm.struct_mutex); - if (!io_mapping_init_wc(&dev_priv->ggtt.mappable, - dev_priv->ggtt.mappable_base, + if (!io_mapping_init_wc(&dev_priv->ggtt.iomap, + dev_priv->ggtt.gmadr.start, dev_priv->ggtt.mappable_end)) { ret = -EIO; goto out_gtt_cleanup; } - ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end); + ggtt->mtrr = arch_phys_wc_add(ggtt->gmadr.start, ggtt->mappable_end); /* * Initialise stolen early so that we may reserve preallocated @@ -3593,6 +3552,8 @@ void i915_ggtt_enable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != gen6_ggtt_invalidate); i915->ggtt.invalidate = guc_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_ggtt_disable_guc(struct drm_i915_private *i915) @@ -3601,6 +3562,8 @@ void i915_ggtt_disable_guc(struct drm_i915_private *i915) GEM_BUG_ON(i915->ggtt.invalidate != guc_ggtt_invalidate); i915->ggtt.invalidate = gen6_ggtt_invalidate; + + i915_ggtt_invalidate(i915); } void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) @@ -3620,10 +3583,7 @@ void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv) bool ggtt_bound = false; struct i915_vma *vma; - list_for_each_entry(vma, &obj->vma_list, obj_link) { - if (vma->vm != &ggtt->base) - continue; - + for_each_ggtt_vma(vma, obj) { if (!i915_vma_unbind(vma)) continue; diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.h b/drivers/gpu/drm/i915/i915_gem_gtt.h index 93211a96fdad..a42890d9af38 100644 --- a/drivers/gpu/drm/i915/i915_gem_gtt.h +++ b/drivers/gpu/drm/i915/i915_gem_gtt.h @@ -368,23 +368,10 @@ i915_vm_has_scratch_64K(struct i915_address_space *vm) */ struct i915_ggtt { struct i915_address_space base; - struct io_mapping mappable; /* Mapping to our CPU mappable region */ - phys_addr_t mappable_base; /* PA of our GMADR */ - u64 mappable_end; /* End offset that we can CPU map */ - - /* Stolen memory is segmented in hardware with different portions - * offlimits to certain functions. - * - * The drm_mm is initialised to the total accessible range, as found - * from the PCI config. On Broadwell+, this is further restricted to - * avoid the first page! The upper end of stolen memory is reserved for - * hardware functions and similarly removed from the accessible range. 
- */ - u32 stolen_size; /* Total size of stolen memory */ - u32 stolen_usable_size; /* Total size minus reserved ranges */ - u32 stolen_reserved_base; - u32 stolen_reserved_size; + struct io_mapping iomap; /* Mapping to our CPU mappable region */ + struct resource gmadr; /* GMADR resource */ + resource_size_t mappable_end; /* End offset that we can CPU map */ /** "Graphics Stolen Memory" holds the global PTEs */ void __iomem *gsm; diff --git a/drivers/gpu/drm/i915/i915_gem_internal.c b/drivers/gpu/drm/i915/i915_gem_internal.c index ee83ec838ee7..a1d6956734f7 100644 --- a/drivers/gpu/drm/i915/i915_gem_internal.c +++ b/drivers/gpu/drm/i915/i915_gem_internal.c @@ -27,6 +27,7 @@ #include "i915_drv.h" #define QUIET (__GFP_NORETRY | __GFP_NOWARN) +#define MAYFAIL (__GFP_RETRY_MAYFAIL | __GFP_NOWARN) /* convert swiotlb segment size into sensible units (pages)! */ #define IO_TLB_SEGPAGES (IO_TLB_SEGSIZE << IO_TLB_SHIFT >> PAGE_SHIFT) @@ -95,7 +96,8 @@ create_st: struct page *page; do { - page = alloc_pages(gfp | (order ? QUIET : 0), order); + page = alloc_pages(gfp | (order ? QUIET : MAYFAIL), + order); if (page) break; if (!order--) diff --git a/drivers/gpu/drm/i915/i915_gem_object.h b/drivers/gpu/drm/i915/i915_gem_object.h index 19fb28c177d8..05e89e1c0a08 100644 --- a/drivers/gpu/drm/i915/i915_gem_object.h +++ b/drivers/gpu/drm/i915/i915_gem_object.h @@ -261,6 +261,8 @@ struct drm_i915_gem_object { } userptr; unsigned long scratch; + + void *gvt_info; }; /** for phys allocated objects */ diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index a90bdd26571f..d575109f7a7f 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -252,6 +252,20 @@ static void mark_busy(struct drm_i915_private *i915) GEM_BUG_ON(!i915->gt.active_requests); intel_runtime_pm_get_noresume(i915); + + /* + * It seems that the DMC likes to transition between the DC states a lot + * when there are no connected displays (no active power domains) during + * command submission. + * + * This activity has negative impact on the performance of the chip with + * huge latencies observed in the interrupt handler and elsewhere. + * + * Work around it by grabbing a GT IRQ power domain whilst there is any + * GT activity, preventing any DC state transitions. + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + i915->gt.awake = true; intel_enable_gt_powersave(i915); @@ -465,6 +479,7 @@ void __i915_gem_request_submit(struct drm_i915_gem_request *request) /* Transfer from per-context onto the global per-engine timeline */ timeline = engine->timeline; GEM_BUG_ON(timeline == request->timeline); + GEM_BUG_ON(request->global_seqno); seqno = timeline_get_seqno(timeline); GEM_BUG_ON(!seqno); @@ -511,6 +526,7 @@ void __i915_gem_request_unsubmit(struct drm_i915_gem_request *request) /* Only unwind in reverse order, required so that the per-context list * is kept in seqno/ring order. */ + GEM_BUG_ON(!request->global_seqno); GEM_BUG_ON(request->global_seqno != engine->timeline->seqno); engine->timeline->seqno--; @@ -663,10 +679,21 @@ i915_gem_request_alloc(struct intel_engine_cs *engine, * * Do not use kmem_cache_zalloc() here! 
*/ - req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); - if (!req) { - ret = -ENOMEM; - goto err_unreserve; + req = kmem_cache_alloc(dev_priv->requests, + GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN); + if (unlikely(!req)) { + /* Ratelimit ourselves to prevent oom from malicious clients */ + ret = i915_gem_wait_for_idle(dev_priv, + I915_WAIT_LOCKED | + I915_WAIT_INTERRUPTIBLE); + if (ret) + goto err_unreserve; + + req = kmem_cache_alloc(dev_priv->requests, GFP_KERNEL); + if (!req) { + ret = -ENOMEM; + goto err_unreserve; + } } req->timeline = i915_gem_context_lookup_timeline(ctx, engine); @@ -768,7 +795,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, if (to->engine == from->engine) { ret = i915_sw_fence_await_sw_fence_gfp(&to->submit, &from->submit, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -796,7 +823,7 @@ i915_gem_request_await_request(struct drm_i915_gem_request *to, await_dma_fence: ret = i915_sw_fence_await_dma_fence(&to->submit, &from->fence, 0, - GFP_KERNEL); + I915_FENCE_GFP); return ret < 0 ? ret : 0; } @@ -847,7 +874,7 @@ i915_gem_request_await_dma_fence(struct drm_i915_gem_request *req, else ret = i915_sw_fence_await_dma_fence(&req->submit, fence, I915_FENCE_TIMEOUT, - GFP_KERNEL); + I915_FENCE_GFP); if (ret < 0) return ret; diff --git a/drivers/gpu/drm/i915/i915_gem_request.h b/drivers/gpu/drm/i915/i915_gem_request.h index 26249f39de67..0d6d39f19506 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.h +++ b/drivers/gpu/drm/i915/i915_gem_request.h @@ -204,6 +204,8 @@ struct drm_i915_gem_request { struct list_head client_link; }; +#define I915_FENCE_GFP (GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN) + extern const struct dma_fence_ops i915_fence_ops; static inline bool dma_fence_is_i915(const struct dma_fence *fence) diff --git a/drivers/gpu/drm/i915/i915_gem_stolen.c b/drivers/gpu/drm/i915/i915_gem_stolen.c index 1877ae9a1d9b..d3f222fa6356 100644 --- a/drivers/gpu/drm/i915/i915_gem_stolen.c +++ b/drivers/gpu/drm/i915/i915_gem_stolen.c @@ -30,9 +30,6 @@ #include <drm/i915_drm.h> #include "i915_drv.h" -#define KB(x) ((x) * 1024) -#define MB(x) (KB(x) * 1024) - /* * The BIOS typically reserves some of the system's memory for the exclusive * use of the integrated graphics. This memory is no longer available for @@ -79,129 +76,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *dev_priv, mutex_unlock(&dev_priv->mm.stolen_lock); } -static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) +static int i915_adjust_stolen(struct drm_i915_private *dev_priv, + struct resource *dsm) { - struct pci_dev *pdev = dev_priv->drm.pdev; struct i915_ggtt *ggtt = &dev_priv->ggtt; struct resource *r; - dma_addr_t base; - - /* Almost universally we can find the Graphics Base of Stolen Memory - * at register BSM (0x5c) in the igfx configuration space. On a few - * (desktop) machines this is also mirrored in the bridge device at - * different locations, or in the MCHBAR. - * - * On 865 we just check the TOUD register. - * - * On 830/845/85x the stolen memory base isn't available in any - * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size. 
- * - */ - base = 0; - if (INTEL_GEN(dev_priv) >= 3) { - u32 bsm; - - pci_read_config_dword(pdev, INTEL_BSM, &bsm); - - base = bsm & INTEL_BSM_MASK; - } else if (IS_I865G(dev_priv)) { - u32 tseg_size = 0; - u16 toud = 0; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_word(pdev->bus, PCI_DEVFN(0, 0), - I865_TOUD, &toud); - - base = (toud << 16) + tseg_size; - } else if (IS_I85X(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I85X_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) - tseg_size = MB(1); - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 1), - I85X_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I845G(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I845_ESMRAMC, &tmp); - - if (tmp & TSEG_ENABLE) { - switch (tmp & I845_TSEG_SIZE_MASK) { - case I845_TSEG_SIZE_512K: - tseg_size = KB(512); - break; - case I845_TSEG_SIZE_1M: - tseg_size = MB(1); - break; - } - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } else if (IS_I830(dev_priv)) { - u32 tseg_size = 0; - u32 tom; - u8 tmp; - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_ESMRAMC, &tmp); - if (tmp & TSEG_ENABLE) { - if (tmp & I830_TSEG_SIZE_1M) - tseg_size = MB(1); - else - tseg_size = KB(512); - } - - pci_bus_read_config_byte(pdev->bus, PCI_DEVFN(0, 0), - I830_DRB3, &tmp); - tom = tmp * MB(32); - - base = tom - tseg_size - ggtt->stolen_size; - } + if (dsm->start == 0 || dsm->end <= dsm->start) + return -EINVAL; - if (base == 0 || add_overflows(base, ggtt->stolen_size)) - return 0; + /* + * TODO: We have yet too encounter the case where the GTT wasn't at the + * end of stolen. With that assumption we could simplify this. 
+ */ - /* make sure we don't clobber the GTT if it's within stolen memory */ + /* Make sure we don't clobber the GTT if it's within stolen memory */ if (INTEL_GEN(dev_priv) <= 4 && !IS_G33(dev_priv) && !IS_PINEVIEW(dev_priv) && !IS_G4X(dev_priv)) { - struct { - dma_addr_t start, end; - } stolen[2] = { - { .start = base, .end = base + ggtt->stolen_size, }, - { .start = base, .end = base + ggtt->stolen_size, }, - }; - u64 ggtt_start, ggtt_end; + struct resource stolen[2] = {*dsm, *dsm}; + struct resource ggtt_res; + resource_size_t ggtt_start; ggtt_start = I915_READ(PGTBL_CTL); if (IS_GEN4(dev_priv)) @@ -209,70 +103,64 @@ static dma_addr_t i915_stolen_to_dma(struct drm_i915_private *dev_priv) (ggtt_start & PGTBL_ADDRESS_HI_MASK) << 28; else ggtt_start &= PGTBL_ADDRESS_LO_MASK; - ggtt_end = ggtt_start + ggtt_total_entries(ggtt) * 4; - - if (ggtt_start >= stolen[0].start && ggtt_start < stolen[0].end) - stolen[0].end = ggtt_start; - if (ggtt_end > stolen[1].start && ggtt_end <= stolen[1].end) - stolen[1].start = ggtt_end; - - /* pick the larger of the two chunks */ - if (stolen[0].end - stolen[0].start > - stolen[1].end - stolen[1].start) { - base = stolen[0].start; - ggtt->stolen_size = stolen[0].end - stolen[0].start; - } else { - base = stolen[1].start; - ggtt->stolen_size = stolen[1].end - stolen[1].start; - } + + ggtt_res = + (struct resource) DEFINE_RES_MEM(ggtt_start, + ggtt_total_entries(ggtt) * 4); + + if (ggtt_res.start >= stolen[0].start && ggtt_res.start < stolen[0].end) + stolen[0].end = ggtt_res.start; + if (ggtt_res.end > stolen[1].start && ggtt_res.end <= stolen[1].end) + stolen[1].start = ggtt_res.end; + + /* Pick the larger of the two chunks */ + if (resource_size(&stolen[0]) > resource_size(&stolen[1])) + *dsm = stolen[0]; + else + *dsm = stolen[1]; if (stolen[0].start != stolen[1].start || stolen[0].end != stolen[1].end) { - dma_addr_t end = base + ggtt->stolen_size - 1; - - DRM_DEBUG_KMS("GTT within stolen memory at 0x%llx-0x%llx\n", - (unsigned long long)ggtt_start, - (unsigned long long)ggtt_end - 1); - DRM_DEBUG_KMS("Stolen memory adjusted to %pad - %pad\n", - &base, &end); + DRM_DEBUG_KMS("GTT within stolen memory at %pR\n", &ggtt_res); + DRM_DEBUG_KMS("Stolen memory adjusted to %pR\n", dsm); } } - - /* Verify that nothing else uses this physical address. Stolen + /* + * Verify that nothing else uses this physical address. Stolen * memory should be reserved by the BIOS and hidden from the * kernel. So if the region is already marked as busy, something * is seriously wrong. */ - r = devm_request_mem_region(dev_priv->drm.dev, base, ggtt->stolen_size, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start, + resource_size(dsm), "Graphics Stolen Memory"); if (r == NULL) { /* * One more attempt but this time requesting region from - * base + 1, as we have seen that this resolves the region + * start + 1, as we have seen that this resolves the region * conflict with the PCI Bus. * This is a BIOS w/a: Some BIOS wrap stolen in the root * PCI bus, but have an off-by-one error. Hence retry the * reservation starting from 1 instead of 0. * There's also BIOS with off-by-one on the other end. */ - r = devm_request_mem_region(dev_priv->drm.dev, base + 1, - ggtt->stolen_size - 2, + r = devm_request_mem_region(dev_priv->drm.dev, dsm->start + 1, + resource_size(dsm) - 2, "Graphics Stolen Memory"); /* * GEN3 firmware likes to smash pci bridges into the stolen * range. Apparently this works. 
*/ if (r == NULL && !IS_GEN3(dev_priv)) { - dma_addr_t end = base + ggtt->stolen_size; + DRM_ERROR("conflict detected with stolen region: %pR\n", + dsm); - DRM_ERROR("conflict detected with stolen region: [%pad - %pad]\n", - &base, &end); - base = 0; + return -EBUSY; } } - return base; + return 0; } void i915_gem_cleanup_stolen(struct drm_device *dev) @@ -286,13 +174,12 @@ void i915_gem_cleanup_stolen(struct drm_device *dev) } static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(IS_GM45(dev_priv) ? CTG_STOLEN_RESERVED : ELK_STOLEN_RESERVED); - dma_addr_t stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + resource_size_t stolen_top = dev_priv->dsm.end + 1; if ((reg_val & G4X_STOLEN_RESERVED_ENABLE) == 0) { *base = 0; @@ -321,7 +208,7 @@ static void g4x_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -353,7 +240,7 @@ static void gen6_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -379,7 +266,7 @@ static void gen7_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); @@ -411,11 +298,10 @@ static void chv_get_stolen_reserved(struct drm_i915_private *dev_priv, } static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, - dma_addr_t *base, u32 *size) + resource_size_t *base, resource_size_t *size) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; uint32_t reg_val = I915_READ(GEN6_STOLEN_RESERVED); - dma_addr_t stolen_top; + resource_size_t stolen_top; if ((reg_val & GEN6_STOLEN_RESERVED_ENABLE) == 0) { *base = 0; @@ -423,7 +309,7 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, return; } - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + stolen_top = dev_priv->dsm.end + 1; *base = reg_val & GEN6_STOLEN_RESERVED_ADDR_MASK; @@ -439,10 +325,9 @@ static void bdw_get_stolen_reserved(struct drm_i915_private *dev_priv, int i915_gem_init_stolen(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - dma_addr_t reserved_base, stolen_top; - u32 reserved_total, reserved_size; - u32 stolen_usable_start; + resource_size_t reserved_base, stolen_top; + resource_size_t reserved_total, reserved_size; + resource_size_t stolen_usable_start; mutex_init(&dev_priv->mm.stolen_lock); @@ -456,14 +341,18 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) return 0; } - if (ggtt->stolen_size == 0) + if (resource_size(&intel_graphics_stolen_res) == 0) return 0; - dev_priv->mm.stolen_base = i915_stolen_to_dma(dev_priv); - if (dev_priv->mm.stolen_base == 0) + dev_priv->dsm = intel_graphics_stolen_res; + + if (i915_adjust_stolen(dev_priv, &dev_priv->dsm)) return 0; - stolen_top = dev_priv->mm.stolen_base + ggtt->stolen_size; + GEM_BUG_ON(dev_priv->dsm.start == 0); + GEM_BUG_ON(dev_priv->dsm.end <= dev_priv->dsm.start); + + stolen_top = 
dev_priv->dsm.end + 1; reserved_base = 0; reserved_size = 0; @@ -504,50 +393,47 @@ int i915_gem_init_stolen(struct drm_i915_private *dev_priv) reserved_base = stolen_top; } - if (reserved_base < dev_priv->mm.stolen_base || - reserved_base + reserved_size > stolen_top) { - dma_addr_t reserved_top = reserved_base + reserved_size; - DRM_ERROR("Stolen reserved area [%pad - %pad] outside stolen memory [%pad - %pad]\n", - &reserved_base, &reserved_top, - &dev_priv->mm.stolen_base, &stolen_top); + dev_priv->dsm_reserved = + (struct resource) DEFINE_RES_MEM(reserved_base, reserved_size); + + if (!resource_contains(&dev_priv->dsm, &dev_priv->dsm_reserved)) { + DRM_ERROR("Stolen reserved area %pR outside stolen memory %pR\n", + &dev_priv->dsm_reserved, &dev_priv->dsm); return 0; } - ggtt->stolen_reserved_base = reserved_base; - ggtt->stolen_reserved_size = reserved_size; - /* It is possible for the reserved area to end before the end of stolen * memory, so just consider the start. */ reserved_total = stolen_top - reserved_base; - DRM_DEBUG_KMS("Memory reserved for graphics device: %uK, usable: %uK\n", - ggtt->stolen_size >> 10, - (ggtt->stolen_size - reserved_total) >> 10); + DRM_DEBUG_KMS("Memory reserved for graphics device: %lluK, usable: %lluK\n", + (u64)resource_size(&dev_priv->dsm) >> 10, + ((u64)resource_size(&dev_priv->dsm) - reserved_total) >> 10); stolen_usable_start = 0; /* WaSkipStolenMemoryFirstPage:bdw+ */ if (INTEL_GEN(dev_priv) >= 8) stolen_usable_start = 4096; - ggtt->stolen_usable_size = - ggtt->stolen_size - reserved_total - stolen_usable_start; + dev_priv->stolen_usable_size = + resource_size(&dev_priv->dsm) - reserved_total - stolen_usable_start; /* Basic memrange allocator for stolen space. */ drm_mm_init(&dev_priv->mm.stolen, stolen_usable_start, - ggtt->stolen_usable_size); + dev_priv->stolen_usable_size); return 0; } static struct sg_table * i915_pages_create_for_stolen(struct drm_device *dev, - u32 offset, u32 size) + resource_size_t offset, resource_size_t size) { struct drm_i915_private *dev_priv = to_i915(dev); struct sg_table *st; struct scatterlist *sg; - GEM_BUG_ON(range_overflows(offset, size, dev_priv->ggtt.stolen_size)); + GEM_BUG_ON(range_overflows(offset, size, resource_size(&dev_priv->dsm))); /* We hide that we have no struct page backing our stolen object * by wrapping the contiguous physical allocation with a fake @@ -567,7 +453,7 @@ i915_pages_create_for_stolen(struct drm_device *dev, sg->offset = 0; sg->length = size; - sg_dma_address(sg) = (dma_addr_t)dev_priv->mm.stolen_base + offset; + sg_dma_address(sg) = (dma_addr_t)dev_priv->dsm.start + offset; sg_dma_len(sg) = size; return st; @@ -645,7 +531,8 @@ cleanup: } struct drm_i915_gem_object * -i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) +i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, + resource_size_t size) { struct drm_i915_gem_object *obj; struct drm_mm_node *stolen; @@ -678,9 +565,9 @@ i915_gem_object_create_stolen(struct drm_i915_private *dev_priv, u32 size) struct drm_i915_gem_object * i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv, - u32 stolen_offset, - u32 gtt_offset, - u32 size) + resource_size_t stolen_offset, + resource_size_t gtt_offset, + resource_size_t size) { struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj; @@ -693,8 +580,8 @@ i915_gem_object_create_stolen_for_preallocated(struct drm_i915_private *dev_priv lockdep_assert_held(&dev_priv->drm.struct_mutex); - DRM_DEBUG_KMS("creating 
preallocated stolen object: stolen_offset=%x, gtt_offset=%x, size=%x\n",
- stolen_offset, gtt_offset, size);
+ DRM_DEBUG_KMS("creating preallocated stolen object: stolen_offset=%pa, gtt_offset=%pa, size=%pa\n",
+ &stolen_offset, &gtt_offset, &size);
 /* KISS and expect everything to be page-aligned */
 if (WARN_ON(size == 0) ||
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index b85d7ebd9bee..d9dc9df523b5 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -205,10 +205,7 @@ i915_gem_object_fence_prepare(struct drm_i915_gem_object *obj,
 if (tiling_mode == I915_TILING_NONE)
 return 0;
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (!i915_vma_is_ggtt(vma))
- break;
-
+ for_each_ggtt_vma(vma, obj) {
 if (i915_vma_fence_prepare(vma, tiling_mode, stride))
 continue;
@@ -285,10 +282,7 @@ i915_gem_object_set_tiling(struct drm_i915_gem_object *obj,
 }
 mutex_unlock(&obj->mm.lock);
- list_for_each_entry(vma, &obj->vma_list, obj_link) {
- if (!i915_vma_is_ggtt(vma))
- break;
-
+ for_each_ggtt_vma(vma, obj) {
 vma->fence_size = i915_gem_fence_size(i915, vma->size,
 tiling, stride);
 vma->fence_alignment =
diff --git a/drivers/gpu/drm/i915/i915_gem_timeline.c b/drivers/gpu/drm/i915/i915_gem_timeline.c
index c01905d6450c..e9fd87604067 100644
--- a/drivers/gpu/drm/i915/i915_gem_timeline.c
+++ b/drivers/gpu/drm/i915/i915_gem_timeline.c
@@ -33,11 +33,8 @@ static void __intel_timeline_init(struct intel_timeline *tl,
 {
 tl->fence_context = context;
 tl->common = parent;
-#ifdef CONFIG_DEBUG_SPINLOCK
- __raw_spin_lock_init(&tl->lock.rlock, lockname, lockclass);
-#else
 spin_lock_init(&tl->lock);
-#endif
+ lockdep_set_class_and_name(&tl->lock, lockclass, lockname);
 init_request_active(&tl->last_request, NULL);
 INIT_LIST_HEAD(&tl->requests);
 i915_syncmap_init(&tl->sync);
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index 48418fb81066..944059322daa 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -416,6 +416,7 @@ static void error_print_engine(struct drm_i915_error_state_buf *m,
 int n;
 err_printf(m, "%s command stream:\n", engine_str(ee->engine_id));
+ err_printf(m, " IDLE?: %s\n", yesno(ee->idle));
 err_printf(m, " START: 0x%08x\n", ee->start);
 err_printf(m, " HEAD: 0x%08x [0x%08x]\n", ee->head, ee->rq_head);
 err_printf(m, " TAIL: 0x%08x [0x%08x, 0x%08x]\n",
@@ -564,34 +565,17 @@ static void print_error_obj(struct drm_i915_error_state_buf *m,
 static void err_print_capabilities(struct drm_i915_error_state_buf *m,
 const struct intel_device_info *info)
 {
-#define PRINT_FLAG(x) err_printf(m, #x ": %s\n", yesno(info->x))
- DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG);
-#undef PRINT_FLAG
-}
+ struct drm_printer p = i915_error_printer(m);
-static __always_inline void err_print_param(struct drm_i915_error_state_buf *m,
- const char *name,
- const char *type,
- const void *x)
-{
- if (!__builtin_strcmp(type, "bool"))
- err_printf(m, "i915.%s=%s\n", name, yesno(*(const bool *)x));
- else if (!__builtin_strcmp(type, "int"))
- err_printf(m, "i915.%s=%d\n", name, *(const int *)x);
- else if (!__builtin_strcmp(type, "unsigned int"))
- err_printf(m, "i915.%s=%u\n", name, *(const unsigned int *)x);
- else if (!__builtin_strcmp(type, "char *"))
- err_printf(m, "i915.%s=%s\n", name, *(const char **)x);
- else
- BUILD_BUG();
+ intel_device_info_dump_flags(info, &p);
 }
 static void err_print_params(struct drm_i915_error_state_buf *m,
- const struct
i915_params *p) + const struct i915_params *params) { -#define PRINT(T, x, ...) err_print_param(m, #x, #T, &p->x); - I915_PARAMS_FOR_EACH(PRINT); -#undef PRINT + struct drm_printer p = i915_error_printer(m); + + i915_params_dump(params, &p); } static void err_print_pciid(struct drm_i915_error_state_buf *m, @@ -956,7 +940,7 @@ i915_error_object_create(struct drm_i915_private *i915, ggtt->base.insert_page(&ggtt->base, dma, slot, I915_CACHE_NONE, 0); - s = io_mapping_map_atomic_wc(&ggtt->mappable, slot); + s = io_mapping_map_atomic_wc(&ggtt->iomap, slot); ret = compress_page(&compress, (void __force *)s, dst); io_mapping_unmap_atomic(s); @@ -1256,6 +1240,7 @@ static void error_record_engine_registers(struct i915_gpu_state *error, ee->hws = I915_READ(mmio); } + ee->idle = intel_engine_is_idle(engine); ee->hangcheck_timestamp = engine->hangcheck.action_timestamp; ee->hangcheck_action = engine->hangcheck.action; ee->hangcheck_stalled = engine->hangcheck.stalled; diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c index 7cac07db89b9..3517c6548e2c 100644 --- a/drivers/gpu/drm/i915/i915_irq.c +++ b/drivers/gpu/drm/i915/i915_irq.c @@ -1400,7 +1400,7 @@ gen8_cs_irq_handler(struct intel_engine_cs *engine, u32 iir, int test_shift) if (iir & (GT_RENDER_USER_INTERRUPT << test_shift)) { notify_ring(engine); - tasklet |= i915_modparams.enable_guc_submission; + tasklet |= USES_GUC_SUBMISSION(engine->i915); } if (tasklet) diff --git a/drivers/gpu/drm/i915/i915_memcpy.c b/drivers/gpu/drm/i915/i915_memcpy.c index 49a079494b68..79f8ec756362 100644 --- a/drivers/gpu/drm/i915/i915_memcpy.c +++ b/drivers/gpu/drm/i915/i915_memcpy.c @@ -96,6 +96,11 @@ bool i915_memcpy_from_wc(void *dst, const void *src, unsigned long len) void i915_memcpy_init_early(struct drm_i915_private *dev_priv) { - if (static_cpu_has(X86_FEATURE_XMM4_1)) + /* + * Some hypervisors (e.g. KVM) don't support VEX-prefix instructions + * emulation. So don't enable movntdqa in hypervisor guest. + */ + if (static_cpu_has(X86_FEATURE_XMM4_1) && + !boot_cpu_has(X86_FEATURE_HYPERVISOR)) static_branch_enable(&has_movntdqa); } diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c index 7bc538687871..b5f3eb4fa8a3 100644 --- a/drivers/gpu/drm/i915/i915_params.c +++ b/drivers/gpu/drm/i915/i915_params.c @@ -22,6 +22,8 @@ * IN THE SOFTWARE. */ +#include <drm/drm_print.h> + #include "i915_params.h" #include "i915_drv.h" @@ -147,13 +149,10 @@ i915_param_named_unsafe(edp_vswing, int, 0400, "(0=use value from vbt [default], 1=low power swing(200mV)," "2=default swing(400mV))"); -i915_param_named_unsafe(enable_guc_loading, int, 0400, - "Enable GuC firmware loading " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); - -i915_param_named_unsafe(enable_guc_submission, int, 0400, - "Enable GuC submission " - "(-1=auto, 0=never [default], 1=if available, 2=required)"); +i915_param_named_unsafe(enable_guc, int, 0400, + "Enable GuC load for GuC submission and/or HuC load. " + "Required functionality can be selected using bitmask values. 
" + "(-1=auto, 0=disable [default], 1=GuC submission, 2=HuC load)"); i915_param_named(guc_log_level, int, 0400, "GuC firmware logging level (-1:disabled (default), 0-3:enabled)"); @@ -175,3 +174,34 @@ i915_param_named(enable_dpcd_backlight, bool, 0600, i915_param_named(enable_gvt, bool, 0400, "Enable support for Intel GVT-g graphics virtualization host support(default:false)"); + +static __always_inline void _print_param(struct drm_printer *p, + const char *name, + const char *type, + const void *x) +{ + if (!__builtin_strcmp(type, "bool")) + drm_printf(p, "i915.%s=%s\n", name, yesno(*(const bool *)x)); + else if (!__builtin_strcmp(type, "int")) + drm_printf(p, "i915.%s=%d\n", name, *(const int *)x); + else if (!__builtin_strcmp(type, "unsigned int")) + drm_printf(p, "i915.%s=%u\n", name, *(const unsigned int *)x); + else if (!__builtin_strcmp(type, "char *")) + drm_printf(p, "i915.%s=%s\n", name, *(const char **)x); + else + BUILD_BUG(); +} + +/** + * i915_params_dump - dump i915 modparams + * @params: i915 modparams + * @p: the &drm_printer + * + * Pretty printer for i915 modparams. + */ +void i915_params_dump(const struct i915_params *params, struct drm_printer *p) +{ +#define PRINT(T, x, ...) _print_param(p, #x, #T, ¶ms->x); + I915_PARAMS_FOR_EACH(PRINT); +#undef PRINT +} diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h index c48c88bb95e8..c96360398072 100644 --- a/drivers/gpu/drm/i915/i915_params.h +++ b/drivers/gpu/drm/i915/i915_params.h @@ -25,8 +25,14 @@ #ifndef _I915_PARAMS_H_ #define _I915_PARAMS_H_ +#include <linux/bitops.h> #include <linux/cache.h> /* for __read_mostly */ +struct drm_printer; + +#define ENABLE_GUC_SUBMISSION BIT(0) +#define ENABLE_GUC_LOAD_HUC BIT(1) + #define I915_PARAMS_FOR_EACH(param) \ param(char *, vbt_firmware, NULL) \ param(int, modeset, -1) \ @@ -41,8 +47,7 @@ param(int, disable_power_well, -1) \ param(int, enable_ips, 1) \ param(int, invert_brightness, 0) \ - param(int, enable_guc_loading, 0) \ - param(int, enable_guc_submission, 0) \ + param(int, enable_guc, 0) \ param(int, guc_log_level, -1) \ param(char *, guc_firmware_path, NULL) \ param(char *, huc_firmware_path, NULL) \ @@ -74,5 +79,7 @@ struct i915_params { extern struct i915_params i915_modparams __read_mostly; +void i915_params_dump(const struct i915_params *params, struct drm_printer *p); + #endif diff --git a/drivers/gpu/drm/i915/i915_pci.c b/drivers/gpu/drm/i915/i915_pci.c index fa67d3dde20e..36d48422b475 100644 --- a/drivers/gpu/drm/i915/i915_pci.c +++ b/drivers/gpu/drm/i915/i915_pci.c @@ -633,6 +633,8 @@ static const struct pci_device_id pciidlist[] = { INTEL_CFL_S_GT1_IDS(&intel_coffeelake_gt1_info), INTEL_CFL_S_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_H_GT2_IDS(&intel_coffeelake_gt2_info), + INTEL_CFL_U_GT1_IDS(&intel_coffeelake_gt1_info), + INTEL_CFL_U_GT2_IDS(&intel_coffeelake_gt2_info), INTEL_CFL_U_GT3_IDS(&intel_coffeelake_gt3_info), INTEL_CNL_U_GT2_IDS(&intel_cannonlake_gt2_info), INTEL_CNL_Y_GT2_IDS(&intel_cannonlake_gt2_info), diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index 09bf043c1c2e..41285bec8fc0 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -3278,6 +3278,7 @@ enum i915_power_well_id { # define AUDUNIT_CLOCK_GATE_DISABLE (1 << 26) /* 965 */ # define DPUNIT_A_CLOCK_GATE_DISABLE (1 << 25) /* 965 */ # define DPCUNIT_CLOCK_GATE_DISABLE (1 << 24) /* 965 */ +# define PNV_GMBUSUNIT_CLOCK_GATE_DISABLE (1 << 24) /* pnv */ # define TVRUNIT_CLOCK_GATE_DISABLE (1 << 23) 
/* 915-945 */ # define TVCUNIT_CLOCK_GATE_DISABLE (1 << 22) /* 915-945 */ # define TVFUNIT_CLOCK_GATE_DISABLE (1 << 21) /* 915-945 */ @@ -3858,6 +3859,9 @@ enum { #define PWM2_GATING_DIS (1 << 14) #define PWM1_GATING_DIS (1 << 13) +#define GEN9_CLKGATE_DIS_4 _MMIO(0x4653C) +#define BXT_GMBUS_GATING_DIS (1 << 14) + #define _CLKGATE_DIS_PSL_A 0x46520 #define _CLKGATE_DIS_PSL_B 0x46524 #define _CLKGATE_DIS_PSL_C 0x46528 @@ -3875,6 +3879,9 @@ enum { #define SARBUNIT_CLKGATE_DIS (1 << 5) #define RCCUNIT_CLKGATE_DIS (1 << 7) +#define UNSLICE_UNIT_LEVEL_CLKGATE _MMIO(0x9434) +#define VFUNIT_CLKGATE_DIS (1 << 20) + /* * Display engine regs */ @@ -6329,6 +6336,7 @@ enum { #define PLANE_CTL_TILED_X ( 1 << 10) #define PLANE_CTL_TILED_Y ( 4 << 10) #define PLANE_CTL_TILED_YF ( 5 << 10) +#define PLANE_CTL_FLIP_HORIZONTAL ( 1 << 8) #define PLANE_CTL_ALPHA_MASK (0x3 << 4) /* Pre-GLK */ #define PLANE_CTL_ALPHA_DISABLE ( 0 << 4) #define PLANE_CTL_ALPHA_SW_PREMULTIPLY ( 2 << 4) @@ -7552,6 +7560,7 @@ enum { #define FDI_RX_CHICKEN(pipe) _MMIO_PIPE(pipe, _FDI_RXA_CHICKEN, _FDI_RXB_CHICKEN) #define SOUTH_DSPCLK_GATE_D _MMIO(0xc2020) +#define PCH_GMBUSUNIT_CLOCK_GATE_DISABLE (1<<31) #define PCH_DPLUNIT_CLOCK_GATE_DISABLE (1<<30) #define PCH_DPLSUNIT_CLOCK_GATE_DISABLE (1<<29) #define PCH_CPUNIT_CLOCK_GATE_DISABLE (1<<14) @@ -8142,6 +8151,7 @@ enum { #define PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE (1<<8) #define STALL_DOP_GATING_DISABLE (1<<5) #define THROTTLE_12_5 (7<<2) +#define DISABLE_EARLY_EOT (1<<1) #define GEN7_ROW_CHICKEN2 _MMIO(0xe4f4) #define GEN7_ROW_CHICKEN2_GT2 _MMIO(0xf4f4) diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c index e8ca67a129d2..3669f5eeb91e 100644 --- a/drivers/gpu/drm/i915/i915_sw_fence.c +++ b/drivers/gpu/drm/i915/i915_sw_fence.c @@ -303,6 +303,7 @@ static int __i915_sw_fence_await_sw_fence(struct i915_sw_fence *fence, int pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (i915_sw_fence_done(signaler)) return 0; @@ -367,6 +368,7 @@ struct i915_sw_dma_fence_cb { struct dma_fence *dma; struct timer_list timer; struct irq_work work; + struct rcu_head rcu; }; static void timer_i915_sw_fence_wake(struct timer_list *t) @@ -406,7 +408,7 @@ static void irq_i915_sw_fence_work(struct irq_work *wrk) del_timer_sync(&cb->timer); dma_fence_put(cb->dma); - kfree(cb); + kfree_rcu(cb, rcu); } int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, @@ -418,6 +420,7 @@ int i915_sw_fence_await_dma_fence(struct i915_sw_fence *fence, int ret; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (dma_fence_is_signaled(dma)) return 0; @@ -464,6 +467,7 @@ int i915_sw_fence_await_reservation(struct i915_sw_fence *fence, int ret = 0, pending; debug_fence_assert(fence); + might_sleep_if(gfpflags_allow_blocking(gfp)); if (write) { struct dma_fence **shared; diff --git a/drivers/gpu/drm/i915/i915_trace.h b/drivers/gpu/drm/i915/i915_trace.h index 4e76768ffa95..e1169c02eb2b 100644 --- a/drivers/gpu/drm/i915/i915_trace.h +++ b/drivers/gpu/drm/i915/i915_trace.h @@ -616,6 +616,7 @@ TRACE_EVENT(i915_gem_request_queue, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -624,15 +625,16 @@ TRACE_EVENT(i915_gem_request_queue, TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->flags = flags; ), - 
TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->flags) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, flags=0x%x", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->flags) ); DECLARE_EVENT_CLASS(i915_gem_request, @@ -641,23 +643,25 @@ DECLARE_EVENT_CLASS(i915_gem_request, TP_STRUCT__entry( __field(u32, dev) - __field(u32, ctx) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; __entry->global = req->global_seqno; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_add, @@ -683,15 +687,17 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) + __field(u32, ctx) __field(u32, seqno) __field(u32, global_seqno) - __field(u32, ctx) __field(u32, port) ), TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -699,10 +705,10 @@ DECLARE_EVENT_CLASS(i915_gem_request_hw, __entry->port = port; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", - __entry->dev, __entry->ring, __entry->ctx, - __entry->seqno, __entry->global_seqno, - __entry->port) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, port=%u", + __entry->dev, __entry->hw_id, __entry->ring, + __entry->ctx, __entry->seqno, + __entry->global_seqno, __entry->port) ); DEFINE_EVENT(i915_gem_request_hw, i915_gem_request_in, @@ -772,6 +778,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, TP_STRUCT__entry( __field(u32, dev) + __field(u32, hw_id) __field(u32, ring) __field(u32, ctx) __field(u32, seqno) @@ -787,6 +794,7 @@ TRACE_EVENT(i915_gem_request_wait_begin, */ TP_fast_assign( __entry->dev = req->i915->drm.primary->index; + __entry->hw_id = req->ctx->hw_id; __entry->ring = req->engine->id; __entry->ctx = req->fence.context; __entry->seqno = req->fence.seqno; @@ -794,10 +802,10 @@ TRACE_EVENT(i915_gem_request_wait_begin, __entry->flags = flags; ), - TP_printk("dev=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", - __entry->dev, __entry->ring, __entry->ctx, __entry->seqno, - __entry->global, !!(__entry->flags & I915_WAIT_LOCKED), - __entry->flags) + TP_printk("dev=%u, hw_id=%u, ring=%u, ctx=%u, seqno=%u, global=%u, blocking=%u, flags=0x%x", + __entry->dev, __entry->hw_id, __entry->ring, __entry->ctx, + __entry->seqno, __entry->global, + !!(__entry->flags & I915_WAIT_LOCKED), __entry->flags) ); DEFINE_EVENT(i915_gem_request, i915_gem_request_wait_end, diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 8d07764887ec..51dbfe5bb418 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -140,4 +140,19 @@ static inline void drain_delayed_work(struct delayed_work *dw) } while (delayed_work_pending(dw)); } +static inline const char 
*yesno(bool v) +{ + return v ? "yes" : "no"; +} + +static inline const char *onoff(bool v) +{ + return v ? "on" : "off"; +} + +static inline const char *enableddisabled(bool v) +{ + return v ? "enabled" : "disabled"; +} + #endif /* !__I915_UTILS_H */ diff --git a/drivers/gpu/drm/i915/i915_vma.c b/drivers/gpu/drm/i915/i915_vma.c index bf6d8d1eaabe..e0e7c48f45dc 100644 --- a/drivers/gpu/drm/i915/i915_vma.c +++ b/drivers/gpu/drm/i915/i915_vma.c @@ -142,6 +142,12 @@ vma_create(struct drm_i915_gem_object *obj, i915_gem_object_get_stride(obj)); GEM_BUG_ON(!is_power_of_2(vma->fence_alignment)); + /* + * We put the GGTT vma at the start of the vma-list, followed + * by the ppGGTT vma. This allows us to break early when + * iterating over only the GGTT vma for an object, see + * for_each_ggtt_vma() + */ vma->flags |= I915_VMA_GGTT; list_add(&vma->obj_link, &obj->vma_list); } else { @@ -305,7 +311,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) ptr = vma->iomap; if (ptr == NULL) { - ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->mappable, + ptr = io_mapping_map_wc(&i915_vm_to_ggtt(vma->vm)->iomap, vma->node.start, vma->node.size); if (ptr == NULL) { @@ -322,6 +328,7 @@ void __iomem *i915_vma_pin_iomap(struct i915_vma *vma) if (err) goto err_unpin; + i915_vma_set_ggtt_write(vma); return ptr; err_unpin: @@ -330,12 +337,24 @@ err: return IO_ERR_PTR(err); } +void i915_vma_flush_writes(struct i915_vma *vma) +{ + if (!i915_vma_has_ggtt_write(vma)) + return; + + i915_gem_flush_ggtt_writes(vma->vm->i915); + + i915_vma_unset_ggtt_write(vma); +} + void i915_vma_unpin_iomap(struct i915_vma *vma) { lockdep_assert_held(&vma->obj->base.dev->struct_mutex); GEM_BUG_ON(vma->iomap == NULL); + i915_vma_flush_writes(vma); + i915_vma_unpin_fence(vma); i915_vma_unpin(vma); } @@ -466,6 +485,7 @@ i915_vma_insert(struct i915_vma *vma, u64 size, u64 alignment, u64 flags) u64 start, end; int ret; + GEM_BUG_ON(i915_vma_is_closed(vma)); GEM_BUG_ON(vma->flags & (I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND)); GEM_BUG_ON(drm_mm_node_allocated(&vma->node)); @@ -678,7 +698,9 @@ static void i915_vma_destroy(struct i915_vma *vma) GEM_BUG_ON(i915_gem_active_isset(&vma->last_read[i])); GEM_BUG_ON(i915_gem_active_isset(&vma->last_fence)); + list_del(&vma->obj_link); list_del(&vma->vm_link); + if (!i915_vma_is_ggtt(vma)) i915_ppgtt_put(i915_vm_to_ppgtt(vma->vm)); @@ -690,7 +712,6 @@ void i915_vma_close(struct i915_vma *vma) GEM_BUG_ON(i915_vma_is_closed(vma)); vma->flags |= I915_VMA_CLOSED; - list_del(&vma->obj_link); rb_erase(&vma->obj_node, &vma->obj->vma_tree); if (!i915_vma_is_active(vma) && !i915_vma_is_pinned(vma)) @@ -790,6 +811,15 @@ int i915_vma_unbind(struct i915_vma *vma) GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj)); if (i915_vma_is_map_and_fenceable(vma)) { + /* + * Check that we have flushed all writes through the GGTT + * before the unbind, other due to non-strict nature of those + * indirect writes they may end up referencing the GGTT PTE + * after the unbind. 
+ */ + i915_vma_flush_writes(vma); + GEM_BUG_ON(i915_vma_has_ggtt_write(vma)); + /* release the fence reg _after_ flushing */ ret = i915_vma_put_fence(vma); if (ret) diff --git a/drivers/gpu/drm/i915/i915_vma.h b/drivers/gpu/drm/i915/i915_vma.h index 1e2bc9b3c3ac..fd5b84904f7c 100644 --- a/drivers/gpu/drm/i915/i915_vma.h +++ b/drivers/gpu/drm/i915/i915_vma.h @@ -90,6 +90,7 @@ struct i915_vma { #define I915_VMA_CLOSED BIT(10) #define I915_VMA_USERFAULT_BIT 11 #define I915_VMA_USERFAULT BIT(I915_VMA_USERFAULT_BIT) +#define I915_VMA_GGTT_WRITE BIT(12) unsigned int active; struct i915_gem_active last_read[I915_NUM_ENGINES]; @@ -138,6 +139,24 @@ static inline bool i915_vma_is_ggtt(const struct i915_vma *vma) return vma->flags & I915_VMA_GGTT; } +static inline bool i915_vma_has_ggtt_write(const struct i915_vma *vma) +{ + return vma->flags & I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_set_ggtt_write(struct i915_vma *vma) +{ + GEM_BUG_ON(!i915_vma_is_ggtt(vma)); + vma->flags |= I915_VMA_GGTT_WRITE; +} + +static inline void i915_vma_unset_ggtt_write(struct i915_vma *vma) +{ + vma->flags &= ~I915_VMA_GGTT_WRITE; +} + +void i915_vma_flush_writes(struct i915_vma *vma); + static inline bool i915_vma_is_map_and_fenceable(const struct i915_vma *vma) { return vma->flags & I915_VMA_CAN_FENCE; @@ -389,5 +408,19 @@ i915_vma_unpin_fence(struct i915_vma *vma) __i915_vma_unpin_fence(vma); } -#endif +#define for_each_until(cond) if (cond) break; else + +/** + * for_each_ggtt_vma - Iterate over the GGTT VMA belonging to an object. + * @V: the #i915_vma iterator + * @OBJ: the #drm_i915_gem_object + * + * GGTT VMA are placed at the being of the object's vma_list, see + * vma_create(), so we can stop our walk as soon as we see a ppgtt VMA, + * or the list is empty ofc. + */ +#define for_each_ggtt_vma(V, OBJ) \ + list_for_each_entry(V, &(OBJ)->vma_list, obj_link) \ + for_each_until(!i915_vma_is_ggtt(V)) +#endif diff --git a/drivers/gpu/drm/i915/intel_breadcrumbs.c b/drivers/gpu/drm/i915/intel_breadcrumbs.c index 5ae2d276f7f3..58c624f982d9 100644 --- a/drivers/gpu/drm/i915/intel_breadcrumbs.c +++ b/drivers/gpu/drm/i915/intel_breadcrumbs.c @@ -27,6 +27,12 @@ #include "i915_drv.h" +#ifdef CONFIG_SMP +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL && !(tsk)->on_cpu) +#else +#define task_asleep(tsk) ((tsk)->state & TASK_NORMAL) +#endif + static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) { struct intel_wait *wait; @@ -36,8 +42,20 @@ static unsigned int __intel_breadcrumbs_wakeup(struct intel_breadcrumbs *b) wait = b->irq_wait; if (wait) { + /* + * N.B. Since task_asleep() and ttwu are not atomic, the + * waiter may actually go to sleep after the check, causing + * us to suppress a valid wakeup. We prefer to reduce the + * number of false positive missed_breadcrumb() warnings + * at the expense of a few false negatives, as it it easy + * to trigger a false positive under heavy load. Enough + * signal should remain from genuine missed_breadcrumb() + * for us to detect in CI. + */ + bool was_asleep = task_asleep(wait->tsk); + result = ENGINE_WAKEUP_WAITER; - if (wake_up_process(wait->tsk)) + if (wake_up_process(wait->tsk) && was_asleep) result |= ENGINE_WAKEUP_ASLEEP; } @@ -64,20 +82,21 @@ static unsigned long wait_timeout(void) static noinline void missed_breadcrumb(struct intel_engine_cs *engine) { - DRM_DEBUG_DRIVER("%s missed breadcrumb at %pS, irq posted? 
%s, current seqno=%x, last=%x\n", - engine->name, __builtin_return_address(0), - yesno(test_bit(ENGINE_IRQ_BREADCRUMB, - &engine->irq_posted)), - intel_engine_get_seqno(engine), - intel_engine_last_submit(engine)); + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer(__func__); + + intel_engine_dump(engine, &p, + "%s missed breadcrumb at %pS\n", + engine->name, __builtin_return_address(0)); + } set_bit(engine->id, &engine->i915->gpu_error.missed_irq_rings); } static void intel_breadcrumbs_hangcheck(struct timer_list *t) { - struct intel_engine_cs *engine = from_timer(engine, t, - breadcrumbs.hangcheck); + struct intel_engine_cs *engine = + from_timer(engine, t, breadcrumbs.hangcheck); struct intel_breadcrumbs *b = &engine->breadcrumbs; if (!b->irq_armed) @@ -103,7 +122,7 @@ static void intel_breadcrumbs_hangcheck(struct timer_list *t) */ if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) { missed_breadcrumb(engine); - mod_timer(&engine->breadcrumbs.fake_irq, jiffies + 1); + mod_timer(&b->fake_irq, jiffies + 1); } else { mod_timer(&b->hangcheck, wait_timeout()); } @@ -213,32 +232,42 @@ void intel_engine_unpin_breadcrumbs_irq(struct intel_engine_cs *engine) void intel_engine_disarm_breadcrumbs(struct intel_engine_cs *engine) { struct intel_breadcrumbs *b = &engine->breadcrumbs; - struct intel_wait *wait, *n, *first; + struct intel_wait *wait, *n; if (!b->irq_armed) - return; + goto wakeup_signaler; - /* We only disarm the irq when we are idle (all requests completed), + /* + * We only disarm the irq when we are idle (all requests completed), * so if the bottom-half remains asleep, it missed the request * completion. */ + if (intel_engine_wakeup(engine) & ENGINE_WAKEUP_ASLEEP) + missed_breadcrumb(engine); spin_lock_irq(&b->rb_lock); spin_lock(&b->irq_lock); - first = fetch_and_zero(&b->irq_wait); + b->irq_wait = NULL; if (b->irq_armed) __intel_engine_disarm_breadcrumbs(engine); spin_unlock(&b->irq_lock); rbtree_postorder_for_each_entry_safe(wait, n, &b->waiters, node) { RB_CLEAR_NODE(&wait->node); - if (wake_up_process(wait->tsk) && wait == first) - missed_breadcrumb(engine); + wake_up_process(wait->tsk); } b->waiters = RB_ROOT; spin_unlock_irq(&b->rb_lock); + + /* + * The signaling thread may be asleep holding a reference to a request, + * that had its signaling cancelled prior to being preempted. We need + * to kick the signaler, just in case, to release any such reference. 
+ */ +wakeup_signaler: + wake_up_process(b->signaler); } static bool use_fake_irq(const struct intel_breadcrumbs *b) @@ -683,23 +712,15 @@ static int intel_breadcrumbs_signaler(void *arg) } if (unlikely(do_schedule)) { - DEFINE_WAIT(exec); - if (kthread_should_park()) kthread_parkme(); - if (kthread_should_stop()) { - GEM_BUG_ON(request); + if (unlikely(kthread_should_stop())) { + i915_gem_request_put(request); break; } - if (request) - add_wait_queue(&request->execute, &exec); - schedule(); - - if (request) - remove_wait_queue(&request->execute, &exec); } i915_gem_request_put(request); } while (1); diff --git a/drivers/gpu/drm/i915/intel_csr.c b/drivers/gpu/drm/i915/intel_csr.c index 07e4f7bc4412..7fe4aac0facc 100644 --- a/drivers/gpu/drm/i915/intel_csr.c +++ b/drivers/gpu/drm/i915/intel_csr.c @@ -44,9 +44,9 @@ MODULE_FIRMWARE(I915_CSR_KBL); #define KBL_CSR_VERSION_REQUIRED CSR_VERSION(1, 4) -#define I915_CSR_SKL "i915/skl_dmc_ver1_26.bin" +#define I915_CSR_SKL "i915/skl_dmc_ver1_27.bin" MODULE_FIRMWARE(I915_CSR_SKL); -#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 26) +#define SKL_CSR_VERSION_REQUIRED CSR_VERSION(1, 27) #define I915_CSR_BXT "i915/bxt_dmc_ver1_07.bin" MODULE_FIRMWARE(I915_CSR_BXT); diff --git a/drivers/gpu/drm/i915/intel_ddi.c b/drivers/gpu/drm/i915/intel_ddi.c index 369f780588fb..f51645a08dca 100644 --- a/drivers/gpu/drm/i915/intel_ddi.c +++ b/drivers/gpu/drm/i915/intel_ddi.c @@ -2095,6 +2095,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, if (WARN_ON(!pll)) return; + mutex_lock(&dev_priv->dpll_lock); + if (IS_CANNONLAKE(dev_priv)) { /* Configure DPCLKA_CFGCR0 to map the DPLL to the DDI. */ val = I915_READ(DPCLKA_CFGCR0); @@ -2115,7 +2117,7 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, val = I915_READ(DPLL_CTRL2); val &= ~(DPLL_CTRL2_DDI_CLK_OFF(port) | - DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); + DPLL_CTRL2_DDI_CLK_SEL_MASK(port)); val |= (DPLL_CTRL2_DDI_CLK_SEL(pll->id, port) | DPLL_CTRL2_DDI_SEL_OVERRIDE(port)); @@ -2124,6 +2126,8 @@ static void intel_ddi_clk_select(struct intel_encoder *encoder, } else if (INTEL_INFO(dev_priv)->gen < 9) { I915_WRITE(PORT_CLK_SEL(port), hsw_pll_to_ddi_pll_sel(pll)); } + + mutex_unlock(&dev_priv->dpll_lock); } static void intel_ddi_clk_disable(struct intel_encoder *encoder) diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index 02f8bf101ccd..d28592e43512 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -22,6 +22,9 @@ * */ +#include <drm/drm_print.h> + +#include "intel_device_info.h" #include "i915_drv.h" #define PLATFORM_NAME(x) [INTEL_##x] = #x @@ -67,21 +70,55 @@ const char *intel_platform_name(enum intel_platform platform) return platform_names[platform]; } -void intel_device_info_dump(struct drm_i915_private *dev_priv) +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p) { - const struct intel_device_info *info = &dev_priv->info; - - DRM_DEBUG_DRIVER("i915 device info: platform=%s gen=%i pciid=0x%04x rev=0x%02x", - intel_platform_name(info->platform), - info->gen, - dev_priv->drm.pdev->device, - dev_priv->drm.pdev->revision); -#define PRINT_FLAG(name) \ - DRM_DEBUG_DRIVER("i915 device info: " #name ": %s", yesno(info->name)) +#define PRINT_FLAG(name) drm_printf(p, "%s: %s\n", #name, yesno(info->name)); DEV_INFO_FOR_EACH_FLAG(PRINT_FLAG); #undef PRINT_FLAG } +static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p) +{ + 
drm_printf(p, "slice mask: %04x\n", sseu->slice_mask); + drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask)); + drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu)); + drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask); + drm_printf(p, "subslice per slice: %u\n", + hweight8(sseu->subslice_mask)); + drm_printf(p, "EU total: %u\n", sseu->eu_total); + drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice); + drm_printf(p, "has slice power gating: %s\n", + yesno(sseu->has_slice_pg)); + drm_printf(p, "has subslice power gating: %s\n", + yesno(sseu->has_subslice_pg)); + drm_printf(p, "has EU power gating: %s\n", yesno(sseu->has_eu_pg)); +} + +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p) +{ + sseu_dump(&info->sseu, p); + + drm_printf(p, "CS timestamp frequency: %u kHz\n", + info->cs_timestamp_frequency_khz); +} + +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p) +{ + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); + + drm_printf(p, "pciid=0x%04x rev=0x%02x platform=%s gen=%i\n", + INTEL_DEVID(dev_priv), + INTEL_REVID(dev_priv), + intel_platform_name(info->platform), + info->gen); + + intel_device_info_dump_flags(info, p); +} + static void gen10_sseu_info_init(struct drm_i915_private *dev_priv) { struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu; @@ -403,24 +440,27 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) freq = f24_mhz; break; } - } - /* Now figure out how the command stream's timestamp register - * increments from this frequency (it might increment only - * every few clock cycle). - */ - freq >>= 3 - ((rpm_config_reg & - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> - GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + /* Now figure out how the command stream's timestamp + * register increments from this frequency (it might + * increment only every few clock cycle). + */ + freq >>= 3 - ((rpm_config_reg & + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_MASK) >> + GEN10_RPM_CONFIG0_CTC_SHIFT_PARAMETER_SHIFT); + } return freq; } - DRM_ERROR("Unknown gen, unable to compute command stream timestamp frequency\n"); + MISSING_CASE("Unknown gen, unable to read command streamer timestamp frequency\n"); return 0; } -/* +/** + * intel_device_info_runtime_init - initialize runtime info + * @info: intel device info struct + * * Determine various intel_device_info fields at runtime. * * Use it when either: @@ -433,9 +473,10 @@ static u32 read_timestamp_frequency(struct drm_i915_private *dev_priv) * - after the PCH has been detected, * - before the first usage of the fields it can tweak. 
*/ -void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) +void intel_device_info_runtime_init(struct intel_device_info *info) { - struct intel_device_info *info = mkwrite_device_info(dev_priv); + struct drm_i915_private *dev_priv = + container_of(info, struct drm_i915_private, info); enum pipe pipe; if (INTEL_GEN(dev_priv) >= 10) { @@ -543,22 +584,4 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv) /* Initialize command stream timestamp frequency */ info->cs_timestamp_frequency_khz = read_timestamp_frequency(dev_priv); - - DRM_DEBUG_DRIVER("slice mask: %04x\n", info->sseu.slice_mask); - DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask)); - DRM_DEBUG_DRIVER("subslice total: %u\n", - sseu_subslice_total(&info->sseu)); - DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask); - DRM_DEBUG_DRIVER("subslice per slice: %u\n", - hweight8(info->sseu.subslice_mask)); - DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total); - DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice); - DRM_DEBUG_DRIVER("has slice power gating: %s\n", - info->sseu.has_slice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has subslice power gating: %s\n", - info->sseu.has_subslice_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("has EU power gating: %s\n", - info->sseu.has_eu_pg ? "y" : "n"); - DRM_DEBUG_DRIVER("CS timestamp frequency: %u kHz\n", - info->cs_timestamp_frequency_khz); } diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h new file mode 100644 index 000000000000..49cb27bd04c1 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -0,0 +1,183 @@ +/* + * Copyright © 2014-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_DEVICE_INFO_H_ +#define _INTEL_DEVICE_INFO_H_ + +#include "intel_display.h" + +struct drm_printer; +struct drm_i915_private; + +/* Keep in gen based order, and chronological order within a gen */ +enum intel_platform { + INTEL_PLATFORM_UNINITIALIZED = 0, + /* gen2 */ + INTEL_I830, + INTEL_I845G, + INTEL_I85X, + INTEL_I865G, + /* gen3 */ + INTEL_I915G, + INTEL_I915GM, + INTEL_I945G, + INTEL_I945GM, + INTEL_G33, + INTEL_PINEVIEW, + /* gen4 */ + INTEL_I965G, + INTEL_I965GM, + INTEL_G45, + INTEL_GM45, + /* gen5 */ + INTEL_IRONLAKE, + /* gen6 */ + INTEL_SANDYBRIDGE, + /* gen7 */ + INTEL_IVYBRIDGE, + INTEL_VALLEYVIEW, + INTEL_HASWELL, + /* gen8 */ + INTEL_BROADWELL, + INTEL_CHERRYVIEW, + /* gen9 */ + INTEL_SKYLAKE, + INTEL_BROXTON, + INTEL_KABYLAKE, + INTEL_GEMINILAKE, + INTEL_COFFEELAKE, + /* gen10 */ + INTEL_CANNONLAKE, + INTEL_MAX_PLATFORMS +}; + +#define DEV_INFO_FOR_EACH_FLAG(func) \ + func(is_mobile); \ + func(is_lp); \ + func(is_alpha_support); \ + /* Keep has_* in alphabetical order */ \ + func(has_64bit_reloc); \ + func(has_aliasing_ppgtt); \ + func(has_csr); \ + func(has_ddi); \ + func(has_dp_mst); \ + func(has_reset_engine); \ + func(has_fbc); \ + func(has_fpga_dbg); \ + func(has_full_ppgtt); \ + func(has_full_48bit_ppgtt); \ + func(has_gmch_display); \ + func(has_guc); \ + func(has_guc_ct); \ + func(has_hotplug); \ + func(has_l3_dpf); \ + func(has_llc); \ + func(has_logical_ring_contexts); \ + func(has_logical_ring_preemption); \ + func(has_overlay); \ + func(has_pooled_eu); \ + func(has_psr); \ + func(has_rc6); \ + func(has_rc6p); \ + func(has_resource_streamer); \ + func(has_runtime_pm); \ + func(has_snoop); \ + func(unfenced_needs_alignment); \ + func(cursor_needs_physical); \ + func(hws_needs_physical); \ + func(overlay_needs_physical); \ + func(supports_tv); \ + func(has_ipc); + +struct sseu_dev_info { + u8 slice_mask; + u8 subslice_mask; + u8 eu_total; + u8 eu_per_subslice; + u8 min_eu_in_pool; + /* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? 
*/ + u8 subslice_7eu[3]; + u8 has_slice_pg:1; + u8 has_subslice_pg:1; + u8 has_eu_pg:1; +}; + +struct intel_device_info { + u16 device_id; + u16 gen_mask; + + u8 gen; + u8 gt; /* GT number, 0 if undefined */ + u8 num_rings; + u8 ring_mask; /* Rings supported by the HW */ + + enum intel_platform platform; + u32 platform_mask; + + u32 display_mmio_offset; + + u8 num_pipes; + u8 num_sprites[I915_MAX_PIPES]; + u8 num_scalers[I915_MAX_PIPES]; + + unsigned int page_sizes; /* page sizes supported by the HW */ + +#define DEFINE_FLAG(name) u8 name:1 + DEV_INFO_FOR_EACH_FLAG(DEFINE_FLAG); +#undef DEFINE_FLAG + u16 ddb_size; /* in blocks */ + + /* Register offsets for the various display pipes and transcoders */ + int pipe_offsets[I915_MAX_TRANSCODERS]; + int trans_offsets[I915_MAX_TRANSCODERS]; + int palette_offsets[I915_MAX_PIPES]; + int cursor_offsets[I915_MAX_PIPES]; + + /* Slice/subslice/EU info */ + struct sseu_dev_info sseu; + + u32 cs_timestamp_frequency_khz; + + struct color_luts { + u16 degamma_lut_size; + u16 gamma_lut_size; + } color; +}; + +static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu) +{ + return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask); +} + +const char *intel_platform_name(enum intel_platform platform); + +void intel_device_info_runtime_init(struct intel_device_info *info); +void intel_device_info_dump(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_flags(const struct intel_device_info *info, + struct drm_printer *p); +void intel_device_info_dump_runtime(const struct intel_device_info *info, + struct drm_printer *p); + +#endif diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c index 1f7e312d0d0d..0cd355978ab4 100644 --- a/drivers/gpu/drm/i915/intel_display.c +++ b/drivers/gpu/drm/i915/intel_display.c @@ -2639,7 +2639,6 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, { struct drm_device *dev = crtc->base.dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_i915_gem_object *obj = NULL; struct drm_mode_fb_cmd2 mode_cmd = { 0 }; struct drm_framebuffer *fb = &plane_config->fb->base; @@ -2655,7 +2654,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc, /* If the FB is too big, just don't use it since fbdev is not very * important and we should probably use that space with FBC or other * features. 
*/ - if (size_aligned * 2 > ggtt->stolen_usable_size) + if (size_aligned * 2 > dev_priv->stolen_usable_size) return false; mutex_lock(&dev->struct_mutex); @@ -3074,6 +3073,12 @@ int skl_check_plane_surface(struct intel_plane_state *plane_state) unsigned int rotation = plane_state->base.rotation; int ret; + if (rotation & DRM_MODE_REFLECT_X && + fb->modifier == DRM_FORMAT_MOD_LINEAR) { + DRM_DEBUG_KMS("horizontal flip is not supported with linear surface formats\n"); + return -EINVAL; + } + if (!plane_state->base.visible) return 0; @@ -3454,9 +3459,9 @@ static u32 skl_plane_ctl_tiling(uint64_t fb_modifier) return 0; } -static u32 skl_plane_ctl_rotation(unsigned int rotation) +static u32 skl_plane_ctl_rotate(unsigned int rotate) { - switch (rotation) { + switch (rotate) { case DRM_MODE_ROTATE_0: break; /* @@ -3470,7 +3475,22 @@ static u32 skl_plane_ctl_rotation(unsigned int rotation) case DRM_MODE_ROTATE_270: return PLANE_CTL_ROTATE_90; default: - MISSING_CASE(rotation); + MISSING_CASE(rotate); + } + + return 0; +} + +static u32 cnl_plane_ctl_flip(unsigned int reflect) +{ + switch (reflect) { + case 0: + break; + case DRM_MODE_REFLECT_X: + return PLANE_CTL_FLIP_HORIZONTAL; + case DRM_MODE_REFLECT_Y: + default: + MISSING_CASE(reflect); } return 0; @@ -3498,7 +3518,11 @@ u32 skl_plane_ctl(const struct intel_crtc_state *crtc_state, plane_ctl |= skl_plane_ctl_format(fb->format->format); plane_ctl |= skl_plane_ctl_tiling(fb->modifier); - plane_ctl |= skl_plane_ctl_rotation(rotation); + plane_ctl |= skl_plane_ctl_rotate(rotation & DRM_MODE_ROTATE_MASK); + + if (INTEL_GEN(dev_priv) >= 10) + plane_ctl |= cnl_plane_ctl_flip(rotation & + DRM_MODE_REFLECT_MASK); if (key->flags & I915_SET_COLORKEY_DESTINATION) plane_ctl |= PLANE_CTL_KEY_ENABLE_DESTINATION; @@ -9695,111 +9719,27 @@ err: return ERR_PTR(ret); } -static u32 -intel_framebuffer_pitch_for_width(int width, int bpp) -{ - u32 pitch = DIV_ROUND_UP(width * bpp, 8); - return ALIGN(pitch, 64); -} - -static u32 -intel_framebuffer_size_for_mode(const struct drm_display_mode *mode, int bpp) -{ - u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp); - return PAGE_ALIGN(pitch * mode->vdisplay); -} - -static struct drm_framebuffer * -intel_framebuffer_create_for_mode(struct drm_device *dev, - const struct drm_display_mode *mode, - int depth, int bpp) -{ - struct drm_framebuffer *fb; - struct drm_i915_gem_object *obj; - struct drm_mode_fb_cmd2 mode_cmd = { 0 }; - - obj = i915_gem_object_create(to_i915(dev), - intel_framebuffer_size_for_mode(mode, bpp)); - if (IS_ERR(obj)) - return ERR_CAST(obj); - - mode_cmd.width = mode->hdisplay; - mode_cmd.height = mode->vdisplay; - mode_cmd.pitches[0] = intel_framebuffer_pitch_for_width(mode_cmd.width, - bpp); - mode_cmd.pixel_format = drm_mode_legacy_fb_format(bpp, depth); - - fb = intel_framebuffer_create(obj, &mode_cmd); - if (IS_ERR(fb)) - i915_gem_object_put(obj); - - return fb; -} - -static struct drm_framebuffer * -mode_fits_in_fbdev(struct drm_device *dev, - const struct drm_display_mode *mode) -{ -#ifdef CONFIG_DRM_FBDEV_EMULATION - struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_i915_gem_object *obj; - struct drm_framebuffer *fb; - - if (!dev_priv->fbdev) - return NULL; - - if (!dev_priv->fbdev->fb) - return NULL; - - obj = dev_priv->fbdev->fb->obj; - BUG_ON(!obj); - - fb = &dev_priv->fbdev->fb->base; - if (fb->pitches[0] < intel_framebuffer_pitch_for_width(mode->hdisplay, - fb->format->cpp[0] * 8)) - return NULL; - - if (obj->base.size < mode->vdisplay * fb->pitches[0]) - return 
NULL; - - drm_framebuffer_get(fb); - return fb; -#else - return NULL; -#endif -} - -static int intel_modeset_setup_plane_state(struct drm_atomic_state *state, - struct drm_crtc *crtc, - const struct drm_display_mode *mode, - struct drm_framebuffer *fb, - int x, int y) +static int intel_modeset_disable_planes(struct drm_atomic_state *state, + struct drm_crtc *crtc) { + struct drm_plane *plane; struct drm_plane_state *plane_state; - int hdisplay, vdisplay; - int ret; - - plane_state = drm_atomic_get_plane_state(state, crtc->primary); - if (IS_ERR(plane_state)) - return PTR_ERR(plane_state); - - if (mode) - drm_mode_get_hv_timing(mode, &hdisplay, &vdisplay); - else - hdisplay = vdisplay = 0; + int ret, i; - ret = drm_atomic_set_crtc_for_plane(plane_state, fb ? crtc : NULL); + ret = drm_atomic_add_affected_planes(state, crtc); if (ret) return ret; - drm_atomic_set_fb_for_plane(plane_state, fb); - plane_state->crtc_x = 0; - plane_state->crtc_y = 0; - plane_state->crtc_w = hdisplay; - plane_state->crtc_h = vdisplay; - plane_state->src_x = x << 16; - plane_state->src_y = y << 16; - plane_state->src_w = hdisplay << 16; - plane_state->src_h = vdisplay << 16; + + for_each_new_plane_in_state(state, plane, plane_state, i) { + if (plane_state->crtc != crtc) + continue; + + ret = drm_atomic_set_crtc_for_plane(plane_state, NULL); + if (ret) + return ret; + + drm_atomic_set_fb_for_plane(plane_state, NULL); + } return 0; } @@ -9817,7 +9757,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, struct drm_crtc *crtc = NULL; struct drm_device *dev = encoder->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct drm_framebuffer *fb; struct drm_mode_config *config = &dev->mode_config; struct drm_atomic_state *state = NULL, *restore_state = NULL; struct drm_connector_state *connector_state; @@ -9885,10 +9824,6 @@ int intel_get_load_detect_pipe(struct drm_connector *connector, found: intel_crtc = to_intel_crtc(crtc); - ret = drm_modeset_lock(&crtc->primary->mutex, ctx); - if (ret) - goto fail; - state = drm_atomic_state_alloc(dev); restore_state = drm_atomic_state_alloc(dev); if (!state || !restore_state) { @@ -9920,40 +9855,17 @@ found: if (!mode) mode = &load_detect_mode; - /* We need a framebuffer large enough to accommodate all accesses - * that the plane may generate whilst we perform load detection. - * We can not rely on the fbcon either being present (we get called - * during its initialisation to detect all boot displays, or it may - * not even exist) or that it is large enough to satisfy the - * requested mode. 
- */ - fb = mode_fits_in_fbdev(dev, mode); - if (fb == NULL) { - DRM_DEBUG_KMS("creating tmp fb for load-detection\n"); - fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32); - } else - DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n"); - if (IS_ERR(fb)) { - DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n"); - ret = PTR_ERR(fb); - goto fail; - } - - ret = intel_modeset_setup_plane_state(state, crtc, mode, fb, 0, 0); + ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); if (ret) goto fail; - drm_framebuffer_put(fb); - - ret = drm_atomic_set_mode_for_crtc(&crtc_state->base, mode); + ret = intel_modeset_disable_planes(state, crtc); if (ret) goto fail; ret = PTR_ERR_OR_ZERO(drm_atomic_get_connector_state(restore_state, connector)); if (!ret) ret = PTR_ERR_OR_ZERO(drm_atomic_get_crtc_state(restore_state, crtc)); - if (!ret) - ret = PTR_ERR_OR_ZERO(drm_atomic_get_plane_state(restore_state, crtc->primary)); if (ret) { DRM_DEBUG_KMS("Failed to create a copy of old state to restore: %i\n", ret); goto fail; @@ -10967,31 +10879,6 @@ fail: return ret; } -static void -intel_modeset_update_crtc_state(struct drm_atomic_state *state) -{ - struct drm_crtc *crtc; - struct drm_crtc_state *new_crtc_state; - int i; - - /* Double check state. */ - for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) { - to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); - - /* - * Update legacy state to satisfy fbc code. This can - * be removed when fbc uses the atomic state. - */ - if (drm_atomic_get_existing_plane_state(state, crtc->primary)) { - struct drm_plane_state *plane_state = crtc->primary->state; - - crtc->primary->fb = plane_state->fb; - crtc->x = plane_state->src_x >> 16; - crtc->y = plane_state->src_y >> 16; - } - } -} - static bool intel_fuzzy_clock_check(int clock1, int clock2) { int diff; @@ -12364,9 +12251,9 @@ static void intel_atomic_commit_tail(struct drm_atomic_state *state) } } - /* Only after disabling all output pipelines that will be changed can we - * update the the output configuration. 
*/ - intel_modeset_update_crtc_state(state); + /* FIXME: Eventually get rid of our intel_crtc->config pointer */ + for_each_new_crtc_in_state(state, crtc, new_crtc_state, i) + to_intel_crtc(crtc)->config = to_intel_crtc_state(new_crtc_state); if (intel_state->modeset) { drm_atomic_helper_update_legacy_modeset_state(state->dev, state); @@ -12596,11 +12483,15 @@ static int intel_atomic_commit(struct drm_device *dev, INIT_WORK(&state->commit_work, intel_atomic_commit_work); i915_sw_fence_commit(&intel_state->commit_ready); - if (nonblock) + if (nonblock && intel_state->modeset) { + queue_work(dev_priv->modeset_wq, &state->commit_work); + } else if (nonblock) { queue_work(system_unbound_wq, &state->commit_work); - else + } else { + if (intel_state->modeset) + flush_workqueue(dev_priv->modeset_wq); intel_atomic_commit_tail(state); - + } return 0; } @@ -13265,7 +13156,7 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) primary->frontbuffer_bit = INTEL_FRONTBUFFER_PRIMARY(pipe); primary->check_plane = intel_check_primary_plane; - if (INTEL_GEN(dev_priv) >= 10 || IS_GEMINILAKE(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 10) { intel_primary_formats = skl_primary_formats; num_formats = ARRAY_SIZE(skl_primary_formats); modifiers = skl_format_modifiers_ccs; @@ -13327,7 +13218,12 @@ intel_primary_plane_create(struct drm_i915_private *dev_priv, enum pipe pipe) if (ret) goto fail; - if (INTEL_GEN(dev_priv) >= 9) { + if (INTEL_GEN(dev_priv) >= 10) { + supported_rotations = + DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | + DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270 | + DRM_MODE_REFLECT_X; + } else if (INTEL_GEN(dev_priv) >= 9) { supported_rotations = DRM_MODE_ROTATE_0 | DRM_MODE_ROTATE_90 | DRM_MODE_ROTATE_180 | DRM_MODE_ROTATE_270; @@ -14558,6 +14454,8 @@ int intel_modeset_init(struct drm_device *dev) enum pipe pipe; struct intel_crtc *crtc; + dev_priv->modeset_wq = alloc_ordered_workqueue("i915_modeset", 0); + drm_mode_config_init(dev); dev->mode_config.min_width = 0; @@ -14621,7 +14519,7 @@ int intel_modeset_init(struct drm_device *dev) dev->mode_config.cursor_height = MAX_CURSOR_HEIGHT; } - dev->mode_config.fb_base = ggtt->mappable_base; + dev->mode_config.fb_base = ggtt->gmadr.start; DRM_DEBUG_KMS("%d display pipe%s available.\n", INTEL_INFO(dev_priv)->num_pipes, @@ -15362,6 +15260,8 @@ void intel_modeset_cleanup(struct drm_device *dev) intel_cleanup_gt_powersave(dev_priv); intel_teardown_gmbus(dev_priv); + + destroy_workqueue(dev_priv->modeset_wq); } void intel_connector_attach_encoder(struct intel_connector *connector, diff --git a/drivers/gpu/drm/i915/intel_display.h b/drivers/gpu/drm/i915/intel_display.h new file mode 100644 index 000000000000..a0d2b6169361 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_display.h @@ -0,0 +1,321 @@ +/* + * Copyright © 2006-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + */ + +#ifndef _INTEL_DISPLAY_H_ +#define _INTEL_DISPLAY_H_ + +enum pipe { + INVALID_PIPE = -1, + + PIPE_A = 0, + PIPE_B, + PIPE_C, + _PIPE_EDP, + + I915_MAX_PIPES = _PIPE_EDP +}; + +#define pipe_name(p) ((p) + 'A') + +enum transcoder { + TRANSCODER_A = 0, + TRANSCODER_B, + TRANSCODER_C, + TRANSCODER_EDP, + TRANSCODER_DSI_A, + TRANSCODER_DSI_C, + + I915_MAX_TRANSCODERS +}; + +static inline const char *transcoder_name(enum transcoder transcoder) +{ + switch (transcoder) { + case TRANSCODER_A: + return "A"; + case TRANSCODER_B: + return "B"; + case TRANSCODER_C: + return "C"; + case TRANSCODER_EDP: + return "EDP"; + case TRANSCODER_DSI_A: + return "DSI A"; + case TRANSCODER_DSI_C: + return "DSI C"; + default: + return "<invalid>"; + } +} + +static inline bool transcoder_is_dsi(enum transcoder transcoder) +{ + return transcoder == TRANSCODER_DSI_A || transcoder == TRANSCODER_DSI_C; +} + +/* + * Global legacy plane identifier. Valid only for primary/sprite + * planes on pre-g4x, and only for primary planes on g4x-bdw. + */ +enum i9xx_plane_id { + PLANE_A, + PLANE_B, + PLANE_C, +}; + +#define plane_name(p) ((p) + 'A') +#define sprite_name(p, s) ((p) * INTEL_INFO(dev_priv)->num_sprites[(p)] + (s) + 'A') + +/* + * Per-pipe plane identifier. + * I915_MAX_PLANES in the enum below is the maximum (across all platforms) + * number of planes per CRTC. Not all platforms really have this many planes, + * which means some arrays of size I915_MAX_PLANES may have unused entries + * between the topmost sprite plane and the cursor plane. + * + * This is expected to be passed to various register macros + * (eg. PLANE_CTL(), PS_PLANE_SEL(), etc.) so adjust with care. 
+ */ +enum plane_id { + PLANE_PRIMARY, + PLANE_SPRITE0, + PLANE_SPRITE1, + PLANE_SPRITE2, + PLANE_CURSOR, + + I915_MAX_PLANES, +}; + +#define for_each_plane_id_on_crtc(__crtc, __p) \ + for ((__p) = PLANE_PRIMARY; (__p) < I915_MAX_PLANES; (__p)++) \ + for_each_if((__crtc)->plane_ids_mask & BIT(__p)) + +enum port { + PORT_NONE = -1, + + PORT_A = 0, + PORT_B, + PORT_C, + PORT_D, + PORT_E, + + I915_MAX_PORTS +}; + +#define port_name(p) ((p) + 'A') + +enum dpio_channel { + DPIO_CH0, + DPIO_CH1 +}; + +enum dpio_phy { + DPIO_PHY0, + DPIO_PHY1, + DPIO_PHY2, +}; + +#define I915_NUM_PHYS_VLV 2 + +enum intel_display_power_domain { + POWER_DOMAIN_PIPE_A, + POWER_DOMAIN_PIPE_B, + POWER_DOMAIN_PIPE_C, + POWER_DOMAIN_PIPE_A_PANEL_FITTER, + POWER_DOMAIN_PIPE_B_PANEL_FITTER, + POWER_DOMAIN_PIPE_C_PANEL_FITTER, + POWER_DOMAIN_TRANSCODER_A, + POWER_DOMAIN_TRANSCODER_B, + POWER_DOMAIN_TRANSCODER_C, + POWER_DOMAIN_TRANSCODER_EDP, + POWER_DOMAIN_TRANSCODER_DSI_A, + POWER_DOMAIN_TRANSCODER_DSI_C, + POWER_DOMAIN_PORT_DDI_A_LANES, + POWER_DOMAIN_PORT_DDI_B_LANES, + POWER_DOMAIN_PORT_DDI_C_LANES, + POWER_DOMAIN_PORT_DDI_D_LANES, + POWER_DOMAIN_PORT_DDI_E_LANES, + POWER_DOMAIN_PORT_DDI_A_IO, + POWER_DOMAIN_PORT_DDI_B_IO, + POWER_DOMAIN_PORT_DDI_C_IO, + POWER_DOMAIN_PORT_DDI_D_IO, + POWER_DOMAIN_PORT_DDI_E_IO, + POWER_DOMAIN_PORT_DSI, + POWER_DOMAIN_PORT_CRT, + POWER_DOMAIN_PORT_OTHER, + POWER_DOMAIN_VGA, + POWER_DOMAIN_AUDIO, + POWER_DOMAIN_PLLS, + POWER_DOMAIN_AUX_A, + POWER_DOMAIN_AUX_B, + POWER_DOMAIN_AUX_C, + POWER_DOMAIN_AUX_D, + POWER_DOMAIN_GMBUS, + POWER_DOMAIN_MODESET, + POWER_DOMAIN_GT_IRQ, + POWER_DOMAIN_INIT, + + POWER_DOMAIN_NUM, +}; + +#define POWER_DOMAIN_PIPE(pipe) ((pipe) + POWER_DOMAIN_PIPE_A) +#define POWER_DOMAIN_PIPE_PANEL_FITTER(pipe) \ + ((pipe) + POWER_DOMAIN_PIPE_A_PANEL_FITTER) +#define POWER_DOMAIN_TRANSCODER(tran) \ + ((tran) == TRANSCODER_EDP ? 
POWER_DOMAIN_TRANSCODER_EDP : \ + (tran) + POWER_DOMAIN_TRANSCODER_A) + +/* Used by dp and fdi links */ +struct intel_link_m_n { + u32 tu; + u32 gmch_m; + u32 gmch_n; + u32 link_m; + u32 link_n; +}; + +#define for_each_pipe(__dev_priv, __p) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) + +#define for_each_pipe_masked(__dev_priv, __p, __mask) \ + for ((__p) = 0; (__p) < INTEL_INFO(__dev_priv)->num_pipes; (__p)++) \ + for_each_if((__mask) & BIT(__p)) + +#define for_each_universal_plane(__dev_priv, __pipe, __p) \ + for ((__p) = 0; \ + (__p) < INTEL_INFO(__dev_priv)->num_sprites[(__pipe)] + 1; \ + (__p)++) + +#define for_each_sprite(__dev_priv, __p, __s) \ + for ((__s) = 0; \ + (__s) < INTEL_INFO(__dev_priv)->num_sprites[(__p)]; \ + (__s)++) + +#define for_each_port_masked(__port, __ports_mask) \ + for ((__port) = PORT_A; (__port) < I915_MAX_PORTS; (__port)++) \ + for_each_if((__ports_mask) & BIT(__port)) + +#define for_each_crtc(dev, crtc) \ + list_for_each_entry(crtc, &(dev)->mode_config.crtc_list, head) + +#define for_each_intel_plane(dev, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) + +#define for_each_intel_plane_mask(dev, intel_plane, plane_mask) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((plane_mask) & \ + BIT(drm_plane_index(&intel_plane->base))) + +#define for_each_intel_plane_on_crtc(dev, intel_crtc, intel_plane) \ + list_for_each_entry(intel_plane, \ + &(dev)->mode_config.plane_list, \ + base.head) \ + for_each_if((intel_plane)->pipe == (intel_crtc)->pipe) + +#define for_each_intel_crtc(dev, intel_crtc) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) + +#define for_each_intel_crtc_mask(dev, intel_crtc, crtc_mask) \ + list_for_each_entry(intel_crtc, \ + &(dev)->mode_config.crtc_list, \ + base.head) \ + for_each_if((crtc_mask) & BIT(drm_crtc_index(&intel_crtc->base))) + +#define for_each_intel_encoder(dev, intel_encoder) \ + list_for_each_entry(intel_encoder, \ + &(dev)->mode_config.encoder_list, \ + base.head) + +#define for_each_intel_connector_iter(intel_connector, iter) \ + while ((intel_connector = to_intel_connector(drm_connector_list_iter_next(iter)))) + +#define for_each_encoder_on_crtc(dev, __crtc, intel_encoder) \ + list_for_each_entry((intel_encoder), &(dev)->mode_config.encoder_list, base.head) \ + for_each_if((intel_encoder)->base.crtc == (__crtc)) + +#define for_each_connector_on_encoder(dev, __encoder, intel_connector) \ + list_for_each_entry((intel_connector), &(dev)->mode_config.connector_list, base.head) \ + for_each_if((intel_connector)->base.encoder == (__encoder)) + +#define for_each_power_domain(domain, mask) \ + for ((domain) = 0; (domain) < POWER_DOMAIN_NUM; (domain)++) \ + for_each_if(BIT_ULL(domain) & (mask)) + +#define for_each_power_well(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells; \ + (__power_well) - (__dev_priv)->power_domains.power_wells < \ + (__dev_priv)->power_domains.power_well_count; \ + (__power_well)++) + +#define for_each_power_well_rev(__dev_priv, __power_well) \ + for ((__power_well) = (__dev_priv)->power_domains.power_wells + \ + (__dev_priv)->power_domains.power_well_count - 1; \ + (__power_well) - (__dev_priv)->power_domains.power_wells >= 0; \ + (__power_well)--) + +#define for_each_power_domain_well(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well(__dev_priv, __power_well) \ + 
for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_power_domain_well_rev(__dev_priv, __power_well, __domain_mask) \ + for_each_power_well_rev(__dev_priv, __power_well) \ + for_each_if((__power_well)->domains & (__domain_mask)) + +#define for_each_new_intel_plane_in_state(__state, plane, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +#define for_each_new_intel_crtc_in_state(__state, crtc, new_crtc_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_crtc && \ + ((crtc) = to_intel_crtc((__state)->base.crtcs[__i].ptr), \ + (new_crtc_state) = to_intel_crtc_state((__state)->base.crtcs[__i].new_state), 1); \ + (__i)++) \ + for_each_if(crtc) + +#define for_each_oldnew_intel_plane_in_state(__state, plane, old_plane_state, new_plane_state, __i) \ + for ((__i) = 0; \ + (__i) < (__state)->base.dev->mode_config.num_total_plane && \ + ((plane) = to_intel_plane((__state)->base.planes[__i].ptr), \ + (old_plane_state) = to_intel_plane_state((__state)->base.planes[__i].old_state), \ + (new_plane_state) = to_intel_plane_state((__state)->base.planes[__i].new_state), 1); \ + (__i)++) \ + for_each_if(plane) + +void intel_link_compute_m_n(int bpp, int nlanes, + int pixel_clock, int link_clock, + struct intel_link_m_n *m_n, + bool reduce_m_n); + +#endif diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 86d4c85c8725..ebdcbcbacb3c 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1272,6 +1272,9 @@ static int cnl_init_workarounds(struct intel_engine_cs *engine) if (ret) return ret; + /* WaDisableEarlyEOT:cnl */ + WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN, DISABLE_EARLY_EOT); + return 0; } @@ -1513,10 +1516,8 @@ bool intel_engines_are_idle(struct drm_i915_private *dev_priv) struct intel_engine_cs *engine; enum intel_engine_id id; - if (READ_ONCE(dev_priv->gt.active_requests)) - return false; - - /* If the driver is wedged, HW state may be very inconsistent and + /* + * If the driver is wedged, HW state may be very inconsistent and * report that it is still busy, even though we have stopped using it. 
*/ if (i915_terminally_wedged(&dev_priv->gpu_error)) @@ -1596,7 +1597,7 @@ void intel_engines_park(struct drm_i915_private *i915) dev_err(i915->drm.dev, "%s is not idle before parking\n", engine->name); - intel_engine_dump(engine, &p); + intel_engine_dump(engine, &p, NULL); } if (engine->park) @@ -1666,7 +1667,38 @@ static void print_request(struct drm_printer *m, rq->timeline->common->name); } -void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) +static void hexdump(struct drm_printer *m, const void *buf, size_t len) +{ + const size_t rowsize = 8 * sizeof(u32); + const void *prev = NULL; + bool skip = false; + size_t pos; + + for (pos = 0; pos < len; pos += rowsize) { + char line[128]; + + if (prev && !memcmp(prev, buf + pos, rowsize)) { + if (!skip) { + drm_printf(m, "*\n"); + skip = true; + } + continue; + } + + WARN_ON_ONCE(hex_dump_to_buffer(buf + pos, len - pos, + rowsize, sizeof(u32), + line, sizeof(line), + false) >= sizeof(line)); + drm_printf(m, "%08zx %s\n", pos, line); + + prev = buf + pos; + skip = false; + } +} + +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...) { struct intel_breadcrumbs * const b = &engine->breadcrumbs; const struct intel_engine_execlists * const execlists = &engine->execlists; @@ -1674,17 +1706,29 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) struct drm_i915_private *dev_priv = engine->i915; struct drm_i915_gem_request *rq; struct rb_node *rb; + char hdr[80]; u64 addr; - drm_printf(m, "%s\n", engine->name); + if (header) { + va_list ap; + + va_start(ap, header); + drm_vprintf(m, header, &ap); + va_end(ap); + } + + if (i915_terminally_wedged(&engine->i915->gpu_error)) + drm_printf(m, "*** WEDGED ***\n"); + drm_printf(m, "\tcurrent seqno %x, last %x, hangcheck %x [%d ms], inflight %d\n", intel_engine_get_seqno(engine), intel_engine_last_submit(engine), engine->hangcheck.seqno, jiffies_to_msecs(jiffies - engine->hangcheck.action_timestamp), engine->timeline->inflight_seqnos); - drm_printf(m, "\tReset count: %d\n", - i915_reset_engine_count(error, engine)); + drm_printf(m, "\tReset count: %d (global %d)\n", + i915_reset_engine_count(error, engine), + i915_reset_count(error)); rcu_read_lock(); @@ -1745,6 +1789,24 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) addr = intel_engine_get_last_batch_head(engine); drm_printf(m, "\tBBADDR: 0x%08x_%08x\n", upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 8) + addr = I915_READ64_2x32(RING_DMA_FADD(engine->mmio_base), + RING_DMA_FADD_UDW(engine->mmio_base)); + else if (INTEL_GEN(dev_priv) >= 4) + addr = I915_READ(RING_DMA_FADD(engine->mmio_base)); + else + addr = I915_READ(DMA_FADD_I8XX); + drm_printf(m, "\tDMA_FADDR: 0x%08x_%08x\n", + upper_32_bits(addr), lower_32_bits(addr)); + if (INTEL_GEN(dev_priv) >= 4) { + drm_printf(m, "\tIPEIR: 0x%08x\n", + I915_READ(RING_IPEIR(engine->mmio_base))); + drm_printf(m, "\tIPEHR: 0x%08x\n", + I915_READ(RING_IPEHR(engine->mmio_base))); + } else { + drm_printf(m, "\tIPEIR: 0x%08x\n", I915_READ(IPEIR)); + drm_printf(m, "\tIPEHR: 0x%08x\n", I915_READ(IPEHR)); + } if (HAS_EXECLISTS(dev_priv)) { const u32 *hws = &engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX]; @@ -1786,12 +1848,12 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) rq = port_unpack(&execlists->port[idx], &count); if (rq) { - drm_printf(m, "\t\tELSP[%d] count=%d, ", - idx, count); - print_request(m, rq, "rq: "); + 
snprintf(hdr, sizeof(hdr), + "\t\tELSP[%d] count=%d, rq: ", + idx, count); + print_request(m, rq, hdr); } else { - drm_printf(m, "\t\tELSP[%d] idle\n", - idx); + drm_printf(m, "\t\tELSP[%d] idle\n", idx); } } drm_printf(m, "\t\tHW active? 0x%x\n", execlists->active); @@ -1826,8 +1888,21 @@ void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *m) } spin_unlock_irq(&b->rb_lock); + if (INTEL_GEN(dev_priv) >= 6) { + drm_printf(m, "\tRING_IMR: %08x\n", I915_READ_IMR(engine)); + } + + drm_printf(m, "IRQ? 0x%lx (breadcrumbs? %s) (execlists? %s)\n", + engine->irq_posted, + yesno(test_bit(ENGINE_IRQ_BREADCRUMB, + &engine->irq_posted)), + yesno(test_bit(ENGINE_IRQ_EXECLIST, + &engine->irq_posted))); + + drm_printf(m, "HWSP:\n"); + hexdump(m, engine->status_page.page_addr, PAGE_SIZE); + drm_printf(m, "Idle? %s\n", yesno(intel_engine_is_idle(engine))); - drm_printf(m, "\n"); } static u8 user_class_map[] = { diff --git a/drivers/gpu/drm/i915/intel_fbc.c b/drivers/gpu/drm/i915/intel_fbc.c index 4aefc658a5cf..f88c1b5dae4c 100644 --- a/drivers/gpu/drm/i915/intel_fbc.c +++ b/drivers/gpu/drm/i915/intel_fbc.c @@ -531,7 +531,6 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, int size, int fb_cpp) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; int compression_threshold = 1; int ret; u64 end; @@ -541,7 +540,7 @@ static int find_compression_threshold(struct drm_i915_private *dev_priv, * If we enable FBC using a CFB on that memory range we'll get FIFO * underruns, even if that range is not reserved by the BIOS. */ if (IS_BROADWELL(dev_priv) || IS_GEN9_BC(dev_priv)) - end = ggtt->stolen_size - 8 * 1024 * 1024; + end = resource_size(&dev_priv->dsm) - 8 * 1024 * 1024; else end = U64_MAX; @@ -615,10 +614,16 @@ static int intel_fbc_alloc_cfb(struct intel_crtc *crtc) fbc->compressed_llb = compressed_llb; + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_fb.start, + U32_MAX)); + GEM_BUG_ON(range_overflows_t(u64, dev_priv->dsm.start, + fbc->compressed_llb->start, + U32_MAX)); I915_WRITE(FBC_CFB_BASE, - dev_priv->mm.stolen_base + fbc->compressed_fb.start); + dev_priv->dsm.start + fbc->compressed_fb.start); I915_WRITE(FBC_LL_BASE, - dev_priv->mm.stolen_base + compressed_llb->start); + dev_priv->dsm.start + compressed_llb->start); } DRM_DEBUG_KMS("reserved %llu bytes of contiguous stolen space for FBC, threshold: %d\n", diff --git a/drivers/gpu/drm/i915/intel_fbdev.c b/drivers/gpu/drm/i915/intel_fbdev.c index ea96682568e8..da48af11eb6b 100644 --- a/drivers/gpu/drm/i915/intel_fbdev.c +++ b/drivers/gpu/drm/i915/intel_fbdev.c @@ -115,7 +115,6 @@ static int intelfb_alloc(struct drm_fb_helper *helper, struct drm_framebuffer *fb; struct drm_device *dev = helper->dev; struct drm_i915_private *dev_priv = to_i915(dev); - struct i915_ggtt *ggtt = &dev_priv->ggtt; struct drm_mode_fb_cmd2 mode_cmd = {}; struct drm_i915_gem_object *obj; int size, ret; @@ -139,7 +138,7 @@ static int intelfb_alloc(struct drm_fb_helper *helper, * important and we should probably use that space with FBC or other * features. 
*/ obj = NULL; - if (size * 2 < ggtt->stolen_usable_size) + if (size * 2 < dev_priv->stolen_usable_size) obj = i915_gem_object_create_stolen(dev_priv, size); if (obj == NULL) obj = i915_gem_object_create(dev_priv, size); diff --git a/drivers/gpu/drm/i915/intel_guc.c b/drivers/gpu/drm/i915/intel_guc.c index d08e760252d4..3c6bf5a34c3c 100644 --- a/drivers/gpu/drm/i915/intel_guc.c +++ b/drivers/gpu/drm/i915/intel_guc.c @@ -61,6 +61,7 @@ void intel_guc_init_send_regs(struct intel_guc *guc) void intel_guc_init_early(struct intel_guc *guc) { + intel_guc_fw_init_early(guc); intel_guc_ct_init_early(&guc->ct); mutex_init(&guc->send_mutex); @@ -68,6 +69,114 @@ void intel_guc_init_early(struct intel_guc *guc) guc->notify = gen8_guc_raise_irq; } +int intel_guc_init_wq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + /* + * GuC log buffer flush work item has to do register access to + * send the ack to GuC and this work item, if not synced before + * suspend, can potentially get executed after the GFX device is + * suspended. + * By marking the WQ as freezable, we don't have to bother about + * flushing of this work item from the suspend hooks, the pending + * work item if any will be either executed before the suspend + * or scheduled later on resume. This way the handling of work + * item can be kept same between system suspend & rpm suspend. + */ + guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", + WQ_HIGHPRI | WQ_FREEZABLE); + if (!guc->log.runtime.flush_wq) + return -ENOMEM; + + /* + * Even though both sending GuC action, and adding a new workitem to + * GuC workqueue are serialized (each with its own locking), since + * we're using multiple engines, it's possible that we're going to + * issue a preempt request with two (or more - each for different + * engine) workitems in GuC queue. In this situation, GuC may submit + * all of them, which will make us very confused. + * Our preemption contexts may even already be complete - before we + * even had the chance to send the preempt action to GuC! Rather + * than introducing yet another lock, we can just use ordered workqueue + * to make sure we're always sending a single preemption request with a + * single workitem.
+ */ + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) { + guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", + WQ_HIGHPRI); + if (!guc->preempt_wq) { + destroy_workqueue(guc->log.runtime.flush_wq); + return -ENOMEM; + } + } + + return 0; +} + +void intel_guc_fini_wq(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + if (HAS_LOGICAL_RING_PREEMPTION(dev_priv) && + USES_GUC_SUBMISSION(dev_priv)) + destroy_workqueue(guc->preempt_wq); + + destroy_workqueue(guc->log.runtime.flush_wq); +} + +static int guc_shared_data_create(struct intel_guc *guc) +{ + struct i915_vma *vma; + void *vaddr; + + vma = intel_guc_allocate_vma(guc, PAGE_SIZE); + if (IS_ERR(vma)) + return PTR_ERR(vma); + + vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); + if (IS_ERR(vaddr)) { + i915_vma_unpin_and_release(&vma); + return PTR_ERR(vaddr); + } + + guc->shared_data = vma; + guc->shared_data_vaddr = vaddr; + + return 0; +} + +static void guc_shared_data_destroy(struct intel_guc *guc) +{ + i915_gem_object_unpin_map(guc->shared_data->obj); + i915_vma_unpin_and_release(&guc->shared_data); +} + +int intel_guc_init(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + int ret; + + ret = guc_shared_data_create(guc); + if (ret) + return ret; + GEM_BUG_ON(!guc->shared_data); + + /* We need to notify the guc whenever we change the GGTT */ + i915_ggtt_enable_guc(dev_priv); + + return 0; +} + +void intel_guc_fini(struct intel_guc *guc) +{ + struct drm_i915_private *dev_priv = guc_to_i915(guc); + + i915_ggtt_disable_guc(dev_priv); + guc_shared_data_destroy(guc); +} + static u32 get_gt_type(struct drm_i915_private *dev_priv) { /* XXX: GT type based on PCI device ID? field seems unused by fw */ @@ -128,7 +237,7 @@ void intel_guc_init_params(struct intel_guc *guc) } /* If GuC submission is enabled, set up additional parameters here */ - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { u32 ads = guc_ggtt_offset(guc->ads_vma) >> PAGE_SHIFT; u32 pgs = guc_ggtt_offset(dev_priv->guc.stage_desc_pool); u32 ctx_in_16 = GUC_MAX_STAGE_DESCRIPTORS / 16; diff --git a/drivers/gpu/drm/i915/intel_guc.h b/drivers/gpu/drm/i915/intel_guc.h index 59856726d2bc..52856a97477d 100644 --- a/drivers/gpu/drm/i915/intel_guc.h +++ b/drivers/gpu/drm/i915/intel_guc.h @@ -119,6 +119,10 @@ static inline u32 guc_ggtt_offset(struct i915_vma *vma) void intel_guc_init_early(struct intel_guc *guc); void intel_guc_init_send_regs(struct intel_guc *guc); void intel_guc_init_params(struct intel_guc *guc); +int intel_guc_init_wq(struct intel_guc *guc); +void intel_guc_fini_wq(struct intel_guc *guc); +int intel_guc_init(struct intel_guc *guc); +void intel_guc_fini(struct intel_guc *guc); int intel_guc_send_nop(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_send_mmio(struct intel_guc *guc, const u32 *action, u32 len); int intel_guc_sample_forcewake(struct intel_guc *guc); diff --git a/drivers/gpu/drm/i915/intel_guc_fw.c b/drivers/gpu/drm/i915/intel_guc_fw.c index 89862fa8ab42..cbc51c960425 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.c +++ b/drivers/gpu/drm/i915/intel_guc_fw.c @@ -56,45 +56,54 @@ MODULE_FIRMWARE(I915_KBL_GUC_UCODE); #define I915_GLK_GUC_UCODE GUC_FW_PATH(glk, GLK_FW_MAJOR, GLK_FW_MINOR) -/** - * intel_guc_fw_select() - selects GuC firmware for uploading - * - * @guc: intel_guc struct - * - * Return: zero when we know firmware, non-zero in other case - */ -int intel_guc_fw_select(struct intel_guc 
*guc) +static void guc_fw_select(struct intel_uc_fw *guc_fw) { + struct intel_guc *guc = container_of(guc_fw, struct intel_guc, fw); struct drm_i915_private *dev_priv = guc_to_i915(guc); - intel_uc_fw_init(&guc->fw, INTEL_UC_FW_TYPE_GUC); + GEM_BUG_ON(guc_fw->type != INTEL_UC_FW_TYPE_GUC); + + if (!HAS_GUC(dev_priv)) + return; if (i915_modparams.guc_firmware_path) { - guc->fw.path = i915_modparams.guc_firmware_path; - guc->fw.major_ver_wanted = 0; - guc->fw.minor_ver_wanted = 0; + guc_fw->path = i915_modparams.guc_firmware_path; + guc_fw->major_ver_wanted = 0; + guc_fw->minor_ver_wanted = 0; } else if (IS_SKYLAKE(dev_priv)) { - guc->fw.path = I915_SKL_GUC_UCODE; - guc->fw.major_ver_wanted = SKL_FW_MAJOR; - guc->fw.minor_ver_wanted = SKL_FW_MINOR; + guc_fw->path = I915_SKL_GUC_UCODE; + guc_fw->major_ver_wanted = SKL_FW_MAJOR; + guc_fw->minor_ver_wanted = SKL_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - guc->fw.path = I915_BXT_GUC_UCODE; - guc->fw.major_ver_wanted = BXT_FW_MAJOR; - guc->fw.minor_ver_wanted = BXT_FW_MINOR; + guc_fw->path = I915_BXT_GUC_UCODE; + guc_fw->major_ver_wanted = BXT_FW_MAJOR; + guc_fw->minor_ver_wanted = BXT_FW_MINOR; } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - guc->fw.path = I915_KBL_GUC_UCODE; - guc->fw.major_ver_wanted = KBL_FW_MAJOR; - guc->fw.minor_ver_wanted = KBL_FW_MINOR; + guc_fw->path = I915_KBL_GUC_UCODE; + guc_fw->major_ver_wanted = KBL_FW_MAJOR; + guc_fw->minor_ver_wanted = KBL_FW_MINOR; } else if (IS_GEMINILAKE(dev_priv)) { - guc->fw.path = I915_GLK_GUC_UCODE; - guc->fw.major_ver_wanted = GLK_FW_MAJOR; - guc->fw.minor_ver_wanted = GLK_FW_MINOR; + guc_fw->path = I915_GLK_GUC_UCODE; + guc_fw->major_ver_wanted = GLK_FW_MAJOR; + guc_fw->minor_ver_wanted = GLK_FW_MINOR; } else { - DRM_ERROR("No GuC firmware known for platform with GuC!\n"); - return -ENOENT; + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(guc_fw->type)); } +} - return 0; +/** + * intel_guc_fw_init_early() - initializes GuC firmware struct + * @guc: intel_guc struct + * + * On platforms with GuC selects firmware for uploading + */ +void intel_guc_fw_init_early(struct intel_guc *guc) +{ + struct intel_uc_fw *guc_fw = &guc->fw; + + intel_uc_fw_init(guc_fw, INTEL_UC_FW_TYPE_GUC); + guc_fw_select(guc_fw); } static void guc_prepare_xfer(struct intel_guc *guc) diff --git a/drivers/gpu/drm/i915/intel_guc_fw.h b/drivers/gpu/drm/i915/intel_guc_fw.h index 023f5baa9dd6..4ec5d3d9e2b0 100644 --- a/drivers/gpu/drm/i915/intel_guc_fw.h +++ b/drivers/gpu/drm/i915/intel_guc_fw.h @@ -27,7 +27,7 @@ struct intel_guc; -int intel_guc_fw_select(struct intel_guc *guc); +void intel_guc_fw_init_early(struct intel_guc *guc); int intel_guc_fw_upload(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/intel_guc_log.c b/drivers/gpu/drm/i915/intel_guc_log.c index 76d3eb1e4614..eaedd63e3819 100644 --- a/drivers/gpu/drm/i915/intel_guc_log.c +++ b/drivers/gpu/drm/i915/intel_guc_log.c @@ -411,30 +411,8 @@ static int guc_log_runtime_create(struct intel_guc *guc) guc->log.runtime.relay_chan = guc_log_relay_chan; INIT_WORK(&guc->log.runtime.flush_work, capture_logs_work); - - /* - * GuC log buffer flush work item has to do register access to - * send the ack to GuC and this work item, if not synced before - * suspend, can potentially get executed after the GFX device is - * suspended. 
- * By marking the WQ as freezable, we don't have to bother about - * flushing of this work item from the suspend hooks, the pending - * work item if any will be either executed before the suspend - * or scheduled later on resume. This way the handling of work - * item can be kept same between system suspend & rpm suspend. - */ - guc->log.runtime.flush_wq = alloc_ordered_workqueue("i915-guc_log", - WQ_HIGHPRI | WQ_FREEZABLE); - if (!guc->log.runtime.flush_wq) { - DRM_ERROR("Couldn't allocate the wq for GuC logging\n"); - ret = -ENOMEM; - goto err_relaychan; - } - return 0; -err_relaychan: - relay_close(guc->log.runtime.relay_chan); err_vaddr: i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; @@ -450,7 +428,6 @@ static void guc_log_runtime_destroy(struct intel_guc *guc) if (!guc_log_has_runtime(guc)) return; - destroy_workqueue(guc->log.runtime.flush_wq); relay_close(guc->log.runtime.relay_chan); i915_gem_object_unpin_map(guc->log.vma->obj); guc->log.runtime.buf_addr = NULL; @@ -505,7 +482,7 @@ static void guc_flush_logs(struct intel_guc *guc) { struct drm_i915_private *dev_priv = guc_to_i915(guc); - if (!i915_modparams.enable_guc_submission || + if (!USES_GUC_SUBMISSION(dev_priv) || (i915_modparams.guc_log_level < 0)) return; @@ -646,7 +623,7 @@ int i915_guc_log_control(struct drm_i915_private *dev_priv, u64 control_val) void i915_guc_log_register(struct drm_i915_private *dev_priv) { - if (!i915_modparams.enable_guc_submission || + if (!USES_GUC_SUBMISSION(dev_priv) || (i915_modparams.guc_log_level < 0)) return; @@ -657,7 +634,7 @@ void i915_guc_log_register(struct drm_i915_private *dev_priv) void i915_guc_log_unregister(struct drm_i915_private *dev_priv) { - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(dev_priv)) return; mutex_lock(&dev_priv->drm.struct_mutex); diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c index 912ff143d531..4d2409466a3a 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.c +++ b/drivers/gpu/drm/i915/intel_guc_submission.c @@ -88,7 +88,7 @@ static inline bool is_high_priority(struct intel_guc_client *client) client->priority == GUC_CLIENT_PRIORITY_HIGH); } -static int __reserve_doorbell(struct intel_guc_client *client) +static int reserve_doorbell(struct intel_guc_client *client) { unsigned long offset; unsigned long end; @@ -120,7 +120,7 @@ static int __reserve_doorbell(struct intel_guc_client *client) return 0; } -static void __unreserve_doorbell(struct intel_guc_client *client) +static void unreserve_doorbell(struct intel_guc_client *client) { GEM_BUG_ON(client->doorbell_id == GUC_DOORBELL_INVALID); @@ -188,32 +188,21 @@ static bool has_doorbell(struct intel_guc_client *client) return test_bit(client->doorbell_id, client->guc->doorbell_bitmap); } -static int __create_doorbell(struct intel_guc_client *client) +static void __create_doorbell(struct intel_guc_client *client) { struct guc_doorbell_info *doorbell; - int err; doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_ENABLED; doorbell->cookie = 0; - - err = __guc_allocate_doorbell(client->guc, client->stage_id); - if (err) { - doorbell->db_status = GUC_DOORBELL_DISABLED; - DRM_ERROR("Couldn't create client %u doorbell: %d\n", - client->stage_id, err); - } - - return err; } -static int __destroy_doorbell(struct intel_guc_client *client) +static void __destroy_doorbell(struct intel_guc_client *client) { struct drm_i915_private *dev_priv = guc_to_i915(client->guc); struct 
guc_doorbell_info *doorbell; u16 db_id = client->doorbell_id; - GEM_BUG_ON(db_id >= GUC_DOORBELL_INVALID); doorbell = __get_doorbell(client); doorbell->db_status = GUC_DOORBELL_DISABLED; @@ -225,50 +214,42 @@ static int __destroy_doorbell(struct intel_guc_client *client) */ if (wait_for_us(!(I915_READ(GEN8_DRBREGL(db_id)) & GEN8_DRB_VALID), 10)) WARN_ONCE(true, "Doorbell never became invalid after disable\n"); - - return __guc_deallocate_doorbell(client->guc, client->stage_id); } static int create_doorbell(struct intel_guc_client *client) { int ret; - ret = __reserve_doorbell(client); - if (ret) - return ret; - __update_doorbell_desc(client, client->doorbell_id); + __create_doorbell(client); - ret = __create_doorbell(client); - if (ret) - goto err; + ret = __guc_allocate_doorbell(client->guc, client->stage_id); + if (ret) { + __destroy_doorbell(client); + __update_doorbell_desc(client, GUC_DOORBELL_INVALID); + DRM_ERROR("Couldn't create client %u doorbell: %d\n", + client->stage_id, ret); + return ret; + } return 0; - -err: - __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - return ret; } static int destroy_doorbell(struct intel_guc_client *client) { - int err; + int ret; GEM_BUG_ON(!has_doorbell(client)); - /* XXX: wait for any interrupts */ - /* XXX: wait for workqueue to drain */ - - err = __destroy_doorbell(client); - if (err) - return err; + __destroy_doorbell(client); + ret = __guc_deallocate_doorbell(client->guc, client->stage_id); + if (ret) + DRM_ERROR("Couldn't destroy client %u doorbell: %d\n", + client->stage_id, ret); __update_doorbell_desc(client, GUC_DOORBELL_INVALID); - __unreserve_doorbell(client); - - return 0; + return ret; } static unsigned long __select_cacheline(struct intel_guc *guc) @@ -447,33 +428,6 @@ static void guc_stage_desc_fini(struct intel_guc *guc, memset(desc, 0, sizeof(*desc)); } -static int guc_shared_data_create(struct intel_guc *guc) -{ - struct i915_vma *vma; - void *vaddr; - - vma = intel_guc_allocate_vma(guc, PAGE_SIZE); - if (IS_ERR(vma)) - return PTR_ERR(vma); - - vaddr = i915_gem_object_pin_map(vma->obj, I915_MAP_WB); - if (IS_ERR(vaddr)) { - i915_vma_unpin_and_release(&vma); - return PTR_ERR(vaddr); - } - - guc->shared_data = vma; - guc->shared_data_vaddr = vaddr; - - return 0; -} - -static void guc_shared_data_destroy(struct intel_guc *guc) -{ - i915_gem_object_unpin_map(guc->shared_data->obj); - i915_vma_unpin_and_release(&guc->shared_data); -} - /* Construct a Work Item and append it to the GuC's Work Queue */ static void guc_wq_item_append(struct intel_guc_client *client, u32 target_engine, u32 context_desc, @@ -866,83 +820,47 @@ static bool doorbell_ok(struct intel_guc *guc, u16 db_id) return false; } -/* - * If the GuC thinks that the doorbell is unassigned (e.g. because we reset and - * reloaded the GuC FW) we can use this function to tell the GuC to reassign the - * doorbell to the rightful owner. - */ -static int __reset_doorbell(struct intel_guc_client *client, u16 db_id) +static bool guc_verify_doorbells(struct intel_guc *guc) { - int err; + u16 db_id; - __update_doorbell_desc(client, db_id); - err = __create_doorbell(client); - if (!err) - err = __destroy_doorbell(client); + for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) + if (!doorbell_ok(guc, db_id)) + return false; - return err; + return true; } -/* - * Set up & tear down each unused doorbell in turn, to ensure that all doorbell - * HW is (re)initialised. For that end, we might have to borrow the first - * client. 
Also, tell GuC about all the doorbells in use by all clients. - * We do this because the KMD, the GuC and the doorbell HW can easily go out of - * sync (e.g. we can reset the GuC, but not the doorbel HW). - */ -static int guc_init_doorbell_hw(struct intel_guc *guc) +static int guc_clients_doorbell_init(struct intel_guc *guc) { - struct intel_guc_client *client = guc->execbuf_client; - bool recreate_first_client = false; - u16 db_id; int ret; - /* For unused doorbells, make sure they are disabled */ - for_each_clear_bit(db_id, guc->doorbell_bitmap, GUC_NUM_DOORBELLS) { - if (doorbell_ok(guc, db_id)) - continue; - - if (has_doorbell(client)) { - /* Borrow execbuf_client (we will recreate it later) */ - destroy_doorbell(client); - recreate_first_client = true; - } - - ret = __reset_doorbell(client, db_id); - WARN(ret, "Doorbell %u reset failed, err %d\n", db_id, ret); - } - - if (recreate_first_client) { - ret = __reserve_doorbell(client); - if (unlikely(ret)) { - DRM_ERROR("Couldn't re-reserve first client db: %d\n", - ret); - return ret; - } - - __update_doorbell_desc(client, client->doorbell_id); - } - - /* Now for every client (and not only execbuf_client) make sure their - * doorbells are known by the GuC - */ - ret = __create_doorbell(guc->execbuf_client); + ret = create_doorbell(guc->execbuf_client); if (ret) return ret; - ret = __create_doorbell(guc->preempt_client); + ret = create_doorbell(guc->preempt_client); if (ret) { - __destroy_doorbell(guc->execbuf_client); + destroy_doorbell(guc->execbuf_client); return ret; } - /* Read back & verify all (used & unused) doorbell registers */ - for (db_id = 0; db_id < GUC_NUM_DOORBELLS; ++db_id) - WARN_ON(!doorbell_ok(guc, db_id)); - return 0; } +static void guc_clients_doorbell_fini(struct intel_guc *guc) +{ + /* + * By the time we're here, GuC has already been reset. + * Instead of trying (in vain) to communicate with it, let's just + * cleanup the doorbell HW and our internal state. + */ + __destroy_doorbell(guc->preempt_client); + __update_doorbell_desc(guc->preempt_client, GUC_DOORBELL_INVALID); + __destroy_doorbell(guc->execbuf_client); + __update_doorbell_desc(guc->execbuf_client, GUC_DOORBELL_INVALID); +} + /** * guc_client_alloc() - Allocate an intel_guc_client * @dev_priv: driver private data structure @@ -1018,7 +936,7 @@ guc_client_alloc(struct drm_i915_private *dev_priv, guc_proc_desc_init(guc, client); guc_stage_desc_init(guc, client); - ret = create_doorbell(client); + ret = reserve_doorbell(client); if (ret) goto err_vaddr; @@ -1042,16 +960,7 @@ err_client: static void guc_client_free(struct intel_guc_client *client) { - /* - * XXX: wait for any outstanding submissions before freeing memory. - * Be sure to drop any locks - */ - - /* FIXME: in many cases, by the time we get here the GuC has been - * reset, so we cannot destroy the doorbell properly. 
Ignore the - * error message for now - */ - destroy_doorbell(client); + unreserve_doorbell(client); guc_stage_desc_fini(client->guc, client); i915_gem_object_unpin_map(client->vma->obj); i915_vma_unpin_and_release(&client->vma); @@ -1214,57 +1123,15 @@ static void guc_ads_destroy(struct intel_guc *guc) i915_vma_unpin_and_release(&guc->ads_vma); } -static int guc_preempt_work_create(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - /* - * Even though both sending GuC action, and adding a new workitem to - * GuC workqueue are serialized (each with its own locking), since - * we're using mutliple engines, it's possible that we're going to - * issue a preempt request with two (or more - each for different - * engine) workitems in GuC queue. In this situation, GuC may submit - * all of them, which will make us very confused. - * Our preemption contexts may even already be complete - before we - * even had the chance to sent the preempt action to GuC!. Rather - * than introducing yet another lock, we can just use ordered workqueue - * to make sure we're always sending a single preemption request with a - * single workitem. - */ - guc->preempt_wq = alloc_ordered_workqueue("i915-guc_preempt", - WQ_HIGHPRI); - if (!guc->preempt_wq) - return -ENOMEM; - - for_each_engine(engine, dev_priv, id) { - guc->preempt_work[id].engine = engine; - INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); - } - - return 0; -} - -static void guc_preempt_work_destroy(struct intel_guc *guc) -{ - struct drm_i915_private *dev_priv = guc_to_i915(guc); - struct intel_engine_cs *engine; - enum intel_engine_id id; - - for_each_engine(engine, dev_priv, id) - cancel_work_sync(&guc->preempt_work[id].work); - - destroy_workqueue(guc->preempt_wq); - guc->preempt_wq = NULL; -} - /* * Set up the memory resources to be shared with the GuC (via the GGTT) * at firmware loading time. 
*/ int intel_guc_submission_init(struct intel_guc *guc) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; int ret; if (guc->stage_desc_pool) @@ -1279,33 +1146,29 @@ int intel_guc_submission_init(struct intel_guc *guc) */ GEM_BUG_ON(!guc->stage_desc_pool); - ret = guc_shared_data_create(guc); - if (ret) - goto err_stage_desc_pool; - GEM_BUG_ON(!guc->shared_data); - ret = intel_guc_log_create(guc); if (ret < 0) - goto err_shared_data; - - ret = guc_preempt_work_create(guc); - if (ret) - goto err_log; - GEM_BUG_ON(!guc->preempt_wq); + goto err_stage_desc_pool; ret = guc_ads_create(guc); if (ret < 0) - goto err_wq; + goto err_log; GEM_BUG_ON(!guc->ads_vma); + WARN_ON(!guc_verify_doorbells(guc)); + ret = guc_clients_create(guc); + if (ret) + return ret; + + for_each_engine(engine, dev_priv, id) { + guc->preempt_work[id].engine = engine; + INIT_WORK(&guc->preempt_work[id].work, inject_preempt_context); + } + return 0; -err_wq: - guc_preempt_work_destroy(guc); err_log: intel_guc_log_destroy(guc); -err_shared_data: - guc_shared_data_destroy(guc); err_stage_desc_pool: guc_stage_desc_pool_destroy(guc); return ret; @@ -1313,10 +1176,18 @@ err_stage_desc_pool: void intel_guc_submission_fini(struct intel_guc *guc) { + struct drm_i915_private *dev_priv = guc_to_i915(guc); + struct intel_engine_cs *engine; + enum intel_engine_id id; + + for_each_engine(engine, dev_priv, id) + cancel_work_sync(&guc->preempt_work[id].work); + + guc_clients_destroy(guc); + WARN_ON(!guc_verify_doorbells(guc)); + guc_ads_destroy(guc); - guc_preempt_work_destroy(guc); intel_guc_log_destroy(guc); - guc_shared_data_destroy(guc); guc_stage_desc_pool_destroy(guc); } @@ -1420,28 +1291,18 @@ int intel_guc_submission_enable(struct intel_guc *guc) sizeof(struct guc_wq_item) * I915_NUM_ENGINES > GUC_WQ_SIZE); - /* - * We're being called on both module initialization and on reset, - * until this flow is changed, we're using regular client presence to - * determine which case are we in, and whether we should allocate new - * clients or just reset their workqueues. 
- */ - if (!guc->execbuf_client) { - err = guc_clients_create(guc); - if (err) - return err; - } else { - guc_reset_wq(guc->execbuf_client); - guc_reset_wq(guc->preempt_client); - } + GEM_BUG_ON(!guc->execbuf_client); + + guc_reset_wq(guc->execbuf_client); + guc_reset_wq(guc->preempt_client); err = intel_guc_sample_forcewake(guc); if (err) - goto err_free_clients; + return err; - err = guc_init_doorbell_hw(guc); + err = guc_clients_doorbell_init(guc); if (err) - goto err_free_clients; + return err; /* Take over from manual control of ELSP (execlists) */ guc_interrupts_capture(dev_priv); @@ -1458,10 +1319,6 @@ int intel_guc_submission_enable(struct intel_guc *guc) } return 0; - -err_free_clients: - guc_clients_destroy(guc); - return err; } void intel_guc_submission_disable(struct intel_guc *guc) @@ -1471,11 +1328,10 @@ void intel_guc_submission_disable(struct intel_guc *guc) GEM_BUG_ON(dev_priv->gt.awake); /* GT should be parked first */ guc_interrupts_release(dev_priv); + guc_clients_doorbell_fini(guc); /* Revert back to manual ELSP submission */ intel_engines_reset_default_submission(dev_priv); - - guc_clients_destroy(guc); } #if IS_ENABLED(CONFIG_DRM_I915_SELFTEST) diff --git a/drivers/gpu/drm/i915/intel_guc_submission.h b/drivers/gpu/drm/i915/intel_guc_submission.h index 021fe85c8f71..fb081cefef93 100644 --- a/drivers/gpu/drm/i915/intel_guc_submission.h +++ b/drivers/gpu/drm/i915/intel_guc_submission.h @@ -77,5 +77,7 @@ int intel_guc_submission_init(struct intel_guc *guc); int intel_guc_submission_enable(struct intel_guc *guc); void intel_guc_submission_disable(struct intel_guc *guc); void intel_guc_submission_fini(struct intel_guc *guc); +int intel_guc_preempt_work_create(struct intel_guc *guc); +void intel_guc_preempt_work_destroy(struct intel_guc *guc); #endif diff --git a/drivers/gpu/drm/i915/intel_gvt.c b/drivers/gpu/drm/i915/intel_gvt.c index 126f7c769c69..a2fe7c8d4477 100644 --- a/drivers/gpu/drm/i915/intel_gvt.c +++ b/drivers/gpu/drm/i915/intel_gvt.c @@ -95,7 +95,7 @@ int intel_gvt_init(struct drm_i915_private *dev_priv) return 0; } - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { DRM_ERROR("i915 GVT-g loading failed due to Graphics virtualization is not yet supported with GuC submission\n"); return -EIO; } diff --git a/drivers/gpu/drm/i915/intel_hangcheck.c b/drivers/gpu/drm/i915/intel_hangcheck.c index 95bbb5a79c4f..31f01d64c021 100644 --- a/drivers/gpu/drm/i915/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/intel_hangcheck.c @@ -349,13 +349,18 @@ static void hangcheck_accumulate_sample(struct intel_engine_cs *engine, case ENGINE_ACTIVE_HEAD: case ENGINE_ACTIVE_SUBUNITS: - /* Seqno stuck with still active engine gets leeway, + /* + * Seqno stuck with still active engine gets leeway, * in hopes that it is just a long shader. 
*/ timeout = I915_SEQNO_DEAD_TIMEOUT; break; case ENGINE_DEAD: + if (drm_debug & DRM_UT_DRIVER) { + struct drm_printer p = drm_debug_printer("hangcheck"); + intel_engine_dump(engine, &p, "%s", engine->name); + } break; default: @@ -424,18 +429,18 @@ static void i915_hangcheck_elapsed(struct work_struct *work) intel_uncore_arm_unclaimed_mmio_detection(dev_priv); for_each_engine(engine, dev_priv, id) { - struct intel_engine_hangcheck cur_state, *hc = &cur_state; const bool busy = intel_engine_has_waiter(engine); + struct intel_engine_hangcheck hc; semaphore_clear_deadlocks(dev_priv); - hangcheck_load_sample(engine, hc); - hangcheck_accumulate_sample(engine, hc); - hangcheck_store_sample(engine, hc); + hangcheck_load_sample(engine, &hc); + hangcheck_accumulate_sample(engine, &hc); + hangcheck_store_sample(engine, &hc); if (engine->hangcheck.stalled) { hung |= intel_engine_flag(engine); - if (hc->action != ENGINE_DEAD) + if (hc.action != ENGINE_DEAD) stuck |= intel_engine_flag(engine); } diff --git a/drivers/gpu/drm/i915/intel_hdmi.c b/drivers/gpu/drm/i915/intel_hdmi.c index a40f35af225c..bced7b954d93 100644 --- a/drivers/gpu/drm/i915/intel_hdmi.c +++ b/drivers/gpu/drm/i915/intel_hdmi.c @@ -1383,7 +1383,7 @@ static bool hdmi_12bpc_possible(const struct intel_crtc_state *crtc_state) } } - /* Display Wa #1139 */ + /* Display WA #1139: glk */ if (IS_GLK_REVID(dev_priv, 0, GLK_REVID_A1) && crtc_state->base.adjusted_mode.htotal > 5460) return false; diff --git a/drivers/gpu/drm/i915/intel_huc.c b/drivers/gpu/drm/i915/intel_huc.c index 98d17254593c..974be3defa70 100644 --- a/drivers/gpu/drm/i915/intel_huc.c +++ b/drivers/gpu/drm/i915/intel_huc.c @@ -77,43 +77,57 @@ MODULE_FIRMWARE(I915_KBL_HUC_UCODE); #define I915_GLK_HUC_UCODE HUC_FW_PATH(glk, GLK_HUC_FW_MAJOR, \ GLK_HUC_FW_MINOR, GLK_BLD_NUM) -/** - * intel_huc_select_fw() - selects HuC firmware for loading - * @huc: intel_huc struct - */ -void intel_huc_select_fw(struct intel_huc *huc) +static void huc_fw_select(struct intel_uc_fw *huc_fw) { + struct intel_huc *huc = container_of(huc_fw, struct intel_huc, fw); struct drm_i915_private *dev_priv = huc_to_i915(huc); - intel_uc_fw_init(&huc->fw, INTEL_UC_FW_TYPE_HUC); + GEM_BUG_ON(huc_fw->type != INTEL_UC_FW_TYPE_HUC); + + if (!HAS_HUC(dev_priv)) + return; if (i915_modparams.huc_firmware_path) { - huc->fw.path = i915_modparams.huc_firmware_path; - huc->fw.major_ver_wanted = 0; - huc->fw.minor_ver_wanted = 0; + huc_fw->path = i915_modparams.huc_firmware_path; + huc_fw->major_ver_wanted = 0; + huc_fw->minor_ver_wanted = 0; } else if (IS_SKYLAKE(dev_priv)) { - huc->fw.path = I915_SKL_HUC_UCODE; - huc->fw.major_ver_wanted = SKL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = SKL_HUC_FW_MINOR; + huc_fw->path = I915_SKL_HUC_UCODE; + huc_fw->major_ver_wanted = SKL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = SKL_HUC_FW_MINOR; } else if (IS_BROXTON(dev_priv)) { - huc->fw.path = I915_BXT_HUC_UCODE; - huc->fw.major_ver_wanted = BXT_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = BXT_HUC_FW_MINOR; + huc_fw->path = I915_BXT_HUC_UCODE; + huc_fw->major_ver_wanted = BXT_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = BXT_HUC_FW_MINOR; } else if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) { - huc->fw.path = I915_KBL_HUC_UCODE; - huc->fw.major_ver_wanted = KBL_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = KBL_HUC_FW_MINOR; + huc_fw->path = I915_KBL_HUC_UCODE; + huc_fw->major_ver_wanted = KBL_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = KBL_HUC_FW_MINOR; } else if (IS_GEMINILAKE(dev_priv)) { - huc->fw.path = I915_GLK_HUC_UCODE; - 
huc->fw.major_ver_wanted = GLK_HUC_FW_MAJOR; - huc->fw.minor_ver_wanted = GLK_HUC_FW_MINOR; + huc_fw->path = I915_GLK_HUC_UCODE; + huc_fw->major_ver_wanted = GLK_HUC_FW_MAJOR; + huc_fw->minor_ver_wanted = GLK_HUC_FW_MINOR; } else { - DRM_ERROR("No HuC firmware known for platform with HuC!\n"); - return; + DRM_WARN("%s: No firmware known for this platform!\n", + intel_uc_fw_type_repr(huc_fw->type)); } } /** + * intel_huc_init_early() - initializes HuC struct + * @huc: intel_huc struct + * + * On platforms with HuC, selects the firmware to be uploaded + */ +void intel_huc_init_early(struct intel_huc *huc) +{ + struct intel_uc_fw *huc_fw = &huc->fw; + + intel_uc_fw_init(huc_fw, INTEL_UC_FW_TYPE_HUC); + huc_fw_select(huc_fw); +} + +/** * huc_ucode_xfer() - DMA's the firmware * @dev_priv: the drm_i915_private device * @@ -167,17 +181,17 @@ static int huc_ucode_xfer(struct intel_uc_fw *huc_fw, struct i915_vma *vma) * intel_huc_init_hw() - load HuC uCode to device * @huc: intel_huc structure * - * Called from guc_setup() during driver loading and also after a GPU reset. - * Be note that HuC loading must be done before GuC loading. + * Called from intel_uc_init_hw() during driver loading and also after a GPU + * reset. Note that HuC loading must be done before GuC loading. * * The firmware image should have already been fetched into memory by the - * earlier call to intel_huc_init(), so here we need only check that + * earlier call to intel_uc_init_fw(), so here we need only check that * is succeeded, and then transfer the image to the h/w. * */ -void intel_huc_init_hw(struct intel_huc *huc) +int intel_huc_init_hw(struct intel_huc *huc) { - intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); + return intel_uc_fw_upload(&huc->fw, huc_ucode_xfer); } /** @@ -191,7 +205,7 @@ void intel_huc_init_hw(struct intel_huc *huc) * signature through intel_guc_auth_huc(). It then waits for 50ms for * firmware verification ACK and unpins the object.
*/ -void intel_huc_auth(struct intel_huc *huc) +int intel_huc_auth(struct intel_huc *huc) { struct drm_i915_private *i915 = huc_to_i915(huc); struct intel_guc *guc = &i915->guc; @@ -199,14 +213,14 @@ void intel_huc_auth(struct intel_huc *huc) int ret; if (huc->fw.load_status != INTEL_UC_FIRMWARE_SUCCESS) - return; + return -ENOEXEC; vma = i915_gem_object_ggtt_pin(huc->fw.obj, NULL, 0, 0, PIN_OFFSET_BIAS | GUC_WOPCM_TOP); if (IS_ERR(vma)) { - DRM_ERROR("failed to pin huc fw object %d\n", - (int)PTR_ERR(vma)); - return; + ret = PTR_ERR(vma); + DRM_ERROR("HuC: Failed to pin huc fw object %d\n", ret); + return ret; } ret = intel_guc_auth_huc(guc, @@ -229,4 +243,5 @@ void intel_huc_auth(struct intel_huc *huc) out: i915_vma_unpin(vma); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_huc.h b/drivers/gpu/drm/i915/intel_huc.h index aaa38b9e5817..40039db59e04 100644 --- a/drivers/gpu/drm/i915/intel_huc.h +++ b/drivers/gpu/drm/i915/intel_huc.h @@ -34,8 +34,8 @@ struct intel_huc { /* HuC-specific additions */ }; -void intel_huc_select_fw(struct intel_huc *huc); -void intel_huc_init_hw(struct intel_huc *huc); -void intel_huc_auth(struct intel_huc *huc); +void intel_huc_init_early(struct intel_huc *huc); +int intel_huc_init_hw(struct intel_huc *huc); +int intel_huc_auth(struct intel_huc *huc); #endif diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c index 49fdf09f9919..ef9f91a0b0c9 100644 --- a/drivers/gpu/drm/i915/intel_i2c.c +++ b/drivers/gpu/drm/i915/intel_i2c.c @@ -128,22 +128,46 @@ intel_i2c_reset(struct drm_i915_private *dev_priv) I915_WRITE(GMBUS4, 0); } -static void intel_i2c_quirk_set(struct drm_i915_private *dev_priv, bool enable) +static void pnv_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) { u32 val; /* When using bit bashing for I2C, this bit needs to be set to 1 */ - if (!IS_PINEVIEW(dev_priv)) - return; - val = I915_READ(DSPCLK_GATE_D); - if (enable) - val |= DPCUNIT_CLOCK_GATE_DISABLE; + if (!enable) + val |= PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; else - val &= ~DPCUNIT_CLOCK_GATE_DISABLE; + val &= ~PNV_GMBUSUNIT_CLOCK_GATE_DISABLE; I915_WRITE(DSPCLK_GATE_D, val); } +static void pch_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(SOUTH_DSPCLK_GATE_D); + if (!enable) + val |= PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + else + val &= ~PCH_GMBUSUNIT_CLOCK_GATE_DISABLE; + I915_WRITE(SOUTH_DSPCLK_GATE_D, val); +} + +static void bxt_gmbus_clock_gating(struct drm_i915_private *dev_priv, + bool enable) +{ + u32 val; + + val = I915_READ(GEN9_CLKGATE_DIS_4); + if (!enable) + val |= BXT_GMBUS_GATING_DIS; + else + val &= ~BXT_GMBUS_GATING_DIS; + I915_WRITE(GEN9_CLKGATE_DIS_4, val); +} + static u32 get_reserved(struct intel_gmbus *bus) { struct drm_i915_private *dev_priv = bus->dev_priv; @@ -221,7 +245,10 @@ intel_gpio_pre_xfer(struct i2c_adapter *adapter) struct drm_i915_private *dev_priv = bus->dev_priv; intel_i2c_reset(dev_priv); - intel_i2c_quirk_set(dev_priv, true); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, false); + set_data(bus, 1); set_clock(bus, 1); udelay(I2C_RISEFALL_TIME); @@ -238,7 +265,9 @@ intel_gpio_post_xfer(struct i2c_adapter *adapter) set_data(bus, 1); set_clock(bus, 1); - intel_i2c_quirk_set(dev_priv, false); + + if (IS_PINEVIEW(dev_priv)) + pnv_gmbus_clock_gating(dev_priv, true); } static void @@ -481,6 +510,13 @@ do_gmbus_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs, int num) int i = 0, inc, try = 0; int ret = 0; + /* Display WA #0868: 
skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, false); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, false); + retry: I915_WRITE_FW(GMBUS0, bus->reg0); @@ -582,6 +618,13 @@ timeout: ret = -EAGAIN; out: + /* Display WA #0868: skl,bxt,kbl,cfl,glk,cnl */ + if (IS_GEN9_LP(dev_priv)) + bxt_gmbus_clock_gating(dev_priv, true); + else if (HAS_PCH_SPT(dev_priv) || + HAS_PCH_KBP(dev_priv) || HAS_PCH_CNP(dev_priv)) + pch_gmbus_clock_gating(dev_priv, true); + return ret; } diff --git a/drivers/gpu/drm/i915/intel_lpe_audio.c b/drivers/gpu/drm/i915/intel_lpe_audio.c index 3bf65288ffff..5809b29044fc 100644 --- a/drivers/gpu/drm/i915/intel_lpe_audio.c +++ b/drivers/gpu/drm/i915/intel_lpe_audio.c @@ -193,7 +193,7 @@ static bool lpe_audio_detect(struct drm_i915_private *dev_priv) }; if (!pci_dev_present(atom_hdaudio_ids)) { - DRM_INFO("%s\n", "HDaudio controller not detected, using LPE audio instead\n"); + DRM_INFO("HDaudio controller not detected, using LPE audio instead\n"); lpe_present = true; } } diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index 2a8160f603ab..739c33b07c59 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -431,8 +431,6 @@ static inline void elsp_write(u64 desc, u32 __iomem *elsp) static void execlists_submit_ports(struct intel_engine_cs *engine) { struct execlist_port *port = engine->execlists.port; - u32 __iomem *elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); unsigned int n; for (n = execlists_num_ports(&engine->execlists); n--; ) { @@ -451,14 +449,14 @@ static void execlists_submit_ports(struct intel_engine_cs *engine) GEM_TRACE("%s in[%d]: ctx=%d.%d, seqno=%x\n", engine->name, n, - rq->ctx->hw_id, count, + port[n].context_id, count, rq->global_seqno); } else { GEM_BUG_ON(!n); desc = 0; } - elsp_write(desc, elsp); + elsp_write(desc, engine->execlists.elsp); } execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } @@ -496,8 +494,6 @@ static void inject_preempt_context(struct intel_engine_cs *engine) { struct intel_context *ce = &engine->i915->preempt_context->engine[engine->id]; - u32 __iomem *elsp = - engine->i915->regs + i915_mmio_reg_offset(RING_ELSP(engine)); unsigned int n; GEM_BUG_ON(engine->i915->preempt_context->hw_id != PREEMPT_ID); @@ -508,11 +504,11 @@ static void inject_preempt_context(struct intel_engine_cs *engine) ce->ring->tail &= (ce->ring->size - 1); ce->lrc_reg_state[CTX_RING_TAIL+1] = ce->ring->tail; - GEM_TRACE("\n"); + GEM_TRACE("%s\n", engine->name); for (n = execlists_num_ports(&engine->execlists); --n; ) - elsp_write(0, elsp); + elsp_write(0, engine->execlists.elsp); - elsp_write(ce->lrc_desc, elsp); + elsp_write(ce->lrc_desc, engine->execlists.elsp); execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK); } @@ -865,9 +861,10 @@ static void execlists_submission_tasklet(unsigned long data) */ status = READ_ONCE(buf[2 * head]); /* maybe mmio! 
*/ - GEM_TRACE("%s csb[%dd]: status=0x%08x:0x%08x\n", + GEM_TRACE("%s csb[%d]: status=0x%08x:0x%08x, active=0x%x\n", engine->name, head, - status, buf[2*head + 1]); + status, buf[2*head + 1], + execlists->active); if (status & (GEN8_CTX_STATUS_IDLE_ACTIVE | GEN8_CTX_STATUS_PREEMPTED)) @@ -885,6 +882,8 @@ static void execlists_submission_tasklet(unsigned long data) if (status & GEN8_CTX_STATUS_COMPLETE && buf[2*head + 1] == PREEMPT_ID) { + GEM_TRACE("%s preempt-idle\n", engine->name); + execlists_cancel_port_requests(execlists); execlists_unwind_incomplete_requests(execlists); @@ -909,8 +908,8 @@ static void execlists_submission_tasklet(unsigned long data) rq = port_unpack(port, &count); GEM_TRACE("%s out[0]: ctx=%d.%d, seqno=%x\n", engine->name, - rq->ctx->hw_id, count, - rq->global_seqno); + port->context_id, count, + rq ? rq->global_seqno : 0); GEM_BUG_ON(count == 0); if (--count == 0) { GEM_BUG_ON(status & GEN8_CTX_STATUS_PREEMPTED); @@ -1509,6 +1508,9 @@ static int gen8_init_common_ring(struct intel_engine_cs *engine) execlists->csb_head = -1; execlists->active = 0; + execlists->elsp = + dev_priv->regs + i915_mmio_reg_offset(RING_ELSP(engine)); + /* After a GPU reset, we may have requests to replay */ if (execlists->first) tasklet_schedule(&execlists->tasklet); @@ -1556,6 +1558,8 @@ static void reset_common_ring(struct intel_engine_cs *engine, struct intel_context *ce; unsigned long flags; + GEM_TRACE("%s seqno=%x\n", + engine->name, request ? request->global_seqno : 0); spin_lock_irqsave(&engine->timeline->lock, flags); /* diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c index fc65f5e451dd..c58e5f53bab0 100644 --- a/drivers/gpu/drm/i915/intel_opregion.c +++ b/drivers/gpu/drm/i915/intel_opregion.c @@ -32,6 +32,8 @@ #include <drm/drmP.h> #include <drm/i915_drm.h> + +#include "intel_opregion.h" #include "i915_drv.h" #include "intel_drv.h" diff --git a/drivers/gpu/drm/i915/intel_opregion.h b/drivers/gpu/drm/i915/intel_opregion.h new file mode 100644 index 000000000000..e0e437ba9e51 --- /dev/null +++ b/drivers/gpu/drm/i915/intel_opregion.h @@ -0,0 +1,106 @@ +/* + * Copyright © 2008-2017 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + */ + +#ifndef _INTEL_OPREGION_H_ +#define _INTEL_OPREGION_H_ + +#include <linux/workqueue.h> +#include <linux/pci.h> + +struct drm_i915_private; +struct intel_encoder; + +struct opregion_header; +struct opregion_acpi; +struct opregion_swsci; +struct opregion_asle; + +struct intel_opregion { + struct opregion_header *header; + struct opregion_acpi *acpi; + struct opregion_swsci *swsci; + u32 swsci_gbda_sub_functions; + u32 swsci_sbcb_sub_functions; + struct opregion_asle *asle; + void *rvda; + void *vbt_firmware; + const void *vbt; + u32 vbt_size; + u32 *lid_state; + struct work_struct asle_work; +}; + +#define OPREGION_SIZE (8 * 1024) + +#ifdef CONFIG_ACPI + +int intel_opregion_setup(struct drm_i915_private *dev_priv); +void intel_opregion_register(struct drm_i915_private *dev_priv); +void intel_opregion_unregister(struct drm_i915_private *dev_priv); +void intel_opregion_asle_intr(struct drm_i915_private *dev_priv); +int intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, + bool enable); +int intel_opregion_notify_adapter(struct drm_i915_private *dev_priv, + pci_power_t state); +int intel_opregion_get_panel_type(struct drm_i915_private *dev_priv); + +#else /* CONFIG_ACPI*/ + +static inline int intel_opregion_setup(struct drm_i915_private *dev_priv) +{ + return 0; +} + +static inline void intel_opregion_register(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_unregister(struct drm_i915_private *dev_priv) +{ +} + +static inline void intel_opregion_asle_intr(struct drm_i915_private *dev_priv) +{ +} + +static inline int +intel_opregion_notify_encoder(struct intel_encoder *intel_encoder, bool enable) +{ + return 0; +} + +static inline int +intel_opregion_notify_adapter(struct drm_i915_private *dev, pci_power_t state) +{ + return 0; +} + +static inline int intel_opregion_get_panel_type(struct drm_i915_private *dev) +{ + return -ENODEV; +} + +#endif /* CONFIG_ACPI */ + +#endif diff --git a/drivers/gpu/drm/i915/intel_overlay.c b/drivers/gpu/drm/i915/intel_overlay.c index 1b397b41cb4f..41e9465d44a8 100644 --- a/drivers/gpu/drm/i915/intel_overlay.c +++ b/drivers/gpu/drm/i915/intel_overlay.c @@ -219,7 +219,7 @@ intel_overlay_map_regs(struct intel_overlay *overlay) if (OVERLAY_NEEDS_PHYSICAL(dev_priv)) regs = (struct overlay_registers __iomem *)overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_wc(&dev_priv->ggtt.iomap, overlay->flip_addr, PAGE_SIZE); @@ -1508,7 +1508,7 @@ intel_overlay_map_regs_atomic(struct intel_overlay *overlay) regs = (struct overlay_registers __iomem *) overlay->reg_bo->phys_handle->vaddr; else - regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.mappable, + regs = io_mapping_map_atomic_wc(&dev_priv->ggtt.iomap, overlay->flip_addr); return regs; diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 67f326230a7e..1db79a860b96 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -58,7 +58,7 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) if (HAS_LLC(dev_priv)) { /* * WaCompressedResourceDisplayNewHashMode:skl,kbl - * Display WA#0390: skl,kbl + * Display WA #0390: skl,kbl * * Must match Sampler, Pixel Back End, and Media. See * WaCompressedResourceSamplerPbeMediaNewHashMode. 
@@ -6416,7 +6416,6 @@ static void valleyview_disable_rps(struct drm_i915_private *dev_priv) static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; bool enable_rc6 = true; unsigned long rc6_ctx_base; u32 rc_ctl; @@ -6441,9 +6440,8 @@ static bool bxt_check_bios_rc6_setup(struct drm_i915_private *dev_priv) * for this check. */ rc6_ctx_base = I915_READ(RC6_CTX_BASE) & RC6_CTX_BASE_MASK; - if (!((rc6_ctx_base >= ggtt->stolen_reserved_base) && - (rc6_ctx_base + PAGE_SIZE <= ggtt->stolen_reserved_base + - ggtt->stolen_reserved_size))) { + if (!((rc6_ctx_base >= dev_priv->dsm_reserved.start) && + (rc6_ctx_base + PAGE_SIZE < dev_priv->dsm_reserved.end))) { DRM_DEBUG_DRIVER("RC6 Base address not as expected.\n"); enable_rc6 = false; } @@ -7020,7 +7018,7 @@ static void valleyview_check_pctx(struct drm_i915_private *dev_priv) { unsigned long pctx_addr = I915_READ(VLV_PCBR) & ~4095; - WARN_ON(pctx_addr != dev_priv->mm.stolen_base + + WARN_ON(pctx_addr != dev_priv->dsm.start + dev_priv->vlv_pctx->stolen->start); } @@ -7035,16 +7033,15 @@ static void cherryview_check_pctx(struct drm_i915_private *dev_priv) static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) { - struct i915_ggtt *ggtt = &dev_priv->ggtt; - unsigned long pctx_paddr, paddr; + resource_size_t pctx_paddr, paddr; + resource_size_t pctx_size = 32*1024; u32 pcbr; - int pctx_size = 32*1024; pcbr = I915_READ(VLV_PCBR); if ((pcbr >> VLV_PCBR_ADDR_SHIFT) == 0) { DRM_DEBUG_DRIVER("BIOS didn't set up PCBR, fixing up\n"); - paddr = (dev_priv->mm.stolen_base + - (ggtt->stolen_size - pctx_size)); + paddr = dev_priv->dsm.end + 1 - pctx_size; + GEM_BUG_ON(paddr > U32_MAX); pctx_paddr = (paddr & (~4095)); I915_WRITE(VLV_PCBR, pctx_paddr); @@ -7056,16 +7053,16 @@ static void cherryview_setup_pctx(struct drm_i915_private *dev_priv) static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) { struct drm_i915_gem_object *pctx; - unsigned long pctx_paddr; + resource_size_t pctx_paddr; + resource_size_t pctx_size = 24*1024; u32 pcbr; - int pctx_size = 24*1024; pcbr = I915_READ(VLV_PCBR); if (pcbr) { /* BIOS set it up already, grab the pre-alloc'd space */ - int pcbr_offset; + resource_size_t pcbr_offset; - pcbr_offset = (pcbr & (~4095)) - dev_priv->mm.stolen_base; + pcbr_offset = (pcbr & (~4095)) - dev_priv->dsm.start; pctx = i915_gem_object_create_stolen_for_preallocated(dev_priv, pcbr_offset, I915_GTT_OFFSET_NONE, @@ -7089,7 +7086,11 @@ static void valleyview_setup_pctx(struct drm_i915_private *dev_priv) goto out; } - pctx_paddr = dev_priv->mm.stolen_base + pctx->stolen->start; + GEM_BUG_ON(range_overflows_t(u64, + dev_priv->dsm.start, + pctx->stolen->start, + U32_MAX)); + pctx_paddr = dev_priv->dsm.start + pctx->stolen->start; I915_WRITE(VLV_PCBR, pctx_paddr); out: @@ -8417,7 +8418,7 @@ static void cnp_init_clock_gating(struct drm_i915_private *dev_priv) if (!HAS_PCH_CNP(dev_priv)) return; - /* Wa #1181 */ + /* Display WA #1181: cnp */ I915_WRITE(SOUTH_DSPCLK_GATE_D, I915_READ(SOUTH_DSPCLK_GATE_D) | CNP_PWM_CGE_GATING_DISABLE); } @@ -8446,6 +8447,11 @@ static void cnl_init_clock_gating(struct drm_i915_private *dev_priv) if (IS_CNL_REVID(dev_priv, CNL_REVID_A0, CNL_REVID_B0)) val |= SARBUNIT_CLKGATE_DIS; I915_WRITE(SLICE_UNIT_LEVEL_CLKGATE, val); + + /* WaDisableVFclkgate:cnl */ + val = I915_READ(UNSLICE_UNIT_LEVEL_CLKGATE); + val |= VFUNIT_CLKGATE_DIS; + I915_WRITE(UNSLICE_UNIT_LEVEL_CLKGATE, val); } static void cfl_init_clock_gating(struct drm_i915_private *dev_priv) diff 
--git a/drivers/gpu/drm/i915/intel_psr.c b/drivers/gpu/drm/i915/intel_psr.c index a1ad85fa5c1a..2e32615eeada 100644 --- a/drivers/gpu/drm/i915/intel_psr.c +++ b/drivers/gpu/drm/i915/intel_psr.c @@ -590,7 +590,7 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, struct drm_i915_private *dev_priv = to_i915(dev); if (dev_priv->psr.active) { - i915_reg_t psr_ctl; + i915_reg_t psr_status; u32 psr_status_mask; if (dev_priv->psr.aux_frame_sync) @@ -599,24 +599,24 @@ static void hsw_psr_disable(struct intel_dp *intel_dp, 0); if (dev_priv->psr.psr2_support) { - psr_ctl = EDP_PSR2_CTL; + psr_status = EDP_PSR2_STATUS_CTL; psr_status_mask = EDP_PSR2_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & + I915_WRITE(EDP_PSR2_CTL, + I915_READ(EDP_PSR2_CTL) & ~(EDP_PSR2_ENABLE | EDP_SU_TRACK_ENABLE)); } else { - psr_ctl = EDP_PSR_STATUS_CTL; + psr_status = EDP_PSR_STATUS_CTL; psr_status_mask = EDP_PSR_STATUS_STATE_MASK; - I915_WRITE(psr_ctl, - I915_READ(psr_ctl) & ~EDP_PSR_ENABLE); + I915_WRITE(EDP_PSR_CTL, + I915_READ(EDP_PSR_CTL) & ~EDP_PSR_ENABLE); } /* Wait till PSR is idle */ if (intel_wait_for_register(dev_priv, - psr_ctl, psr_status_mask, 0, + psr_status, psr_status_mask, 0, 2000)) DRM_ERROR("Timed out waiting for PSR Idle State\n"); diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h index c68ab3ead83c..c5ff203e42d6 100644 --- a/drivers/gpu/drm/i915/intel_ringbuffer.h +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h @@ -200,6 +200,11 @@ struct intel_engine_execlists { bool no_priolist; /** + * @elsp: the ExecList Submission Port register + */ + u32 __iomem *elsp; + + /** * @port: execlist port states * * For each hardware ELSP (ExecList Submission Port) we keep @@ -1008,7 +1013,10 @@ unsigned int intel_engines_has_context_isolation(struct drm_i915_private *i915); bool intel_engine_can_store_dword(struct intel_engine_cs *engine); -void intel_engine_dump(struct intel_engine_cs *engine, struct drm_printer *p); +__printf(3, 4) +void intel_engine_dump(struct intel_engine_cs *engine, + struct drm_printer *m, + const char *header, ...); struct intel_engine_cs * intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance); diff --git a/drivers/gpu/drm/i915/intel_runtime_pm.c b/drivers/gpu/drm/i915/intel_runtime_pm.c index 8315499452dc..db9d57f39534 100644 --- a/drivers/gpu/drm/i915/intel_runtime_pm.c +++ b/drivers/gpu/drm/i915/intel_runtime_pm.c @@ -130,6 +130,8 @@ intel_display_power_domain_str(enum intel_display_power_domain domain) return "INIT"; case POWER_DOMAIN_MODESET: return "MODESET"; + case POWER_DOMAIN_GT_IRQ: + return "GT_IRQ"; default: MISSING_CASE(domain); return "?"; @@ -1705,6 +1707,7 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define SKL_DISPLAY_DC_OFF_POWER_DOMAINS ( \ SKL_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ BIT_ULL(POWER_DOMAIN_INIT)) @@ -1723,12 +1726,13 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_AUX_C) | \ BIT_ULL(POWER_DOMAIN_AUDIO) | \ BIT_ULL(POWER_DOMAIN_VGA) | \ - BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DISPLAY_DC_OFF_POWER_DOMAINS ( \ BXT_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define BXT_DPIO_CMN_A_POWER_DOMAINS ( \ 
BIT_ULL(POWER_DOMAIN_PORT_DDI_A_LANES) | \ @@ -1785,8 +1789,10 @@ void intel_display_power_put(struct drm_i915_private *dev_priv, BIT_ULL(POWER_DOMAIN_INIT)) #define GLK_DISPLAY_DC_OFF_POWER_DOMAINS ( \ GLK_DISPLAY_POWERWELL_2_POWER_DOMAINS | \ + BIT_ULL(POWER_DOMAIN_GT_IRQ) | \ BIT_ULL(POWER_DOMAIN_MODESET) | \ BIT_ULL(POWER_DOMAIN_AUX_A) | \ + BIT_ULL(POWER_DOMAIN_GMBUS) | \ BIT_ULL(POWER_DOMAIN_INIT)) #define CNL_DISPLAY_POWERWELL_2_POWER_DOMAINS ( \ diff --git a/drivers/gpu/drm/i915/intel_uc.c b/drivers/gpu/drm/i915/intel_uc.c index 1e2a30a40ede..907deac6e3fa 100644 --- a/drivers/gpu/drm/i915/intel_uc.c +++ b/drivers/gpu/drm/i915/intel_uc.c @@ -24,6 +24,7 @@ #include "intel_uc.h" #include "intel_guc_submission.h" +#include "intel_guc.h" #include "i915_drv.h" /* Reset GuC providing us with fresh state for both GuC and HuC. @@ -47,55 +48,93 @@ static int __intel_uc_reset_hw(struct drm_i915_private *dev_priv) return ret; } -void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +static int __get_platform_enable_guc(struct drm_i915_private *dev_priv) { - if (!HAS_GUC(dev_priv)) { - if (i915_modparams.enable_guc_loading > 0 || - i915_modparams.enable_guc_submission > 0) - DRM_INFO("Ignoring GuC options, no hardware\n"); + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; + int enable_guc = 0; - i915_modparams.enable_guc_loading = 0; - i915_modparams.enable_guc_submission = 0; - return; - } + /* Default is to enable GuC/HuC if we know their firmwares */ + if (intel_uc_fw_is_selected(guc_fw)) + enable_guc |= ENABLE_GUC_SUBMISSION; + if (intel_uc_fw_is_selected(huc_fw)) + enable_guc |= ENABLE_GUC_LOAD_HUC; - /* A negative value means "use platform default" */ - if (i915_modparams.enable_guc_loading < 0) - i915_modparams.enable_guc_loading = HAS_GUC_UCODE(dev_priv); + /* Any platform specific fine-tuning can be done here */ + + return enable_guc; +} - /* Verify firmware version */ - if (i915_modparams.enable_guc_loading) { - if (HAS_HUC_UCODE(dev_priv)) - intel_huc_select_fw(&dev_priv->huc); +/** + * intel_uc_sanitize_options - sanitize uC related modparam options + * @dev_priv: device private + * + * In the case of the "enable_guc" option, this function will attempt to modify + * it only if it was initially set to "auto(-1)". The default value for this + * modparam varies between platforms and is hardcoded in the driver code. + * Any other modparam value is only checked against the availability of the + * related hardware or firmware definitions. + */ +void intel_uc_sanitize_options(struct drm_i915_private *dev_priv) +{ + struct intel_uc_fw *guc_fw = &dev_priv->guc.fw; + struct intel_uc_fw *huc_fw = &dev_priv->huc.fw; - if (intel_guc_fw_select(&dev_priv->guc)) - i915_modparams.enable_guc_loading = 0; + /* A negative value means "use platform default" */ + if (i915_modparams.enable_guc < 0) + i915_modparams.enable_guc = __get_platform_enable_guc(dev_priv); + + DRM_DEBUG_DRIVER("enable_guc=%d (submission:%s huc:%s)\n", + i915_modparams.enable_guc, + yesno(intel_uc_is_using_guc_submission()), + yesno(intel_uc_is_using_huc())); + + /* Verify GuC firmware availability */ + if (intel_uc_is_using_guc() && !intel_uc_fw_is_selected(guc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_GUC(dev_priv) ? 
"no GuC hardware" : + "no GuC firmware"); } - /* Can't enable guc submission without guc loaded */ - if (!i915_modparams.enable_guc_loading) - i915_modparams.enable_guc_submission = 0; + /* Verify HuC firmware availability */ + if (intel_uc_is_using_huc() && !intel_uc_fw_is_selected(huc_fw)) { + DRM_WARN("Incompatible option detected: enable_guc=%d, %s!\n", + i915_modparams.enable_guc, + !HAS_HUC(dev_priv) ? "no HuC hardware" : + "no HuC firmware"); + } - /* A negative value means "use platform default" */ - if (i915_modparams.enable_guc_submission < 0) - i915_modparams.enable_guc_submission = HAS_GUC_SCHED(dev_priv); + /* Make sure that sanitization was done */ + GEM_BUG_ON(i915_modparams.enable_guc < 0); } void intel_uc_init_early(struct drm_i915_private *dev_priv) { intel_guc_init_early(&dev_priv->guc); + intel_huc_init_early(&dev_priv->huc); } void intel_uc_init_fw(struct drm_i915_private *dev_priv) { - intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + if (!USES_GUC(dev_priv)) + return; + + if (USES_HUC(dev_priv)) + intel_uc_fw_fetch(dev_priv, &dev_priv->huc.fw); + intel_uc_fw_fetch(dev_priv, &dev_priv->guc.fw); } void intel_uc_fini_fw(struct drm_i915_private *dev_priv) { + if (!USES_GUC(dev_priv)) + return; + intel_uc_fw_fini(&dev_priv->guc.fw); - intel_uc_fw_fini(&dev_priv->huc.fw); + + if (USES_HUC(dev_priv)) + intel_uc_fw_fini(&dev_priv->huc.fw); } /** @@ -149,30 +188,91 @@ static void guc_disable_communication(struct intel_guc *guc) guc->send = intel_guc_send_nop; } -int intel_uc_init_hw(struct drm_i915_private *dev_priv) +int intel_uc_init_wq(struct drm_i915_private *dev_priv) +{ + int ret; + + if (!USES_GUC(dev_priv)) + return 0; + + ret = intel_guc_init_wq(&dev_priv->guc); + if (ret) { + DRM_ERROR("Couldn't allocate workqueues for GuC\n"); + return ret; + } + + return 0; +} + +void intel_uc_fini_wq(struct drm_i915_private *dev_priv) +{ + if (!USES_GUC(dev_priv)) + return; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + intel_guc_fini_wq(&dev_priv->guc); +} + +int intel_uc_init(struct drm_i915_private *dev_priv) { struct intel_guc *guc = &dev_priv->guc; - int ret, attempts; + int ret; - if (!i915_modparams.enable_guc_loading) + if (!USES_GUC(dev_priv)) return 0; - guc_disable_communication(guc); - gen9_reset_guc_interrupts(dev_priv); + if (!HAS_GUC(dev_priv)) + return -ENODEV; - /* We need to notify the guc whenever we change the GGTT */ - i915_ggtt_enable_guc(dev_priv); + ret = intel_guc_init(guc); + if (ret) + return ret; - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) { /* * This is stuff we need to have available at fw load time * if we are planning to enable submission later */ ret = intel_guc_submission_init(guc); - if (ret) - goto err_guc; + if (ret) { + intel_guc_fini(guc); + return ret; + } } + return 0; +} + +void intel_uc_fini(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + + if (!USES_GUC(dev_priv)) + return; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + if (USES_GUC_SUBMISSION(dev_priv)) + intel_guc_submission_fini(guc); + + intel_guc_fini(guc); +} + +int intel_uc_init_hw(struct drm_i915_private *dev_priv) +{ + struct intel_guc *guc = &dev_priv->guc; + struct intel_huc *huc = &dev_priv->huc; + int ret, attempts; + + if (!USES_GUC(dev_priv)) + return 0; + + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + guc_disable_communication(guc); + gen9_reset_guc_interrupts(dev_priv); + /* init WOPCM */ I915_WRITE(GUC_WOPCM_SIZE, intel_guc_wopcm_size(dev_priv)); I915_WRITE(DMA_GUC_WOPCM_OFFSET, @@ -192,9 +292,14 @@ int 
intel_uc_init_hw(struct drm_i915_private *dev_priv) */ ret = __intel_uc_reset_hw(dev_priv); if (ret) - goto err_submission; + goto err_out; + + if (USES_HUC(dev_priv)) { + ret = intel_huc_init_hw(huc); + if (ret) + goto err_out; + } - intel_huc_init_hw(&dev_priv->huc); intel_guc_init_params(guc); ret = intel_guc_fw_upload(guc); if (ret == 0 || ret != -EAGAIN) @@ -212,8 +317,13 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) if (ret) goto err_log_capture; - intel_huc_auth(&dev_priv->huc); - if (i915_modparams.enable_guc_submission) { + if (USES_HUC(dev_priv)) { + ret = intel_huc_auth(huc); + if (ret) + goto err_communication; + } + + if (USES_GUC_SUBMISSION(dev_priv)) { if (i915_modparams.guc_log_level >= 0) gen9_enable_guc_interrupts(dev_priv); @@ -222,50 +332,33 @@ int intel_uc_init_hw(struct drm_i915_private *dev_priv) goto err_interrupts; } - dev_info(dev_priv->drm.dev, "GuC %s (firmware %s [version %u.%u])\n", - i915_modparams.enable_guc_submission ? "submission enabled" : - "loaded", - guc->fw.path, + dev_info(dev_priv->drm.dev, "GuC firmware version %u.%u\n", guc->fw.major_ver_found, guc->fw.minor_ver_found); + dev_info(dev_priv->drm.dev, "GuC submission %s\n", + enableddisabled(USES_GUC_SUBMISSION(dev_priv))); + dev_info(dev_priv->drm.dev, "HuC %s\n", + enableddisabled(USES_HUC(dev_priv))); return 0; /* * We've failed to load the firmware :( - * - * Decide whether to disable GuC submission and fall back to - * execlist mode, and whether to hide the error by returning - * zero or to return -EIO, which the caller will treat as a - * nonfatal error (i.e. it doesn't prevent driver load, but - * marks the GPU as wedged until reset). */ err_interrupts: - guc_disable_communication(guc); gen9_disable_guc_interrupts(dev_priv); +err_communication: + guc_disable_communication(guc); err_log_capture: guc_capture_load_err_log(guc); -err_submission: - if (i915_modparams.enable_guc_submission) - intel_guc_submission_fini(guc); -err_guc: - i915_ggtt_disable_guc(dev_priv); - - if (i915_modparams.enable_guc_loading > 1 || - i915_modparams.enable_guc_submission > 1) { - DRM_ERROR("GuC init failed. Firmware loading disabled.\n"); - ret = -EIO; - } else { - DRM_NOTE("GuC init failed. Firmware loading disabled.\n"); - ret = 0; - } - - if (i915_modparams.enable_guc_submission) { - i915_modparams.enable_guc_submission = 0; - DRM_NOTE("Falling back from GuC submission to execlist mode\n"); - } - - i915_modparams.enable_guc_loading = 0; +err_out: + /* + * Note that there is no fallback as either user explicitly asked for + * the GuC or driver default option was to run with the GuC enabled. 
+ */ + if (GEM_WARN_ON(ret == -EIO)) + ret = -EINVAL; + dev_err(dev_priv->drm.dev, "GuC initialization failed %d\n", ret); return ret; } @@ -275,18 +368,16 @@ void intel_uc_fini_hw(struct drm_i915_private *dev_priv) guc_free_load_err_log(guc); - if (!i915_modparams.enable_guc_loading) + if (!USES_GUC(dev_priv)) return; - if (i915_modparams.enable_guc_submission) + GEM_BUG_ON(!HAS_GUC(dev_priv)); + + if (USES_GUC_SUBMISSION(dev_priv)) intel_guc_submission_disable(guc); guc_disable_communication(guc); - if (i915_modparams.enable_guc_submission) { + if (USES_GUC_SUBMISSION(dev_priv)) gen9_disable_guc_interrupts(dev_priv); - intel_guc_submission_fini(guc); - } - - i915_ggtt_disable_guc(dev_priv); } diff --git a/drivers/gpu/drm/i915/intel_uc.h b/drivers/gpu/drm/i915/intel_uc.h index e18d3bb02088..8a7249722ef1 100644 --- a/drivers/gpu/drm/i915/intel_uc.h +++ b/drivers/gpu/drm/i915/intel_uc.h @@ -26,13 +26,36 @@ #include "intel_guc.h" #include "intel_huc.h" +#include "i915_params.h" void intel_uc_sanitize_options(struct drm_i915_private *dev_priv); void intel_uc_init_early(struct drm_i915_private *dev_priv); void intel_uc_init_mmio(struct drm_i915_private *dev_priv); void intel_uc_init_fw(struct drm_i915_private *dev_priv); void intel_uc_fini_fw(struct drm_i915_private *dev_priv); +int intel_uc_init_wq(struct drm_i915_private *dev_priv); +void intel_uc_fini_wq(struct drm_i915_private *dev_priv); int intel_uc_init_hw(struct drm_i915_private *dev_priv); void intel_uc_fini_hw(struct drm_i915_private *dev_priv); +int intel_uc_init(struct drm_i915_private *dev_priv); +void intel_uc_fini(struct drm_i915_private *dev_priv); + +static inline bool intel_uc_is_using_guc(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc > 0; +} + +static inline bool intel_uc_is_using_guc_submission(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_SUBMISSION; +} + +static inline bool intel_uc_is_using_huc(void) +{ + GEM_BUG_ON(i915_modparams.enable_guc < 0); + return i915_modparams.enable_guc & ENABLE_GUC_LOAD_HUC; +} #endif diff --git a/drivers/gpu/drm/i915/intel_uc_fw.c b/drivers/gpu/drm/i915/intel_uc_fw.c index b376dd3b28cc..784eff9cdfc8 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/intel_uc_fw.c @@ -214,7 +214,7 @@ int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, intel_uc_fw_type_repr(uc_fw->type), uc_fw->path); if (uc_fw->fetch_status != INTEL_UC_FIRMWARE_SUCCESS) - return -EIO; + return -ENOEXEC; uc_fw->load_status = INTEL_UC_FIRMWARE_PENDING; DRM_DEBUG_DRIVER("%s fw load %s\n", diff --git a/drivers/gpu/drm/i915/intel_uc_fw.h b/drivers/gpu/drm/i915/intel_uc_fw.h index 5394d9d1e683..d5fd4609c785 100644 --- a/drivers/gpu/drm/i915/intel_uc_fw.h +++ b/drivers/gpu/drm/i915/intel_uc_fw.h @@ -110,6 +110,11 @@ void intel_uc_fw_init(struct intel_uc_fw *uc_fw, enum intel_uc_fw_type type) uc_fw->type = type; } +static inline bool intel_uc_fw_is_selected(struct intel_uc_fw *uc_fw) +{ + return uc_fw->path != NULL; +} + void intel_uc_fw_fetch(struct drm_i915_private *dev_priv, struct intel_uc_fw *uc_fw); int intel_uc_fw_upload(struct intel_uc_fw *uc_fw, diff --git a/drivers/gpu/drm/i915/selftests/huge_pages.c b/drivers/gpu/drm/i915/selftests/huge_pages.c index e6b31041cc88..2ea69394f428 100644 --- a/drivers/gpu/drm/i915/selftests/huge_pages.c +++ b/drivers/gpu/drm/i915/selftests/huge_pages.c @@ -1637,7 +1637,7 @@ static int igt_shrink_thp(void *arg) * shmem to truncate our pages. 
*/ i915_gem_shrink_all(i915); - if (!IS_ERR_OR_NULL(obj->mm.pages)) { + if (i915_gem_object_has_pages(obj)) { pr_err("shrink-all didn't truncate the pages\n"); err = -EINVAL; goto out_close; diff --git a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c index 6491cf0a4f46..4a28d713a7d8 100644 --- a/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c +++ b/drivers/gpu/drm/i915/selftests/i915_gem_gtt.c @@ -1074,7 +1074,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); iowrite32(n, vaddr + n); io_mapping_unmap_atomic(vaddr); @@ -1092,7 +1092,7 @@ static int igt_ggtt_page(void *arg) i915_gem_object_get_dma_address(obj, 0), offset, I915_CACHE_NONE, 0); - vaddr = io_mapping_map_atomic_wc(&ggtt->mappable, offset); + vaddr = io_mapping_map_atomic_wc(&ggtt->iomap, offset); val = ioread32(vaddr + n); io_mapping_unmap_atomic(vaddr); diff --git a/drivers/gpu/drm/i915/selftests/intel_guc.c b/drivers/gpu/drm/i915/selftests/intel_guc.c index 7b23597858bb..3f9016466dea 100644 --- a/drivers/gpu/drm/i915/selftests/intel_guc.c +++ b/drivers/gpu/drm/i915/selftests/intel_guc.c @@ -85,21 +85,26 @@ static int validate_client(struct intel_guc_client *client, return 0; } +static bool client_doorbell_in_sync(struct intel_guc_client *client) +{ + return doorbell_ok(client->guc, client->doorbell_id); +} + /* - * Check that guc_init_doorbell_hw is doing what it should. + * Check that we're able to synchronize guc_clients with their doorbells * - * During GuC submission enable, we create GuC clients and their doorbells, - * but after resetting the microcontroller (resume & gpu reset), these - * GuC clients are still around, but the status of their doorbells may be - * incorrect. This is the reason behind validating that the doorbells status - * expected by the driver matches what the GuC/HW have. + * We're creating clients and reserving doorbells once, at module load. During + * module lifetime, GuC, doorbell HW, and i915 state may go out of sync due to + * GuC being reset. In other words - GuC clients are still around, but the + * status of their doorbells may be incorrect. This is the reason behind + * validating that the doorbells status expected by the driver matches what the + * GuC/HW have. 
*/ -static int igt_guc_init_doorbell_hw(void *args) +static int igt_guc_clients(void *args) { struct drm_i915_private *dev_priv = args; struct intel_guc *guc; - DECLARE_BITMAP(db_bitmap_bk, GUC_NUM_DOORBELLS); - int i, err = 0; + int err = 0; GEM_BUG_ON(!HAS_GUC(dev_priv)); mutex_lock(&dev_priv->drm.struct_mutex); @@ -148,10 +153,21 @@ static int igt_guc_init_doorbell_hw(void *args) goto out; } - /* each client should have received a doorbell during alloc */ + /* each client should now have reserved a doorbell */ if (!has_doorbell(guc->execbuf_client) || !has_doorbell(guc->preempt_client)) { - pr_err("guc_clients_create didn't create doorbells\n"); + pr_err("guc_clients_create didn't reserve doorbells\n"); + err = -EINVAL; + goto out; + } + + /* Now create the doorbells */ + guc_clients_doorbell_init(guc); + + /* each client should now have received a doorbell */ + if (!client_doorbell_in_sync(guc->execbuf_client) || + !client_doorbell_in_sync(guc->preempt_client)) { + pr_err("failed to initialize the doorbells\n"); err = -EINVAL; goto out; } @@ -160,25 +176,26 @@ static int igt_guc_init_doorbell_hw(void *args) * Basic test - an attempt to reallocate a valid doorbell to the * client it is currently assigned should not cause a failure. */ - err = guc_init_doorbell_hw(guc); + err = guc_clients_doorbell_init(guc); if (err) goto out; /* * Negative test - a client with no doorbell (invalid db id). - * Each client gets a doorbell when it is created, after destroying - * the doorbell, the db id is changed to GUC_DOORBELL_INVALID and the - * firmware will reject any attempt to allocate a doorbell with an - * invalid id (db has to be reserved before allocation). + * After destroying the doorbell, the db id is changed to + * GUC_DOORBELL_INVALID and the firmware will reject any attempt to + * allocate a doorbell with an invalid id (db has to be reserved before + * allocation). */ destroy_doorbell(guc->execbuf_client); - if (has_doorbell(guc->execbuf_client)) { + if (client_doorbell_in_sync(guc->execbuf_client)) { pr_err("destroy db did not work\n"); err = -EINVAL; goto out; } - err = guc_init_doorbell_hw(guc); + unreserve_doorbell(guc->execbuf_client); + err = guc_clients_doorbell_init(guc); if (err != -EIO) { pr_err("unexpected (err = %d)", err); goto out; @@ -191,33 +208,13 @@ static int igt_guc_init_doorbell_hw(void *args) } /* clean after test */ - err = create_doorbell(guc->execbuf_client); - if (err) { - pr_err("recreate doorbell failed\n"); - goto out; - } - - /* - * Negative test - doorbell_bitmap out of sync, will trigger a few of - * WARN_ON(!doorbell_ok(guc, db_id)) but that's ok as long as the - * doorbells from our clients don't fail. - */ - bitmap_copy(db_bitmap_bk, guc->doorbell_bitmap, GUC_NUM_DOORBELLS); - for (i = 0; i < GUC_NUM_DOORBELLS; i++) - if (i % 2) - test_and_change_bit(i, guc->doorbell_bitmap); - - err = guc_init_doorbell_hw(guc); + err = reserve_doorbell(guc->execbuf_client); if (err) { - pr_err("out of sync doorbell caused an error\n"); - goto out; + pr_err("failed to reserve back the doorbell back\n"); } - - /* restore 'correct' db bitmap */ - bitmap_copy(guc->doorbell_bitmap, db_bitmap_bk, GUC_NUM_DOORBELLS); - err = guc_init_doorbell_hw(guc); + err = create_doorbell(guc->execbuf_client); if (err) { - pr_err("restored doorbell caused an error\n"); + pr_err("recreate doorbell failed\n"); goto out; } @@ -226,8 +223,11 @@ out: * Leave clean state for other test, plus the driver always destroy the * clients during unload. 
*/ + destroy_doorbell(guc->execbuf_client); + destroy_doorbell(guc->preempt_client); guc_clients_destroy(guc); guc_clients_create(guc); + guc_clients_doorbell_init(guc); unlock: mutex_unlock(&dev_priv->drm.struct_mutex); return err; @@ -309,25 +309,7 @@ static int igt_guc_doorbells(void *arg) db_id = clients[i]->doorbell_id; - /* - * Client alloc gives us a doorbell, but we want to exercise - * this ourselves (this resembles guc_init_doorbell_hw) - */ - destroy_doorbell(clients[i]); - if (clients[i]->doorbell_id != GUC_DOORBELL_INVALID) { - pr_err("[%d] destroy db did not work!\n", i); - err = -EINVAL; - goto out; - } - - err = __reserve_doorbell(clients[i]); - if (err) { - pr_err("[%d] Failed to reserve a doorbell\n", i); - goto out; - } - - __update_doorbell_desc(clients[i], clients[i]->doorbell_id); - err = __create_doorbell(clients[i]); + err = create_doorbell(clients[i]); if (err) { pr_err("[%d] Failed to create a doorbell\n", i); goto out; @@ -348,8 +330,10 @@ static int igt_guc_doorbells(void *arg) out: for (i = 0; i < ATTEMPTS; i++) - if (!IS_ERR_OR_NULL(clients[i])) + if (!IS_ERR_OR_NULL(clients[i])) { + destroy_doorbell(clients[i]); guc_client_free(clients[i]); + } unlock: mutex_unlock(&dev_priv->drm.struct_mutex); return err; @@ -358,11 +342,11 @@ unlock: int intel_guc_live_selftest(struct drm_i915_private *dev_priv) { static const struct i915_subtest tests[] = { - SUBTEST(igt_guc_init_doorbell_hw), + SUBTEST(igt_guc_clients), SUBTEST(igt_guc_doorbells), }; - if (!i915_modparams.enable_guc_submission) + if (!USES_GUC_SUBMISSION(dev_priv)) return 0; return i915_subtests(tests, dev_priv); diff --git a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c index 1bbb8c46e2d9..d1f91a533afa 100644 --- a/drivers/gpu/drm/i915/selftests/intel_hangcheck.c +++ b/drivers/gpu/drm/i915/selftests/intel_hangcheck.c @@ -132,6 +132,12 @@ static int emit_recurse_batch(struct hang *h, *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = upper_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8 | 1; *batch++ = lower_32_bits(vma->node.start); *batch++ = upper_32_bits(vma->node.start); @@ -140,6 +146,12 @@ static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 1 << 8; *batch++ = lower_32_bits(vma->node.start); } else if (INTEL_GEN(i915) >= 4) { @@ -147,12 +159,24 @@ static int emit_recurse_batch(struct hang *h, *batch++ = 0; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6; *batch++ = lower_32_bits(vma->node.start); } else { *batch++ = MI_STORE_DWORD_IMM; *batch++ = lower_32_bits(hws_address(hws, rq)); *batch++ = rq->fence.seqno; + *batch++ = MI_ARB_CHECK; + + memset(batch, 0, 1024); + batch += 1024 / sizeof(*batch); + + *batch++ = MI_ARB_CHECK; *batch++ = MI_BATCH_BUFFER_START | 2 << 6 | 1; *batch++ = lower_32_bits(vma->node.start); } @@ -234,6 +258,16 @@ static void hang_fini(struct hang *h) i915_gem_wait_for_idle(h->i915, I915_WAIT_LOCKED); } +static bool 
wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) +{ + return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 10) && + wait_for(i915_seqno_passed(hws_seqno(h, rq), + rq->fence.seqno), + 1000)); +} + static int igt_hang_sanitycheck(void *arg) { struct drm_i915_private *i915 = arg; @@ -296,6 +330,9 @@ static void global_reset_lock(struct drm_i915_private *i915) struct intel_engine_cs *engine; enum intel_engine_id id; + pr_debug("%s: current gpu_error=%08lx\n", + __func__, i915->gpu_error.flags); + while (test_and_set_bit(I915_RESET_BACKOFF, &i915->gpu_error.flags)) wait_event(i915->gpu_error.reset_queue, !test_bit(I915_RESET_BACKOFF, @@ -353,54 +390,128 @@ static int igt_global_reset(void *arg) return err; } -static int igt_reset_engine(void *arg) +static int __igt_reset_engine(struct drm_i915_private *i915, bool active) { - struct drm_i915_private *i915 = arg; struct intel_engine_cs *engine; enum intel_engine_id id; - unsigned int reset_count, reset_engine_count; + struct hang h; int err = 0; - /* Check that we can issue a global GPU and engine reset */ + /* Check that we can issue an engine reset on an idle engine (no-op) */ if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + unsigned int reset_count, reset_engine_count; + IGT_TIMEOUT(end_time); + + if (active && !intel_engine_can_store_dword(engine)) + continue; + reset_count = i915_reset_count(&i915->gpu_error); reset_engine_count = i915_reset_engine_count(&i915->gpu_error, engine); - err = i915_reset_engine(engine, I915_RESET_QUIET); - if (err) { - pr_err("i915_reset_engine failed\n"); - break; - } + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); + err = -EIO; + break; + } - if (i915_reset_count(&i915->gpu_error) != reset_count) { - pr_err("Full GPU reset recorded! (engine reset expected)\n"); - err = -EINVAL; - break; - } + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + + err = i915_reset_engine(engine, I915_RESET_QUIET); + if (err) { + pr_err("i915_reset_engine failed\n"); + break; + } + + if (i915_reset_count(&i915->gpu_error) != reset_count) { + pr_err("Full GPU reset recorded! (engine reset expected)\n"); + err = -EINVAL; + break; + } + + reset_engine_count += active; + if (i915_reset_engine_count(&i915->gpu_error, engine) != + reset_engine_count) { + pr_err("%s engine reset %srecorded!\n", + engine->name, active ? 
"not " : ""); + err = -EINVAL; + break; + } + + engine->hangcheck.stalled = false; + } while (time_before(jiffies, end_time)); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); - if (i915_reset_engine_count(&i915->gpu_error, engine) == - reset_engine_count) { - pr_err("No %s engine reset recorded!\n", engine->name); - err = -EINVAL; + if (err) break; - } - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + cond_resched(); } if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine(void *arg) +{ + return __igt_reset_engine(arg, false); +} + +static int igt_reset_active_engine(void *arg) +{ + return __igt_reset_engine(arg, true); +} + static int active_engine(void *data) { struct intel_engine_cs *engine = data; @@ -462,11 +573,12 @@ err_file: return err; } -static int igt_reset_active_engines(void *arg) +static int __igt_reset_engine_others(struct drm_i915_private *i915, + bool active) { - struct drm_i915_private *i915 = arg; - struct intel_engine_cs *engine, *active; + struct intel_engine_cs *engine, *other; enum intel_engine_id id, tmp; + struct hang h; int err = 0; /* Check that issuing a reset on one engine does not interfere @@ -476,24 +588,36 @@ static int igt_reset_active_engines(void *arg) if (!intel_has_reset_engine(i915)) return 0; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + err = hang_init(&h, i915); + mutex_unlock(&i915->drm.struct_mutex); + if (err) + return err; + } + for_each_engine(engine, i915, id) { - struct task_struct *threads[I915_NUM_ENGINES]; + struct task_struct *threads[I915_NUM_ENGINES] = {}; unsigned long resets[I915_NUM_ENGINES]; unsigned long global = i915_reset_count(&i915->gpu_error); + unsigned long count = 0; IGT_TIMEOUT(end_time); + if (active && !intel_engine_can_store_dword(engine)) + continue; + memset(threads, 0, sizeof(threads)); - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { struct task_struct *tsk; - if (active == engine) - continue; - resets[tmp] = i915_reset_engine_count(&i915->gpu_error, - active); + other); - tsk = kthread_run(active_engine, active, - "igt/%s", active->name); + if (other == engine) + continue; + + tsk = kthread_run(active_engine, other, + "igt/%s", other->name); if (IS_ERR(tsk)) { err = PTR_ERR(tsk); goto unwind; @@ -503,20 +627,70 @@ static int igt_reset_active_engines(void *arg) get_task_struct(tsk); } - set_bit(I915_RESET_ENGINE + engine->id, &i915->gpu_error.flags); + set_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); do { + if (active) { + struct drm_i915_gem_request *rq; + + mutex_lock(&i915->drm.struct_mutex); + rq = hang_create_request(&h, engine, + i915->kernel_context); + if (IS_ERR(rq)) { + err = PTR_ERR(rq); + mutex_unlock(&i915->drm.struct_mutex); + break; + } + + i915_gem_request_get(rq); + __i915_add_request(rq, true); + mutex_unlock(&i915->drm.struct_mutex); + + if (!wait_for_hang(&h, rq)) { + struct drm_printer p = drm_info_printer(i915->drm.dev); + + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(engine, &p, + "%s\n", engine->name); + + i915_gem_request_put(rq); + err = -EIO; + break; + } + + i915_gem_request_put(rq); + } + + engine->hangcheck.stalled = true; + engine->hangcheck.seqno = + intel_engine_get_seqno(engine); + err = i915_reset_engine(engine, I915_RESET_QUIET); if (err) { - 
pr_err("i915_reset_engine(%s) failed, err=%d\n", - engine->name, err); + pr_err("i915_reset_engine(%s:%s) failed, err=%d\n", + engine->name, active ? "active" : "idle", err); break; } + + engine->hangcheck.stalled = false; + count++; } while (time_before(jiffies, end_time)); - clear_bit(I915_RESET_ENGINE + engine->id, - &i915->gpu_error.flags); + clear_bit(I915_RESET_ENGINE + id, &i915->gpu_error.flags); + pr_info("i915_reset_engine(%s:%s): %lu resets\n", + engine->name, active ? "active" : "idle", count); + + if (i915_reset_engine_count(&i915->gpu_error, engine) - + resets[engine->id] != (active ? count : 0)) { + pr_err("i915_reset_engine(%s:%s): reset %lu times, but reported %lu\n", + engine->name, active ? "active" : "idle", count, + i915_reset_engine_count(&i915->gpu_error, + engine) - resets[engine->id]); + if (!err) + err = -EINVAL; + } unwind: - for_each_engine(active, i915, tmp) { + for_each_engine(other, i915, tmp) { int ret; if (!threads[tmp]) @@ -524,27 +698,29 @@ unwind: ret = kthread_stop(threads[tmp]); if (ret) { - pr_err("kthread for active engine %s failed, err=%d\n", - active->name, ret); + pr_err("kthread for other engine %s failed, err=%d\n", + other->name, ret); if (!err) err = ret; } put_task_struct(threads[tmp]); if (resets[tmp] != i915_reset_engine_count(&i915->gpu_error, - active)) { + other)) { pr_err("Innocent engine %s was reset (count=%ld)\n", - active->name, + other->name, i915_reset_engine_count(&i915->gpu_error, - active) - resets[tmp]); - err = -EIO; + other) - resets[tmp]); + if (!err) + err = -EINVAL; } } if (global != i915_reset_count(&i915->gpu_error)) { pr_err("Global reset (count=%ld)!\n", i915_reset_count(&i915->gpu_error) - global); - err = -EIO; + if (!err) + err = -EINVAL; } if (err) @@ -556,9 +732,25 @@ unwind: if (i915_terminally_wedged(&i915->gpu_error)) err = -EIO; + if (active) { + mutex_lock(&i915->drm.struct_mutex); + hang_fini(&h); + mutex_unlock(&i915->drm.struct_mutex); + } + return err; } +static int igt_reset_idle_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, false); +} + +static int igt_reset_active_engine_others(void *arg) +{ + return __igt_reset_engine_others(arg, true); +} + static u32 fake_hangcheck(struct drm_i915_gem_request *rq) { u32 reset_count; @@ -574,16 +766,6 @@ static u32 fake_hangcheck(struct drm_i915_gem_request *rq) return reset_count; } -static bool wait_for_hang(struct hang *h, struct drm_i915_gem_request *rq) -{ - return !(wait_for_us(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 10) && - wait_for(i915_seqno_passed(hws_seqno(h, rq), - rq->fence.seqno), - 1000)); -} - static int igt_wait_reset(void *arg) { struct drm_i915_private *i915 = arg; @@ -617,9 +799,9 @@ static int igt_wait_reset(void *arg) if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -712,9 +894,10 @@ static int igt_reset_queue(void *arg) if (!wait_for_hang(&h, prev)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - prev->fence.seqno, hws_seqno(&h, prev)); - intel_engine_dump(rq->engine, &p); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, prev->fence.seqno, hws_seqno(&h, 
prev)); + intel_engine_dump(prev->engine, &p, + "%s\n", prev->engine->name); i915_gem_request_put(rq); i915_gem_request_put(prev); @@ -818,9 +1001,9 @@ static int igt_handle_error(void *arg) if (!wait_for_hang(&h, rq)) { struct drm_printer p = drm_info_printer(i915->drm.dev); - pr_err("Failed to start request %x, at %x\n", - rq->fence.seqno, hws_seqno(&h, rq)); - intel_engine_dump(rq->engine, &p); + pr_err("%s: Failed to start request %x, at %x\n", + __func__, rq->fence.seqno, hws_seqno(&h, rq)); + intel_engine_dump(rq->engine, &p, "%s\n", rq->engine->name); i915_reset(i915, 0); i915_gem_set_wedged(i915); @@ -863,21 +1046,26 @@ int intel_hangcheck_live_selftests(struct drm_i915_private *i915) static const struct i915_subtest tests[] = { SUBTEST(igt_global_reset), /* attempt to recover GPU first */ SUBTEST(igt_hang_sanitycheck), - SUBTEST(igt_reset_engine), - SUBTEST(igt_reset_active_engines), + SUBTEST(igt_reset_idle_engine), + SUBTEST(igt_reset_active_engine), + SUBTEST(igt_reset_idle_engine_others), + SUBTEST(igt_reset_active_engine_others), SUBTEST(igt_wait_reset), SUBTEST(igt_reset_queue), SUBTEST(igt_handle_error), }; + bool saved_hangcheck; int err; if (!intel_has_gpu_reset(i915)) return 0; intel_runtime_pm_get(i915); + saved_hangcheck = fetch_and_zero(&i915_modparams.enable_hangcheck); err = i915_subtests(tests, i915); + i915_modparams.enable_hangcheck = saved_hangcheck; intel_runtime_pm_put(i915); return err; diff --git a/drivers/gpu/drm/i915/selftests/mock_gem_device.c b/drivers/gpu/drm/i915/selftests/mock_gem_device.c index 80f152aaedf9..1bc61f3f76fc 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gem_device.c +++ b/drivers/gpu/drm/i915/selftests/mock_gem_device.c @@ -85,6 +85,8 @@ static void mock_device_release(struct drm_device *dev) i915_gemfs_fini(i915); + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device(&i915->drm.pdev->dev); } @@ -187,7 +189,7 @@ struct drm_i915_private *mock_gem_device(void) i915->wq = alloc_ordered_workqueue("mock", 0); if (!i915->wq) - goto put_device; + goto err_drv; mock_init_contexts(i915); @@ -266,6 +268,9 @@ err_objects: kmem_cache_destroy(i915->objects); err_wq: destroy_workqueue(i915->wq); +err_drv: + drm_mode_config_cleanup(&i915->drm); + drm_dev_fini(&i915->drm); put_device: put_device(&pdev->dev); err: diff --git a/drivers/gpu/drm/i915/selftests/mock_gtt.c b/drivers/gpu/drm/i915/selftests/mock_gtt.c index 336e1afb250f..e96873f96116 100644 --- a/drivers/gpu/drm/i915/selftests/mock_gtt.c +++ b/drivers/gpu/drm/i915/selftests/mock_gtt.c @@ -110,8 +110,8 @@ void mock_init_ggtt(struct drm_i915_private *i915) ggtt->base.i915 = i915; - ggtt->mappable_base = 0; - ggtt->mappable_end = 2048 * PAGE_SIZE; + ggtt->gmadr = (struct resource) DEFINE_RES_MEM(0, 2048 * PAGE_SIZE); + ggtt->mappable_end = resource_size(&ggtt->gmadr); ggtt->base.total = 4096 * PAGE_SIZE; ggtt->base.clear_range = nop_clear_range; |
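For reference, the intel_uc_is_using_*() helpers added in intel_uc.h above decode the single enable_guc modparam that replaces enable_guc_loading/enable_guc_submission. The standalone sketch below is illustrative only and is not part of the patch; the ENABLE_GUC_SUBMISSION and ENABLE_GUC_LOAD_HUC bit values are assumed to match the BIT(0)/BIT(1) definitions this series adds to i915_params.h.

/* Illustrative sketch only -- not part of the patch above. */
#include <stdio.h>

/* Assumed to mirror the ENABLE_GUC_* flags added to i915_params.h. */
#define ENABLE_GUC_SUBMISSION	(1 << 0)
#define ENABLE_GUC_LOAD_HUC	(1 << 1)

/*
 * Decodes an enable_guc value the way the intel_uc_is_using_*() helpers do:
 * a negative value means "auto" (platform default), zero disables the GuC
 * entirely, and the individual bits select GuC submission and HuC loading.
 */
static void describe_enable_guc(int enable_guc)
{
	if (enable_guc < 0) {
		printf("enable_guc=%d: auto (platform default)\n", enable_guc);
		return;
	}

	printf("enable_guc=%d: GuC %s, submission %s, HuC load %s\n",
	       enable_guc,
	       enable_guc > 0 ? "used" : "not used",
	       (enable_guc & ENABLE_GUC_SUBMISSION) ? "enabled" : "disabled",
	       (enable_guc & ENABLE_GUC_LOAD_HUC) ? "enabled" : "disabled");
}

int main(void)
{
	describe_enable_guc(-1);
	describe_enable_guc(0);
	describe_enable_guc(ENABLE_GUC_LOAD_HUC);
	describe_enable_guc(ENABLE_GUC_SUBMISSION | ENABLE_GUC_LOAD_HUC);
	return 0;
}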