50 files changed, 1282 insertions, 299 deletions
diff --git a/drivers/gpu/drm/ast/ast_fb.c b/drivers/gpu/drm/ast/ast_fb.c
index 3f65dd6676b2..a28640f47c27 100644
--- a/drivers/gpu/drm/ast/ast_fb.c
+++ b/drivers/gpu/drm/ast/ast_fb.c
@@ -65,7 +65,7 @@ static void ast_dirty_update(struct ast_fbdev *afbdev,
 	 * then the BO is being moved and we should
 	 * store up the damage until later.
 	 */
-	if (!drm_can_sleep())
+	if (drm_can_sleep())
 		ret = ast_bo_reserve(bo, true);
 	if (ret) {
 		if (ret != -EBUSY)
diff --git a/drivers/gpu/drm/cirrus/cirrus_fbdev.c b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
index 2fd4a92162cb..32bbba0a787b 100644
--- a/drivers/gpu/drm/cirrus/cirrus_fbdev.c
+++ b/drivers/gpu/drm/cirrus/cirrus_fbdev.c
@@ -39,7 +39,7 @@ static void cirrus_dirty_update(struct cirrus_fbdev *afbdev,
 	 * then the BO is being moved and we should
 	 * store up the damage until later.
 	 */
-	if (!drm_can_sleep())
+	if (drm_can_sleep())
 		ret = cirrus_bo_reserve(bo, true);
 	if (ret) {
 		if (ret != -EBUSY)
diff --git a/drivers/gpu/drm/exynos/Kconfig b/drivers/gpu/drm/exynos/Kconfig
index f227f544aa36..6e1a1a20cf6b 100644
--- a/drivers/gpu/drm/exynos/Kconfig
+++ b/drivers/gpu/drm/exynos/Kconfig
@@ -51,7 +51,7 @@ config DRM_EXYNOS_G2D
 
 config DRM_EXYNOS_IPP
 	bool "Exynos DRM IPP"
-	depends on DRM_EXYNOS && !ARCH_MULTIPLATFORM
+	depends on DRM_EXYNOS
 	help
 	  Choose this option if you want to use IPP feature for DRM.
 
@@ -69,6 +69,6 @@ config DRM_EXYNOS_ROTATOR
 
 config DRM_EXYNOS_GSC
 	bool "Exynos DRM GSC"
-	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5
+	depends on DRM_EXYNOS_IPP && ARCH_EXYNOS5 && !ARCH_MULTIPLATFORM
 	help
 	  Choose this option if you want to use Exynos GSC for DRM.
diff --git a/drivers/gpu/drm/exynos/exynos_drm_drv.c b/drivers/gpu/drm/exynos/exynos_drm_drv.c
index 9d096a0c5f8d..215131ab1dd2 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_drv.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_drv.c
@@ -171,22 +171,24 @@ static int exynos_drm_open(struct drm_device *dev, struct drm_file *file)
 	file->driver_priv = file_priv;
 
 	ret = exynos_drm_subdrv_open(dev, file);
-	if (ret) {
-		kfree(file_priv);
-		file->driver_priv = NULL;
-	}
+	if (ret)
+		goto out;
 
 	anon_filp = anon_inode_getfile("exynos_gem", &exynos_drm_gem_fops,
 					NULL, 0);
 	if (IS_ERR(anon_filp)) {
-		kfree(file_priv);
-		return PTR_ERR(anon_filp);
+		ret = PTR_ERR(anon_filp);
+		goto out;
 	}
 
 	anon_filp->f_mode = FMODE_READ | FMODE_WRITE;
 	file_priv->anon_filp = anon_filp;
 
 	return ret;
+out:
+	kfree(file_priv);
+	file->driver_priv = NULL;
+	return ret;
 }
 
 static void exynos_drm_preclose(struct drm_device *dev,
diff --git a/drivers/gpu/drm/exynos/exynos_drm_g2d.c b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
index 380aec28840b..6c1885eedfdf 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_g2d.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_g2d.c
@@ -607,7 +607,7 @@ static enum g2d_reg_type g2d_get_reg_type(int reg_offset)
 		reg_type = REG_TYPE_NONE;
 		DRM_ERROR("Unknown register offset![%d]\n", reg_offset);
 		break;
-	};
+	}
 
 	return reg_type;
 }
diff --git a/drivers/gpu/drm/exynos/exynos_drm_ipp.c b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
index d519a4e5fe40..09312b877470 100644
--- a/drivers/gpu/drm/exynos/exynos_drm_ipp.c
+++ b/drivers/gpu/drm/exynos/exynos_drm_ipp.c
@@ -16,7 +16,6 @@
 #include <linux/types.h>
 #include <linux/clk.h>
 #include <linux/pm_runtime.h>
-#include <plat/map-base.h>
 
 #include <drm/drmP.h>
 #include <drm/exynos_drm.h>
@@ -826,7 +825,7 @@ static void ipp_put_event(struct drm_exynos_ipp_cmd_node *c_node,
 		DRM_DEBUG_KMS("count[%d]e[0x%x]\n", count++, (int)e);
 
 		/*
-		 * quf == NULL condition means all event deletion.
+		 * qbuf == NULL condition means all event deletion.
 		 * stop operations want to delete all event list.
 		 * another case delete only same buf id.
 		 */
diff --git a/drivers/gpu/drm/exynos/exynos_hdmi.c b/drivers/gpu/drm/exynos/exynos_hdmi.c
index a0e10aeb0e67..c021ddc1ffb4 100644
--- a/drivers/gpu/drm/exynos/exynos_hdmi.c
+++ b/drivers/gpu/drm/exynos/exynos_hdmi.c
@@ -34,6 +34,7 @@
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_gpio.h>
+#include <linux/hdmi.h>
 
 #include <drm/exynos_drm.h>
 
@@ -59,19 +60,6 @@
 #define HDMI_AUI_VERSION	0x01
 #define HDMI_AUI_LENGTH	0x0A
 
-/* HDMI infoframe to configure HDMI out packet header, AUI and AVI */
-enum HDMI_PACKET_TYPE {
-	/* refer to Table 5-8 Packet Type in HDMI specification v1.4a */
-	/* InfoFrame packet type */
-	HDMI_PACKET_TYPE_INFOFRAME = 0x80,
-	/* Vendor-Specific InfoFrame */
-	HDMI_PACKET_TYPE_VSI = HDMI_PACKET_TYPE_INFOFRAME + 1,
-	/* Auxiliary Video information InfoFrame */
-	HDMI_PACKET_TYPE_AVI = HDMI_PACKET_TYPE_INFOFRAME + 2,
-	/* Audio information InfoFrame */
-	HDMI_PACKET_TYPE_AUI = HDMI_PACKET_TYPE_INFOFRAME + 4
-};
-
 enum hdmi_type {
 	HDMI_TYPE13,
 	HDMI_TYPE14,
@@ -379,12 +367,6 @@ static const struct hdmiphy_config hdmiphy_v14_configs[] = {
 	},
 };
 
-struct hdmi_infoframe {
-	enum HDMI_PACKET_TYPE type;
-	u8 ver;
-	u8 len;
-};
-
 static inline u32 hdmi_reg_read(struct hdmi_context *hdata, u32 reg_id)
 {
 	return readl(hdata->regs + reg_id);
@@ -682,7 +664,7 @@ static u8 hdmi_chksum(struct hdmi_context *hdata,
 }
 
 static void hdmi_reg_infoframe(struct hdmi_context *hdata,
-			struct hdmi_infoframe *infoframe)
+			union hdmi_infoframe *infoframe)
 {
 	u32 hdr_sum;
 	u8 chksum;
@@ -700,13 +682,15 @@ static void hdmi_reg_infoframe(struct hdmi_context *hdata,
 		return;
 	}
 
-	switch (infoframe->type) {
-	case HDMI_PACKET_TYPE_AVI:
+	switch (infoframe->any.type) {
+	case HDMI_INFOFRAME_TYPE_AVI:
 		hdmi_reg_writeb(hdata, HDMI_AVI_CON, HDMI_AVI_CON_EVERY_VSYNC);
-		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER0, infoframe->type);
-		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER1, infoframe->ver);
-		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER2, infoframe->len);
-		hdr_sum = infoframe->type + infoframe->ver + infoframe->len;
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER0, infoframe->any.type);
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER1,
+				infoframe->any.version);
+		hdmi_reg_writeb(hdata, HDMI_AVI_HEADER2, infoframe->any.length);
+		hdr_sum = infoframe->any.type + infoframe->any.version +
+			  infoframe->any.length;
 
 		/* Output format zero hardcoded ,RGB YBCR selection */
 		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(1), 0 << 5 |
@@ -722,18 +706,20 @@ static void hdmi_reg_infoframe(struct hdmi_context *hdata,
 		hdmi_reg_writeb(hdata, HDMI_AVI_BYTE(4), vic);
 
 		chksum = hdmi_chksum(hdata, HDMI_AVI_BYTE(1),
-					infoframe->len, hdr_sum);
+					infoframe->any.length, hdr_sum);
 		DRM_DEBUG_KMS("AVI checksum = 0x%x\n", chksum);
 		hdmi_reg_writeb(hdata, HDMI_AVI_CHECK_SUM, chksum);
 		break;
-	case HDMI_PACKET_TYPE_AUI:
+	case HDMI_INFOFRAME_TYPE_AUDIO:
 		hdmi_reg_writeb(hdata, HDMI_AUI_CON, 0x02);
-		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER0, infoframe->type);
-		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER1, infoframe->ver);
-		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER2, infoframe->len);
-		hdr_sum = infoframe->type + infoframe->ver + infoframe->len;
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER0, infoframe->any.type);
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER1,
+				infoframe->any.version);
+		hdmi_reg_writeb(hdata, HDMI_AUI_HEADER2, infoframe->any.length);
+		hdr_sum = infoframe->any.type + infoframe->any.version +
+			  infoframe->any.length;
 		chksum = hdmi_chksum(hdata, HDMI_AUI_BYTE(1),
-					infoframe->len, hdr_sum);
+					infoframe->any.length, hdr_sum);
 		DRM_DEBUG_KMS("AUI checksum = 0x%x\n", chksum);
 		hdmi_reg_writeb(hdata, HDMI_AUI_CHECK_SUM, chksum);
 		break;
@@ -985,7 +971,7 @@ static void hdmi_conf_reset(struct hdmi_context *hdata)
 
 static void hdmi_conf_init(struct hdmi_context *hdata)
 {
-	struct hdmi_infoframe infoframe;
+	union hdmi_infoframe infoframe;
 
 	/* disable HPD interrupts from HDMI IP block, use GPIO instead */
 	hdmi_reg_writemask(hdata, HDMI_INTC_CON, 0, HDMI_INTC_EN_GLOBAL |
@@ -1021,14 +1007,14 @@ static void hdmi_conf_init(struct hdmi_context *hdata)
 		hdmi_reg_writeb(hdata, HDMI_V13_AUI_CON, 0x02);
 		hdmi_reg_writeb(hdata, HDMI_V13_ACR_CON, 0x04);
 	} else {
-		infoframe.type = HDMI_PACKET_TYPE_AVI;
-		infoframe.ver = HDMI_AVI_VERSION;
-		infoframe.len = HDMI_AVI_LENGTH;
+		infoframe.any.type = HDMI_INFOFRAME_TYPE_AVI;
+		infoframe.any.version = HDMI_AVI_VERSION;
+		infoframe.any.length = HDMI_AVI_LENGTH;
 		hdmi_reg_infoframe(hdata, &infoframe);
 
-		infoframe.type = HDMI_PACKET_TYPE_AUI;
-		infoframe.ver = HDMI_AUI_VERSION;
-		infoframe.len = HDMI_AUI_LENGTH;
+		infoframe.any.type = HDMI_INFOFRAME_TYPE_AUDIO;
+		infoframe.any.version = HDMI_AUI_VERSION;
+		infoframe.any.length = HDMI_AUI_LENGTH;
 		hdmi_reg_infoframe(hdata, &infoframe);
 
 		/* enable AVI packet every vsync, fixes purple line problem */
diff --git a/drivers/gpu/drm/i2c/tda998x_drv.c b/drivers/gpu/drm/i2c/tda998x_drv.c
index 400b0c4a10fb..fa18cf374470 100644
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
@@ -208,7 +208,7 @@ struct tda998x_priv {
 # define PLL_SERIAL_1_SRL_IZ(x)   (((x) & 3) << 1)
 # define PLL_SERIAL_1_SRL_MAN_IZ  (1 << 6)
 #define REG_PLL_SERIAL_2          REG(0x02, 0x01)     /* read/write */
-# define PLL_SERIAL_2_SRL_NOSC(x) (((x) & 3) << 0)
+# define PLL_SERIAL_2_SRL_NOSC(x) ((x) << 0)
 # define PLL_SERIAL_2_SRL_PR(x)   (((x) & 0xf) << 4)
 #define REG_PLL_SERIAL_3          REG(0x02, 0x02)     /* read/write */
 # define PLL_SERIAL_3_SRL_CCIR    (1 << 0)
@@ -528,10 +528,10 @@ tda998x_write_aif(struct drm_encoder *encoder, struct tda998x_encoder_params *p)
 {
 	uint8_t buf[PB(5) + 1];
 
+	memset(buf, 0, sizeof(buf));
 	buf[HB(0)] = 0x84;
 	buf[HB(1)] = 0x01;
 	buf[HB(2)] = 10;
-	buf[PB(0)] = 0;
 	buf[PB(1)] = p->audio_frame[1] & 0x07; /* CC */
 	buf[PB(2)] = p->audio_frame[2] & 0x1c; /* SF */
 	buf[PB(4)] = p->audio_frame[4];
@@ -824,6 +824,11 @@ tda998x_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	div = 148500 / mode->clock;
+	if (div != 0) {
+		div--;
+		if (div > 3)
+			div = 3;
+	}
 
 	/* mute the audio FIFO: */
 	reg_set(encoder, REG_AIP_CNTRL_0, AIP_CNTRL_0_RST_FIFO);
@@ -913,7 +918,7 @@ tda998x_encoder_mode_set(struct drm_encoder *encoder,
 
 	if (priv->rev == TDA19988) {
 		/* let incoming pixels fill the active space (if any) */
-		reg_write(encoder, REG_ENABLE_SPACE, 0x01);
+		reg_write(encoder, REG_ENABLE_SPACE, 0x00);
 	}
 
 	/* must be last register set: */
@@ -1094,6 +1099,8 @@ tda998x_encoder_destroy(struct drm_encoder *encoder)
 {
 	struct tda998x_priv *priv = to_tda998x_priv(encoder);
 	drm_i2c_encoder_destroy(encoder);
+	if (priv->cec)
+		i2c_unregister_device(priv->cec);
 	kfree(priv);
 }
 
@@ -1142,8 +1149,10 @@ tda998x_encoder_init(struct i2c_client *client,
 	priv->vip_cntrl_1 = VIP_CNTRL_1_SWAP_C(0) | VIP_CNTRL_1_SWAP_D(1);
 	priv->vip_cntrl_2 = VIP_CNTRL_2_SWAP_E(4) | VIP_CNTRL_2_SWAP_F(5);
 
-	priv->current_page = 0;
+	priv->current_page = 0xff;
 	priv->cec = i2c_new_dummy(client->adapter, 0x34);
+	if (!priv->cec)
+		return -ENODEV;
 	priv->dpms = DRM_MODE_DPMS_OFF;
 
 	encoder_slave->slave_priv = priv;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 4a2bf8e3f739..df77e20e3c3d 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -1831,6 +1831,14 @@ struct drm_i915_file_private {
 
 /* Early gen2 have a totally busted CS tlb and require pinned batches. */
 #define HAS_BROKEN_CS_TLB(dev)		(IS_I830(dev) || IS_845G(dev))
+/*
+ * dp aux and gmbus irq on gen4 seems to be able to generate legacy interrupts
+ * even when in MSI mode. This results in spurious interrupt warnings if the
+ * legacy irq no. is shared with another device. The kernel then disables that
+ * interrupt source and so prevents the other device from working properly.
+ */
+#define HAS_AUX_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
+#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
 
 /* With the 945 and later, Y tiling got adjusted so that it was 32 128-byte
  * rows, which changed the alignment requirements and fence programming.
diff --git a/drivers/gpu/drm/i915/i915_gpu_error.c b/drivers/gpu/drm/i915/i915_gpu_error.c
index d7fd2fd2f0a5..990cf8f43efd 100644
--- a/drivers/gpu/drm/i915/i915_gpu_error.c
+++ b/drivers/gpu/drm/i915/i915_gpu_error.c
@@ -146,7 +146,10 @@ static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
 		va_list tmp;
 
 		va_copy(tmp, args);
-		if (!__i915_error_seek(e, vsnprintf(NULL, 0, f, tmp)))
+		len = vsnprintf(NULL, 0, f, tmp);
+		va_end(tmp);
+
+		if (!__i915_error_seek(e, len))
 			return;
 	}
 
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 17d8fcb1b6f7..9fec71175571 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -567,8 +567,7 @@ static u32 i915_get_vblank_counter(struct drm_device *dev, int pipe)
 
 		vbl_start = mode->crtc_vblank_start * mode->crtc_htotal;
 	} else {
-		enum transcoder cpu_transcoder =
-			intel_pipe_to_cpu_transcoder(dev_priv, pipe);
+		enum transcoder cpu_transcoder = (enum transcoder) pipe;
 		u32 htotal;
 
 		htotal = ((I915_READ(HTOTAL(cpu_transcoder)) >> 16) & 0x1fff) + 1;
diff --git a/drivers/gpu/drm/i915/intel_dp.c b/drivers/gpu/drm/i915/intel_dp.c
index 5ede4e8e290d..2f517b85b3f4 100644
--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@@ -404,7 +404,7 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	int i, ret, recv_bytes;
 	uint32_t status;
 	int try, precharge, clock = 0;
-	bool has_aux_irq = true;
+	bool has_aux_irq = HAS_AUX_IRQ(dev);
 	uint32_t timeout;
 
 	/* dp aux is extremely sensitive to irq latency, hence request the
@@ -1869,10 +1869,12 @@ static void vlv_pre_enable_dp(struct intel_encoder *encoder)
 
 	mutex_unlock(&dev_priv->dpio_lock);
 
-	/* init power sequencer on this pipe and port */
-	intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq);
-	intel_dp_init_panel_power_sequencer_registers(dev, intel_dp,
-						      &power_seq);
+	if (is_edp(intel_dp)) {
+		/* init power sequencer on this pipe and port */
+		intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq);
+		intel_dp_init_panel_power_sequencer_registers(dev, intel_dp,
+							      &power_seq);
+	}
 
 	intel_enable_dp(encoder);
 
diff --git a/drivers/gpu/drm/i915/intel_i2c.c b/drivers/gpu/drm/i915/intel_i2c.c
index b1dc33f47899..d33b61d0dd33 100644
--- a/drivers/gpu/drm/i915/intel_i2c.c
+++ b/drivers/gpu/drm/i915/intel_i2c.c
@@ -258,13 +258,6 @@ intel_gpio_setup(struct intel_gmbus *bus, u32 pin)
 	algo->data = bus;
 }
 
-/*
- * gmbus on gen4 seems to be able to generate legacy interrupts even when in MSI
- * mode. This results in spurious interrupt warnings if the legacy irq no. is
- * shared with another device. The kernel then disables that interrupt source
- * and so prevents the other device from working properly.
- */
-#define HAS_GMBUS_IRQ(dev) (INTEL_INFO(dev)->gen >= 5)
 static int
 gmbus_wait_hw_status(struct drm_i915_private *dev_priv,
 		     u32 gmbus2_status,
diff --git a/drivers/gpu/drm/i915/intel_opregion.c b/drivers/gpu/drm/i915/intel_opregion.c
index 4e960ec7419f..acde2945eb8a 100644
--- a/drivers/gpu/drm/i915/intel_opregion.c
+++ b/drivers/gpu/drm/i915/intel_opregion.c
@@ -226,6 +226,8 @@ struct opregion_asle {
 #define ACPI_DIGITAL_OUTPUT (3<<8)
 #define ACPI_LVDS_OUTPUT (4<<8)
 
+#define MAX_DSLP	1500
+
 #ifdef CONFIG_ACPI
 static int swsci(struct drm_device *dev, u32 function, u32 parm, u32 *parm_out)
 {
@@ -260,10 +262,11 @@ static int swsci(struct drm_device *dev, u32 function, u32 parm, u32 *parm_out)
 		/* The spec says 2ms should be the default, but it's too small
 		 * for some machines. */
 		dslp = 50;
-	} else if (dslp > 500) {
+	} else if (dslp > MAX_DSLP) {
 		/* Hey bios, trust must be earned. */
-		WARN_ONCE(1, "excessive driver sleep timeout (DSPL) %u\n", dslp);
-		dslp = 500;
+		DRM_INFO_ONCE("ACPI BIOS requests an excessive sleep of %u ms, "
+			      "using %u ms instead\n", dslp, MAX_DSLP);
+		dslp = MAX_DSLP;
 	}
 
 	/* The spec tells us to do this, but we are the only user... */
diff --git a/drivers/gpu/drm/mgag200/mgag200_fb.c b/drivers/gpu/drm/mgag200/mgag200_fb.c
index f9adc27ef32a..13b7dd83faa9 100644
--- a/drivers/gpu/drm/mgag200/mgag200_fb.c
+++ b/drivers/gpu/drm/mgag200/mgag200_fb.c
@@ -41,7 +41,7 @@ static void mga_dirty_update(struct mga_fbdev *mfbdev,
 	 * then the BO is being moved and we should
 	 * store up the damage until later.
 	 */
-	if (!drm_can_sleep())
+	if (drm_can_sleep())
 		ret = mgag200_bo_reserve(bo, true);
 	if (ret) {
 		if (ret != -EBUSY)
diff --git a/drivers/gpu/drm/mgag200/mgag200_mode.c b/drivers/gpu/drm/mgag200/mgag200_mode.c
index b8583f275e80..968374776db9 100644
--- a/drivers/gpu/drm/mgag200/mgag200_mode.c
+++ b/drivers/gpu/drm/mgag200/mgag200_mode.c
@@ -1519,11 +1519,11 @@ static int mga_vga_mode_valid(struct drm_connector *connector,
 		(mga_vga_calculate_mode_bandwidth(mode, bpp)
 			> (32700 * 1024))) {
 		return MODE_BANDWIDTH;
-	} else if (mode->type == G200_EH &&
+	} else if (mdev->type == G200_EH &&
 		(mga_vga_calculate_mode_bandwidth(mode, bpp)
 			> (37500 * 1024))) {
 		return MODE_BANDWIDTH;
-	} else if (mode->type == G200_ER &&
+	} else if (mdev->type == G200_ER &&
 		(mga_vga_calculate_mode_bandwidth(mode,
 			bpp) > (55000 * 1024))) {
 		return MODE_BANDWIDTH;
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
index 1964f4f0d452..84c5b13b33c9 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_crtc.c
@@ -39,6 +39,7 @@ struct mdp4_crtc {
 		spinlock_t lock;
 		bool stale;
 		uint32_t width, height;
+		uint32_t x, y;
 
 		/* next cursor to scan-out: */
 		uint32_t next_iova;
@@ -57,9 +58,16 @@ struct mdp4_crtc {
 #define PENDING_FLIP   0x2
 	atomic_t pending;
 
-	/* the fb that we currently hold a scanout ref to: */
+	/* the fb that we logically (from PoV of KMS API) hold a ref
+	 * to.  Which we may not yet be scanning out (we may still
+	 * be scanning out previous in case of page_flip while waiting
+	 * for gpu rendering to complete:
+	 */
 	struct drm_framebuffer *fb;
 
+	/* the fb that we currently hold a scanout ref to: */
+	struct drm_framebuffer *scanout_fb;
+
 	/* for unref'ing framebuffers after scanout completes: */
 	struct drm_flip_work unref_fb_work;
 
@@ -77,24 +85,73 @@ static struct mdp4_kms *get_kms(struct drm_crtc *crtc)
 	return to_mdp4_kms(to_mdp_kms(priv->kms));
 }
 
-static void update_fb(struct drm_crtc *crtc, bool async,
-		struct drm_framebuffer *new_fb)
+static void request_pending(struct drm_crtc *crtc, uint32_t pending)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-	struct drm_framebuffer *old_fb = mdp4_crtc->fb;
 
-	if (old_fb)
-		drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb);
+	atomic_or(pending, &mdp4_crtc->pending);
+	mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank);
+}
+
+static void crtc_flush(struct drm_crtc *crtc)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
+	uint32_t i, flush = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) {
+		struct drm_plane *plane = mdp4_crtc->planes[i];
+		if (plane) {
+			enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane);
+			flush |= pipe2flush(pipe_id);
+		}
+	}
+	flush |= ovlp2flush(mdp4_crtc->ovlp);
+
+	DBG("%s: flush=%08x", mdp4_crtc->name, flush);
+
+	mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
+}
+
+static void update_fb(struct drm_crtc *crtc, struct drm_framebuffer *new_fb)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct drm_framebuffer *old_fb = mdp4_crtc->fb;
 
 	/* grab reference to incoming scanout fb: */
 	drm_framebuffer_reference(new_fb);
 	mdp4_crtc->base.fb = new_fb;
 	mdp4_crtc->fb = new_fb;
 
-	if (!async) {
-		/* enable vblank to pick up the old_fb */
-		mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank);
-	}
+	if (old_fb)
+		drm_flip_work_queue(&mdp4_crtc->unref_fb_work, old_fb);
+}
+
+/* unlike update_fb(), take a ref to the new scanout fb *before* updating
+ * plane, then call this.  Needed to ensure we don't unref the buffer that
+ * is actually still being scanned out.
+ *
+ * Note that this whole thing goes away with atomic.. since we can defer
+ * calling into driver until rendering is done.
+ */
+static void update_scanout(struct drm_crtc *crtc, struct drm_framebuffer *fb)
+{
+	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+
+	/* flush updates, to make sure hw is updated to new scanout fb,
+	 * so that we can safely queue unref to current fb (ie. next
+	 * vblank we know hw is done w/ previous scanout_fb).
+	 */
+	crtc_flush(crtc);
+
+	if (mdp4_crtc->scanout_fb)
+		drm_flip_work_queue(&mdp4_crtc->unref_fb_work,
+				mdp4_crtc->scanout_fb);
+
+	mdp4_crtc->scanout_fb = fb;
+
+	/* enable vblank to complete flip: */
+	request_pending(crtc, PENDING_FLIP);
 }
 
 /* if file!=NULL, this is preclose potential cancel-flip path */
@@ -120,34 +177,6 @@ static void complete_flip(struct drm_crtc *crtc, struct drm_file *file)
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 }
 
-static void crtc_flush(struct drm_crtc *crtc)
-{
-	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-	struct mdp4_kms *mdp4_kms = get_kms(crtc);
-	uint32_t i, flush = 0;
-
-	for (i = 0; i < ARRAY_SIZE(mdp4_crtc->planes); i++) {
-		struct drm_plane *plane = mdp4_crtc->planes[i];
-		if (plane) {
-			enum mdp4_pipe pipe_id = mdp4_plane_pipe(plane);
-			flush |= pipe2flush(pipe_id);
-		}
-	}
-	flush |= ovlp2flush(mdp4_crtc->ovlp);
-
-	DBG("%s: flush=%08x", mdp4_crtc->name, flush);
-
-	mdp4_write(mdp4_kms, REG_MDP4_OVERLAY_FLUSH, flush);
-}
-
-static void request_pending(struct drm_crtc *crtc, uint32_t pending)
-{
-	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-
-	atomic_or(pending, &mdp4_crtc->pending);
-	mdp_irq_register(&get_kms(crtc)->base, &mdp4_crtc->vblank);
-}
-
 static void pageflip_cb(struct msm_fence_cb *cb)
 {
 	struct mdp4_crtc *mdp4_crtc =
@@ -158,11 +187,9 @@ static void pageflip_cb(struct msm_fence_cb *cb)
 	if (!fb)
 		return;
 
+	drm_framebuffer_reference(fb);
 	mdp4_plane_set_scanout(mdp4_crtc->plane, fb);
-	crtc_flush(crtc);
-
-	/* enable vblank to complete flip: */
-	request_pending(crtc, PENDING_FLIP);
+	update_scanout(crtc, fb);
 }
 
 static void unref_fb_worker(struct drm_flip_work *work, void *val)
@@ -320,6 +347,20 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc,
 			mode->vsync_end, mode->vtotal,
 			mode->type, mode->flags);
 
+	/* grab extra ref for update_scanout() */
+	drm_framebuffer_reference(crtc->fb);
+
+	ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb,
+			0, 0, mode->hdisplay, mode->vdisplay,
+			x << 16, y << 16,
+			mode->hdisplay << 16, mode->vdisplay << 16);
+	if (ret) {
+		drm_framebuffer_unreference(crtc->fb);
+		dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n",
+				mdp4_crtc->name, ret);
+		return ret;
+	}
+
 	mdp4_write(mdp4_kms, REG_MDP4_DMA_SRC_SIZE(dma),
 			MDP4_DMA_SRC_SIZE_WIDTH(mode->hdisplay) |
 			MDP4_DMA_SRC_SIZE_HEIGHT(mode->vdisplay));
@@ -341,24 +382,15 @@ static int mdp4_crtc_mode_set(struct drm_crtc *crtc,
 
 	mdp4_write(mdp4_kms, REG_MDP4_OVLP_CFG(ovlp), 1);
 
-	update_fb(crtc, false, crtc->fb);
-
-	ret = mdp4_plane_mode_set(mdp4_crtc->plane, crtc, crtc->fb,
-			0, 0, mode->hdisplay, mode->vdisplay,
-			x << 16, y << 16,
-			mode->hdisplay << 16, mode->vdisplay << 16);
-	if (ret) {
-		dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n",
-				mdp4_crtc->name, ret);
-		return ret;
-	}
-
 	if (dma == DMA_E) {
 		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(0), 0x00ff0000);
 		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(1), 0x00ff0000);
 		mdp4_write(mdp4_kms, REG_MDP4_DMA_E_QUANT(2), 0x00ff0000);
 	}
 
+	update_fb(crtc, crtc->fb);
+	update_scanout(crtc, crtc->fb);
+
 	return 0;
 }
 
@@ -385,13 +417,24 @@ static int mdp4_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
 	struct drm_plane *plane = mdp4_crtc->plane;
 	struct drm_display_mode *mode = &crtc->mode;
+	int ret;
 
-	update_fb(crtc, false, crtc->fb);
+	/* grab extra ref for update_scanout() */
+	drm_framebuffer_reference(crtc->fb);
 
-	return mdp4_plane_mode_set(plane, crtc, crtc->fb,
+	ret = mdp4_plane_mode_set(plane, crtc, crtc->fb,
 			0, 0, mode->hdisplay, mode->vdisplay,
 			x << 16, y << 16,
 			mode->hdisplay << 16, mode->vdisplay << 16);
+	if (ret) {
+		drm_framebuffer_unreference(crtc->fb);
+		return ret;
+	}
+
+	update_fb(crtc, crtc->fb);
+	update_scanout(crtc, crtc->fb);
+
+	return 0;
 }
 
 static void mdp4_crtc_load_lut(struct drm_crtc *crtc)
@@ -419,7 +462,7 @@ static int mdp4_crtc_page_flip(struct drm_crtc *crtc,
 	mdp4_crtc->event = event;
 	spin_unlock_irqrestore(&dev->event_lock, flags);
 
-	update_fb(crtc, true, new_fb);
+	update_fb(crtc, new_fb);
 
 	return msm_gem_queue_inactive_cb(obj, &mdp4_crtc->pageflip_cb);
 }
@@ -442,12 +485,12 @@ static int mdp4_crtc_set_property(struct drm_crtc *crtc,
 static void update_cursor(struct drm_crtc *crtc)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
+	struct mdp4_kms *mdp4_kms = get_kms(crtc);
 	enum mdp4_dma dma = mdp4_crtc->dma;
 	unsigned long flags;
 
 	spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags);
 	if (mdp4_crtc->cursor.stale) {
-		struct mdp4_kms *mdp4_kms = get_kms(crtc);
 		struct drm_gem_object *next_bo = mdp4_crtc->cursor.next_bo;
 		struct drm_gem_object *prev_bo = mdp4_crtc->cursor.scanout_bo;
 		uint32_t iova = mdp4_crtc->cursor.next_iova;
@@ -479,6 +522,11 @@ static void update_cursor(struct drm_crtc *crtc)
 		mdp4_crtc->cursor.scanout_bo = next_bo;
 		mdp4_crtc->cursor.stale = false;
 	}
+
+	mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma),
+			MDP4_DMA_CURSOR_POS_X(mdp4_crtc->cursor.x) |
+			MDP4_DMA_CURSOR_POS_Y(mdp4_crtc->cursor.y));
+
 	spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags);
 }
 
@@ -530,6 +578,7 @@ static int mdp4_crtc_cursor_set(struct drm_crtc *crtc,
 		drm_gem_object_unreference_unlocked(old_bo);
 	}
 
+	crtc_flush(crtc);
 	request_pending(crtc, PENDING_CURSOR);
 
 	return 0;
@@ -542,12 +591,15 @@ fail:
 static int mdp4_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
 {
 	struct mdp4_crtc *mdp4_crtc = to_mdp4_crtc(crtc);
-	struct mdp4_kms *mdp4_kms = get_kms(crtc);
-	enum mdp4_dma dma = mdp4_crtc->dma;
+	unsigned long flags;
 
-	mdp4_write(mdp4_kms, REG_MDP4_DMA_CURSOR_POS(dma),
-			MDP4_DMA_CURSOR_POS_X(x) |
-			MDP4_DMA_CURSOR_POS_Y(y));
+	spin_lock_irqsave(&mdp4_crtc->cursor.lock, flags);
+	mdp4_crtc->cursor.x = x;
+	mdp4_crtc->cursor.y = y;
+	spin_unlock_irqrestore(&mdp4_crtc->cursor.lock, flags);
+
+	crtc_flush(crtc);
+	request_pending(crtc, PENDING_CURSOR);
 
 	return 0;
 }
@@ -713,6 +765,7 @@ struct drm_crtc *mdp4_crtc_init(struct drm_device *dev,
 	crtc = &mdp4_crtc->base;
 
 	mdp4_crtc->plane = plane;
+	mdp4_crtc->id = id;
 
 	mdp4_crtc->ovlp = ovlp_id;
 	mdp4_crtc->dma = dma_id;
diff --git a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
index 2406027200ec..1e893dd13859 100644
--- a/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
+++ b/drivers/gpu/drm/msm/mdp/mdp4/mdp4_plane.c
@@ -170,8 +170,8 @@ int mdp4_plane_mode_set(struct drm_plane *plane,
 			MDP4_PIPE_DST_SIZE_HEIGHT(crtc_h));
 
 	mdp4_write(mdp4_kms, REG_MDP4_PIPE_DST_XY(pipe),
-			MDP4_PIPE_SRC_XY_X(crtc_x) |
-			MDP4_PIPE_SRC_XY_Y(crtc_y));
+			MDP4_PIPE_DST_XY_X(crtc_x) |
+			MDP4_PIPE_DST_XY_Y(crtc_y));
 
 	mdp4_plane_set_scanout(plane, fb);
 
diff --git a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
index 71a3b2345eb3..f2794021f086 100644
--- a/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
+++ b/drivers/gpu/drm/msm/mdp/mdp5/mdp5_crtc.c
@@ -296,6 +296,7 @@ static int mdp5_crtc_mode_set(struct drm_crtc *crtc,
 			x << 16, y << 16,
 			mode->hdisplay << 16, mode->vdisplay << 16);
 	if (ret) {
+		drm_framebuffer_unreference(crtc->fb);
 		dev_err(crtc->dev->dev, "%s: failed to set mode on plane: %d\n",
 				mdp5_crtc->name, ret);
 		return ret;
@@ -343,11 +344,15 @@ static int mdp5_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
 			0, 0, mode->hdisplay, mode->vdisplay,
 			x << 16, y << 16,
 			mode->hdisplay << 16, mode->vdisplay << 16);
+	if (ret) {
+		drm_framebuffer_unreference(crtc->fb);
+		return ret;
+	}
 
 	update_fb(crtc, crtc->fb);
 	update_scanout(crtc, crtc->fb);
 
-	return ret;
+	return 0;
 }
 
 static void mdp5_crtc_load_lut(struct drm_crtc *crtc)
diff --git a/drivers/gpu/drm/msm/msm_gem.c b/drivers/gpu/drm/msm/msm_gem.c
index d8d60c969ac7..3da8264d3039 100644
--- a/drivers/gpu/drm/msm/msm_gem.c
+++ b/drivers/gpu/drm/msm/msm_gem.c
@@ -644,7 +644,7 @@ struct drm_gem_object *msm_gem_new(struct drm_device *dev,
 
 fail:
 	if (obj)
-		drm_gem_object_unreference_unlocked(obj);
+		drm_gem_object_unreference(obj);
 
 	return ERR_PTR(ret);
 }
diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
index 5281d4bc37f7..5423e914e491 100644
--- a/drivers/gpu/drm/msm/msm_gem_submit.c
+++ b/drivers/gpu/drm/msm/msm_gem_submit.c
@@ -163,7 +163,7 @@ retry:
 
 
 		/* if locking succeeded, pin bo: */
-		ret = msm_gem_get_iova(&msm_obj->base,
+		ret = msm_gem_get_iova_locked(&msm_obj->base,
 				submit->gpu->id, &iova);
 
 		/* this would break the logic in the fail path.. there is no
@@ -247,7 +247,7 @@ static int submit_reloc(struct msm_gem_submit *submit, struct msm_gem_object *ob
 	/* For now, just map the entire thing.  Eventually we probably
 	 * to do it page-by-page, w/ kmap() if not vmap()d..
 	 */
-	ptr = msm_gem_vaddr(&obj->base);
+	ptr = msm_gem_vaddr_locked(&obj->base);
 
 	if (IS_ERR(ptr)) {
 		ret = PTR_ERR(ptr);
@@ -307,14 +307,12 @@ static void submit_cleanup(struct msm_gem_submit *submit, bool fail)
 {
 	unsigned i;
 
-	mutex_lock(&submit->dev->struct_mutex);
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct msm_gem_object *msm_obj = submit->bos[i].obj;
 		submit_unlock_unpin_bo(submit, i);
 		list_del_init(&msm_obj->submit_entry);
 		drm_gem_object_unreference(&msm_obj->base);
 	}
-	mutex_unlock(&submit->dev->struct_mutex);
 
 	ww_acquire_fini(&submit->ticket);
 	kfree(submit);
@@ -342,6 +340,8 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (args->nr_cmds > MAX_CMDS)
 		return -EINVAL;
 
+	mutex_lock(&dev->struct_mutex);
+
 	submit = submit_create(dev, gpu, args->nr_bos);
 	if (!submit) {
 		ret = -ENOMEM;
@@ -410,5 +410,6 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
 out:
 	if (submit)
 		submit_cleanup(submit, !!ret);
+	mutex_unlock(&dev->struct_mutex);
 	return ret;
 }
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
index 4ebce8be489d..0cfe3f426ee4 100644
--- a/drivers/gpu/drm/msm/msm_gpu.c
+++ b/drivers/gpu/drm/msm/msm_gpu.c
@@ -298,8 +298,6 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 	struct msm_drm_private *priv = dev->dev_private;
 	int i, ret;
 
-	mutex_lock(&dev->struct_mutex);
-
 	submit->fence = ++priv->next_fence;
 
 	gpu->submitted_fence = submit->fence;
@@ -331,7 +329,6 @@ int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
 			msm_gem_move_to_active(&msm_obj->base, gpu, true, submit->fence);
 	}
 	hangcheck_timer_reset(gpu);
-	mutex_unlock(&dev->struct_mutex);
 
 	return ret;
 }
diff --git a/drivers/gpu/drm/radeon/btc_dpm.c b/drivers/gpu/drm/radeon/btc_dpm.c
index 0fbd36f3d4e9..ea103ccdf4bd 100644
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@@ -29,6 +29,7 @@
 #include "cypress_dpm.h"
 #include "btc_dpm.h"
 #include "atom.h"
+#include <linux/seq_file.h>
 
 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
@@ -2756,6 +2757,37 @@ void btc_dpm_fini(struct radeon_device *rdev)
 	r600_free_extended_power_table(rdev);
 }
 
+void btc_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						     struct seq_file *m)
+{
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct radeon_ps *rps = &eg_pi->current_rps;
+	struct rv7xx_ps *ps = rv770_get_ps(rps);
+	struct rv7xx_pl *pl;
+	u32 current_index =
+		(RREG32(TARGET_AND_CURRENT_PROFILE_INDEX) & CURRENT_PROFILE_INDEX_MASK) >>
+		CURRENT_PROFILE_INDEX_SHIFT;
+
+	if (current_index > 2) {
+		seq_printf(m, "invalid dpm profile %d\n", current_index);
+	} else {
+		if (current_index == 0)
+			pl = &ps->low;
+		else if (current_index == 1)
+			pl = &ps->medium;
+		else /* current_index == 2 */
+			pl = &ps->high;
+		seq_printf(m, "uvd    vclk: %d dclk: %d\n", rps->vclk, rps->dclk);
+		if (rdev->family >= CHIP_CEDAR) {
+			seq_printf(m, "power level %d    sclk: %u mclk: %u vddc: %u vddci: %u\n",
+				   current_index, pl->sclk, pl->mclk, pl->vddc, pl->vddci);
+		} else {
+			seq_printf(m, "power level %d    sclk: %u mclk: %u vddc: %u\n",
+				   current_index, pl->sclk, pl->mclk, pl->vddc);
+		}
+	}
+}
+
 u32 btc_dpm_get_sclk(struct radeon_device *rdev, bool low)
 {
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
diff --git a/drivers/gpu/drm/radeon/btcd.h b/drivers/gpu/drm/radeon/btcd.h
index 29e32de7e025..9c65be2d55a9 100644
--- a/drivers/gpu/drm/radeon/btcd.h
+++ b/drivers/gpu/drm/radeon/btcd.h
@@ -44,6 +44,10 @@
 #       define DYN_SPREAD_SPECTRUM_EN                   (1 << 23)
 #       define AC_DC_SW                                 (1 << 24)
 
+#define TARGET_AND_CURRENT_PROFILE_INDEX                  0x66c
+#       define CURRENT_PROFILE_INDEX_MASK                 (0xf << 4)
+#       define CURRENT_PROFILE_INDEX_SHIFT                4
+
 #define	CG_BIF_REQ_AND_RSP				0x7f4
 #define		CG_CLIENT_REQ(x)			((x) << 0)
 #define		CG_CLIENT_REQ_MASK			(0xff << 0)
diff --git a/drivers/gpu/drm/radeon/kv_dpm.c b/drivers/gpu/drm/radeon/kv_dpm.c
index b6e01d5d2cce..351db361239d 100644
--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@@ -1223,7 +1223,7 @@ int kv_dpm_enable(struct radeon_device *rdev)
 
 int kv_dpm_late_enable(struct radeon_device *rdev)
 {
-	int ret;
+	int ret = 0;
 
 	if (rdev->irq.installed &&
 	    r600_is_internal_thermal_sensor(rdev->pm.int_thermal_type)) {
diff --git a/drivers/gpu/drm/radeon/ni_dpm.c b/drivers/gpu/drm/radeon/ni_dpm.c
index c351226ecb31..1217fbcbdcca 100644
--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@@ -3945,7 +3945,6 @@ static void ni_parse_pplib_clock_info(struct radeon_device *rdev,
 	struct rv7xx_power_info *pi = rv770_get_pi(rdev);
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct ni_ps *ps = ni_get_ps(rps);
-	u16 vddc;
 	struct rv7xx_pl *pl = &ps->performance_levels[index];
 
 	ps->performance_level_count = index + 1;
@@ -3961,8 +3960,8 @@ static void ni_parse_pplib_clock_info(struct radeon_device *rdev,
 
 	/* patch up vddc if necessary */
 	if (pl->vddc == 0xff01) {
-		if (radeon_atom_get_max_vddc(rdev, 0, 0, &vddc) == 0)
-			pl->vddc = vddc;
+		if (pi->max_vddc)
+			pl->vddc = pi->max_vddc;
 	}
 
 	if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) {
@@ -4322,7 +4321,8 @@ void ni_dpm_print_power_state(struct radeon_device *rdev,
 void ni_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 						    struct seq_file *m)
 {
-	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct radeon_ps *rps = &eg_pi->current_rps;
 	struct ni_ps *ps = ni_get_ps(rps);
 	struct rv7xx_pl *pl;
 	u32 current_index =
diff --git a/drivers/gpu/drm/radeon/r600.c b/drivers/gpu/drm/radeon/r600.c
index 56140b4e5bb2..cdbc4171fe73 100644
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@@ -3991,6 +3991,10 @@ restart_ih:
 				break;
 			}
 			break;
+		case 124: /* UVD */
+			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+			break;
 		case 176: /* CP_INT in ring buffer */
 		case 177: /* CP_INT in IB1 */
 		case 178: /* CP_INT in IB2 */
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 7b399dc5fd54..2812c7d1ae6f 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -1007,8 +1007,22 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 	case R_008C64_SQ_VSTMP_RING_SIZE:
 	case R_0288C8_SQ_GS_VERT_ITEMSIZE:
 		/* get value to populate the IB don't remove */
-		tmp =radeon_get_ib_value(p, idx);
-		ib[idx] = 0;
+		/*tmp =radeon_get_ib_value(p, idx);
+		  ib[idx] = 0;*/
+		break;
+	case SQ_ESGS_RING_BASE:
+	case SQ_GSVS_RING_BASE:
+	case SQ_ESTMP_RING_BASE:
+	case SQ_GSTMP_RING_BASE:
+	case SQ_PSTMP_RING_BASE:
+	case SQ_VSTMP_RING_BASE:
+		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
+		if (r) {
+			dev_warn(p->dev, "bad SET_CONTEXT_REG "
+					"0x%04X\n", reg);
+			return -EINVAL;
+		}
+		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SQ_CONFIG:
 		track->sq_config = radeon_get_ib_value(p, idx);
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index f74db43346fd..dda02bfc10a4 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -1555,7 +1555,7 @@ static struct radeon_asic btc_asic = {
 		.get_sclk = &btc_dpm_get_sclk,
 		.get_mclk = &btc_dpm_get_mclk,
 		.print_power_state = &rv770_dpm_print_power_state,
-		.debugfs_print_current_performance_level = &rv770_dpm_debugfs_print_current_performance_level,
+		.debugfs_print_current_performance_level = &btc_dpm_debugfs_print_current_performance_level,
 		.force_performance_level = &rv770_dpm_force_performance_level,
 		.vblank_too_short = &btc_dpm_vblank_too_short,
 	},
diff --git a/drivers/gpu/drm/radeon/radeon_asic.h b/drivers/gpu/drm/radeon/radeon_asic.h
index b3bc433eed4c..ae637cfda783 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@@ -551,6 +551,8 @@ void btc_dpm_fini(struct radeon_device *rdev);
 u32 btc_dpm_get_sclk(struct radeon_device *rdev, bool low);
 u32 btc_dpm_get_mclk(struct radeon_device *rdev, bool low);
 bool btc_dpm_vblank_too_short(struct radeon_device *rdev);
+void btc_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
+						     struct seq_file *m);
 int sumo_dpm_init(struct radeon_device *rdev);
 int sumo_dpm_enable(struct radeon_device *rdev);
 int sumo_dpm_late_enable(struct radeon_device *rdev);
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index ec8c388eec17..84a1bbb75f91 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -78,9 +78,10 @@
  *   2.34.0 - Add CIK tiling mode array query
  *   2.35.0 - Add CIK macrotile mode array query
  *   2.36.0 - Fix CIK DCE tiling setup
+ *   2.37.0 - allow GS ring setup on r6xx/r7xx
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	36
+#define KMS_DRIVER_MINOR	37
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/reg_srcs/r600 b/drivers/gpu/drm/radeon/reg_srcs/r600
index 20bfbda7b3f1..ec0c6829c1dc 100644
--- a/drivers/gpu/drm/radeon/reg_srcs/r600
+++ b/drivers/gpu/drm/radeon/reg_srcs/r600
@@ -18,6 +18,7 @@ r600 0x9400
 0x00028A3C VGT_GROUP_VECT_1_FMT_CNTL
 0x00028A40 VGT_GS_MODE
 0x00028A6C VGT_GS_OUT_PRIM_TYPE
+0x00028B38 VGT_GS_MAX_VERT_OUT
 0x000088C8 VGT_GS_PER_ES
 0x000088E8 VGT_GS_PER_VS
 0x000088D4 VGT_GS_VERTEX_REUSE
diff --git a/drivers/gpu/drm/radeon/rv770_dpm.c b/drivers/gpu/drm/radeon/rv770_dpm.c
index 80c595aba359..5b2ea8ac0731 100644
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@@ -2174,7 +2174,6 @@ static void rv7xx_parse_pplib_clock_info(struct radeon_device *rdev,
 	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
 	struct rv7xx_ps *ps = rv770_get_ps(rps);
 	u32 sclk, mclk;
-	u16 vddc;
 	struct rv7xx_pl *pl;
 
 	switch (index) {
@@ -2214,8 +2213,8 @@ static void rv7xx_parse_pplib_clock_info(struct radeon_device *rdev,
 
 	/* patch up vddc if necessary */
 	if (pl->vddc == 0xff01) {
-		if (radeon_atom_get_max_vddc(rdev, 0, 0, &vddc) == 0)
-			pl->vddc = vddc;
+		if (pi->max_vddc)
+			pl->vddc = pi->max_vddc;
 	}
 
 	if (rps->class & ATOM_PPLIB_CLASSIFICATION_ACPI) {
diff --git a/drivers/gpu/drm/radeon/si.c b/drivers/gpu/drm/radeon/si.c
index 09ec4f6c53bb..83578324e5d1 100644
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@@ -6338,6 +6338,10 @@ restart_ih:
 				break;
 			}
 			break;
+		case 124: /* UVD */
+			DRM_DEBUG("IH: UVD int: 0x%08x\n", src_data);
+			radeon_fence_process(rdev, R600_RING_TYPE_UVD_INDEX);
+			break;
 		case 146:
 		case 147:
 			addr = RREG32(VM_CONTEXT1_PROTECTION_FAULT_ADDR);
diff --git a/drivers/gpu/drm/radeon/si_dpm.c b/drivers/gpu/drm/radeon/si_dpm.c
index 0471501338fb..eafb0e6bc67e 100644
--- a/drivers/gpu/drm/radeon/si_dpm.c
+++ b/drivers/gpu/drm/radeon/si_dpm.c
@@ -6472,7 +6472,8 @@ void si_dpm_fini(struct radeon_device *rdev)
 void si_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 						    struct seq_file *m)
 {
-	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
+	struct evergreen_power_info *eg_pi = evergreen_get_pi(rdev);
+	struct radeon_ps *rps = &eg_pi->current_rps;
 	struct ni_ps *ps = ni_get_ps(rps);
 	struct rv7xx_pl *pl;
 	u32 current_index =
diff --git a/drivers/gpu/drm/radeon/sumo_dpm.c b/drivers/gpu/drm/radeon/sumo_dpm.c
index f121efe12dc5..8b47b3cd0357 100644
--- a/drivers/gpu/drm/radeon/sumo_dpm.c
+++ b/drivers/gpu/drm/radeon/sumo_dpm.c
@@ -1807,7 +1807,7 @@ void sumo_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev
 						      struct seq_file *m)
 {
 	struct sumo_power_info *pi = sumo_get_pi(rdev);
-	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
+	struct radeon_ps *rps = &pi->current_rps;
 	struct sumo_ps *ps = sumo_get_ps(rps);
 	struct sumo_pl *pl;
 	u32 current_index =
diff --git a/drivers/gpu/drm/radeon/trinity_dpm.c b/drivers/gpu/drm/radeon/trinity_dpm.c
index 2d447192d6f7..2da0e17eb960 100644
--- a/drivers/gpu/drm/radeon/trinity_dpm.c
+++ b/drivers/gpu/drm/radeon/trinity_dpm.c
@@ -1926,7 +1926,8 @@ void trinity_dpm_print_power_state(struct radeon_device *rdev,
 void trinity_dpm_debugfs_print_current_performance_level(struct radeon_device *rdev,
 							 struct seq_file *m)
 {
-	struct radeon_ps *rps = rdev->pm.dpm.current_ps;
+	struct trinity_power_info *pi = trinity_get_pi(rdev);
+	struct radeon_ps *rps = &pi->current_rps;
 	struct trinity_ps *ps = trinity_get_ps(rps);
 	struct trinity_pl *pl;
 	u32 current_index =
diff --git a/drivers/gpu/drm/radeon/uvd_v2_2.c b/drivers/gpu/drm/radeon/uvd_v2_2.c
index 824550db3fed..d1771004cb52 100644
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@@ -57,7 +57,6 @@ void uvd_v2_2_fence_emit(struct radeon_device *rdev,
 	radeon_ring_write(ring, 0);
 	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
 	radeon_ring_write(ring, 2);
-	return;
 }
 
 /**
diff --git a/drivers/gpu/drm/ttm/ttm_object.c b/drivers/gpu/drm/ttm/ttm_object.c
index 37079859afc8..53b51c4e671a 100644
--- a/drivers/gpu/drm/ttm/ttm_object.c
+++ b/drivers/gpu/drm/ttm/ttm_object.c
@@ -292,7 +292,7 @@ int ttm_ref_object_add(struct ttm_object_file *tfile,
 
 		if (ret == 0) {
 			ref = drm_hash_entry(hash, struct ttm_ref_object, hash);
-			if (!kref_get_unless_zero(&ref->kref)) {
+			if (kref_get_unless_zero(&ref->kref)) {
 				rcu_read_unlock();
 				break;
 			}
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 9af99084b344..75f319090043 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -380,6 +380,9 @@ static void ttm_tt_clear_mapping(struct ttm_tt *ttm)
 	pgoff_t i;
 	struct page **page = ttm->pages;
 
+	if (ttm->page_flags & TTM_PAGE_FLAG_SG)
+		return;
+
 	for (i = 0; i < ttm->num_pages; ++i) {
 		(*page)->mapping = NULL;
 		(*page++)->index = 0;
diff --git a/drivers/gpu/drm/vmwgfx/svga3d_reg.h b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
index d95335cb90bd..b645647b7776 100644
--- a/drivers/gpu/drm/vmwgfx/svga3d_reg.h
+++ b/drivers/gpu/drm/vmwgfx/svga3d_reg.h
@@ -2583,4 +2583,28 @@ typedef union {
    float  f;
 } SVGA3dDevCapResult;
 
+typedef enum {
+   SVGA3DCAPS_RECORD_UNKNOWN        = 0,
+   SVGA3DCAPS_RECORD_DEVCAPS_MIN    = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS        = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS_MAX    = 0x1ff,
+} SVGA3dCapsRecordType;
+
+typedef
+struct SVGA3dCapsRecordHeader {
+   uint32 length;
+   SVGA3dCapsRecordType type;
+}
+SVGA3dCapsRecordHeader;
+
+typedef
+struct SVGA3dCapsRecord {
+   SVGA3dCapsRecordHeader header;
+   uint32 data[1];
+}
+SVGA3dCapsRecord;
+
+
+typedef uint32 SVGA3dCapPair[2];
+
 #endif /* _SVGA3D_REG_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
index 82c41daebc0e..9426c53fb483 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_context.c
@@ -37,7 +37,7 @@ struct vmw_user_context {
 
 
 
-typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *);
+typedef int (*vmw_scrub_func)(struct vmw_ctx_bindinfo *, bool);
 
 static void vmw_user_context_free(struct vmw_resource *res);
 static struct vmw_resource *
@@ -50,9 +50,11 @@ static int vmw_gb_context_unbind(struct vmw_resource *res,
 				 bool readback,
 				 struct ttm_validate_buffer *val_buf);
 static int vmw_gb_context_destroy(struct vmw_resource *res);
-static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi);
-static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi);
-static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi);
+static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind);
+static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi,
+					   bool rebind);
+static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi, bool rebind);
+static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs);
 static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs);
 static uint64_t vmw_user_context_size;
 
@@ -111,10 +113,14 @@ static void vmw_hw_context_destroy(struct vmw_resource *res)
 
 	if (res->func->destroy == vmw_gb_context_destroy) {
 		mutex_lock(&dev_priv->cmdbuf_mutex);
+		mutex_lock(&dev_priv->binding_mutex);
+		(void) vmw_context_binding_state_kill
+			(&container_of(res, struct vmw_user_context, res)->cbs);
 		(void) vmw_gb_context_destroy(res);
 		if (dev_priv->pinned_bo != NULL &&
 		    !dev_priv->query_cid_valid)
 			__vmw_execbuf_release_pinned_bo(dev_priv, NULL);
+		mutex_unlock(&dev_priv->binding_mutex);
 		mutex_unlock(&dev_priv->cmdbuf_mutex);
 		return;
 	}
@@ -328,7 +334,7 @@ static int vmw_gb_context_unbind(struct vmw_resource *res,
 	BUG_ON(bo->mem.mem_type != VMW_PL_MOB);
 
 	mutex_lock(&dev_priv->binding_mutex);
-	vmw_context_binding_state_kill(&uctx->cbs);
+	vmw_context_binding_state_scrub(&uctx->cbs);
 
 	submit_size = sizeof(*cmd2) + (readback ? sizeof(*cmd1) : 0);
 
@@ -378,10 +384,6 @@ static int vmw_gb_context_destroy(struct vmw_resource *res)
 		SVGA3dCmdHeader header;
 		SVGA3dCmdDestroyGBContext body;
 	} *cmd;
-	struct vmw_user_context *uctx =
-		container_of(res, struct vmw_user_context, res);
-
-	BUG_ON(!list_empty(&uctx->cbs.list));
 
 	if (likely(res->id == -1))
 		return 0;
@@ -528,8 +530,9 @@ out_unlock:
  * vmw_context_scrub_shader - scrub a shader binding from a context.
  *
  * @bi: single binding information.
+ * @rebind: Whether to issue a bind instead of scrub command.
  */
-static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi)
+static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi, bool rebind)
 {
 	struct vmw_private *dev_priv = bi->ctx->dev_priv;
 	struct {
@@ -548,7 +551,8 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi)
 	cmd->header.size = sizeof(cmd->body);
 	cmd->body.cid = bi->ctx->id;
 	cmd->body.type = bi->i1.shader_type;
-	cmd->body.shid = SVGA3D_INVALID_ID;
+	cmd->body.shid =
+		cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID);
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
 
 	return 0;
@@ -559,8 +563,10 @@ static int vmw_context_scrub_shader(struct vmw_ctx_bindinfo *bi)
  * from a context.
  *
  * @bi: single binding information.
+ * @rebind: Whether to issue a bind instead of scrub command.
  */
-static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi)
+static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi,
+					   bool rebind)
 {
 	struct vmw_private *dev_priv = bi->ctx->dev_priv;
 	struct {
@@ -579,7 +585,8 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi)
 	cmd->header.size = sizeof(cmd->body);
 	cmd->body.cid = bi->ctx->id;
 	cmd->body.type = bi->i1.rt_type;
-	cmd->body.target.sid = SVGA3D_INVALID_ID;
+	cmd->body.target.sid =
+		cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID);
 	cmd->body.target.face = 0;
 	cmd->body.target.mipmap = 0;
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
@@ -591,11 +598,13 @@ static int vmw_context_scrub_render_target(struct vmw_ctx_bindinfo *bi)
  * vmw_context_scrub_texture - scrub a texture binding from a context.
  *
  * @bi: single binding information.
+ * @rebind: Whether to issue a bind instead of scrub command.
  *
  * TODO: Possibly complement this function with a function that takes
  * a list of texture bindings and combines them to a single command.
  */
-static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi)
+static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi,
+				     bool rebind)
 {
 	struct vmw_private *dev_priv = bi->ctx->dev_priv;
 	struct {
@@ -619,7 +628,8 @@ static int vmw_context_scrub_texture(struct vmw_ctx_bindinfo *bi)
 	cmd->body.c.cid = bi->ctx->id;
 	cmd->body.s1.stage = bi->i1.texture_stage;
 	cmd->body.s1.name = SVGA3D_TS_BIND_TEXTURE;
-	cmd->body.s1.value = (uint32) SVGA3D_INVALID_ID;
+	cmd->body.s1.value =
+		cpu_to_le32((rebind) ? bi->res->id : SVGA3D_INVALID_ID);
 	vmw_fifo_commit(dev_priv, sizeof(*cmd));
 
 	return 0;
@@ -692,6 +702,7 @@ int vmw_context_binding_add(struct vmw_ctx_binding_state *cbs,
 		vmw_context_binding_drop(loc);
 
 	loc->bi = *bi;
+	loc->bi.scrubbed = false;
 	list_add_tail(&loc->ctx_list, &cbs->list);
 	INIT_LIST_HEAD(&loc->res_list);
 
@@ -727,12 +738,11 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs,
 	if (loc->bi.ctx != NULL)
 		vmw_context_binding_drop(loc);
 
-	loc->bi = *bi;
-	list_add_tail(&loc->ctx_list, &cbs->list);
-	if (bi->res != NULL)
+	if (bi->res != NULL) {
+		loc->bi = *bi;
+		list_add_tail(&loc->ctx_list, &cbs->list);
 		list_add_tail(&loc->res_list, &bi->res->binding_head);
-	else
-		INIT_LIST_HEAD(&loc->res_list);
+	}
 }
 
 /**
@@ -746,7 +756,10 @@ static void vmw_context_binding_transfer(struct vmw_ctx_binding_state *cbs,
  */
 static void vmw_context_binding_kill(struct vmw_ctx_binding *cb)
 {
-	(void) vmw_scrub_funcs[cb->bi.bt](&cb->bi);
+	if (!cb->bi.scrubbed) {
+		(void) vmw_scrub_funcs[cb->bi.bt](&cb->bi, false);
+		cb->bi.scrubbed = true;
+	}
 	vmw_context_binding_drop(cb);
 }
 
@@ -768,6 +781,27 @@ static void vmw_context_binding_state_kill(struct vmw_ctx_binding_state *cbs)
 }
 
 /**
+ * vmw_context_binding_state_scrub - Scrub all bindings associated with a
+ * struct vmw_ctx_binding state structure.
+ *
+ * @cbs: Pointer to the context binding state tracker.
+ *
+ * Emits commands to scrub all bindings associated with the
+ * context binding state tracker.
+ */
+static void vmw_context_binding_state_scrub(struct vmw_ctx_binding_state *cbs)
+{
+	struct vmw_ctx_binding *entry;
+
+	list_for_each_entry(entry, &cbs->list, ctx_list) {
+		if (!entry->bi.scrubbed) {
+			(void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false);
+			entry->bi.scrubbed = true;
+		}
+	}
+}
+
+/**
  * vmw_context_binding_res_list_kill - Kill all bindings on a
  * resource binding list
  *
@@ -785,6 +819,27 @@ void vmw_context_binding_res_list_kill(struct list_head *head)
 }
 
 /**
+ * vmw_context_binding_res_list_scrub - Scrub all bindings on a
+ * resource binding list
+ *
+ * @head: list head of resource binding list
+ *
+ * Scrub all bindings associated with a specific resource. Typically
+ * called before the resource is evicted.
+ */
+void vmw_context_binding_res_list_scrub(struct list_head *head)
+{
+	struct vmw_ctx_binding *entry;
+
+	list_for_each_entry(entry, head, res_list) {
+		if (!entry->bi.scrubbed) {
+			(void) vmw_scrub_funcs[entry->bi.bt](&entry->bi, false);
+			entry->bi.scrubbed = true;
+		}
+	}
+}
+
+/**
  * vmw_context_binding_state_transfer - Commit staged binding info
  *
  * @ctx: Pointer to context to commit the staged binding info to.
@@ -803,3 +858,50 @@ void vmw_context_binding_state_transfer(struct vmw_resource *ctx,
 	list_for_each_entry_safe(entry, next, &from->list, ctx_list)
 		vmw_context_binding_transfer(&uctx->cbs, &entry->bi);
 }
+
+/**
+ * vmw_context_rebind_all - Rebind all scrubbed bindings of a context
+ *
+ * @ctx: The context resource
+ *
+ * Walks through the context binding list and rebinds all scrubbed
+ * resources.
+ */
+int vmw_context_rebind_all(struct vmw_resource *ctx)
+{
+	struct vmw_ctx_binding *entry;
+	struct vmw_user_context *uctx =
+		container_of(ctx, struct vmw_user_context, res);
+	struct vmw_ctx_binding_state *cbs = &uctx->cbs;
+	int ret;
+
+	list_for_each_entry(entry, &cbs->list, ctx_list) {
+		if (likely(!entry->bi.scrubbed))
+			continue;
+
+		if (WARN_ON(entry->bi.res == NULL || entry->bi.res->id ==
+			    SVGA3D_INVALID_ID))
+			continue;
+
+		ret = vmw_scrub_funcs[entry->bi.bt](&entry->bi, true);
+		if (unlikely(ret != 0))
+			return ret;
+
+		entry->bi.scrubbed = false;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_context_binding_list - Return a list of context bindings
+ *
+ * @ctx: The context resource
+ *
+ * Returns the current list of bindings of the given context. Note that
+ * this list becomes stale as soon as the dev_priv::binding_mutex is unlocked.
+ */
+struct list_head *vmw_context_binding_list(struct vmw_resource *ctx)
+{
+	return &(container_of(ctx, struct vmw_user_context, res)->cbs.list);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 9893328f8fdc..3bdc0adc656d 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -941,6 +941,7 @@ static void vmw_postclose(struct drm_device *dev,
 		drm_master_put(&vmw_fp->locked_master);
 	}
 
+	vmw_compat_shader_man_destroy(vmw_fp->shman);
 	ttm_object_file_release(&vmw_fp->tfile);
 	kfree(vmw_fp);
 }
@@ -960,11 +961,17 @@ static int vmw_driver_open(struct drm_device *dev, struct drm_file *file_priv)
 	if (unlikely(vmw_fp->tfile == NULL))
 		goto out_no_tfile;
 
+	vmw_fp->shman = vmw_compat_shader_man_create(dev_priv);
+	if (IS_ERR(vmw_fp->shman))
+		goto out_no_shman;
+
 	file_priv->driver_priv = vmw_fp;
 	dev_priv->bdev.dev_mapping = dev->dev_mapping;
 
 	return 0;
 
+out_no_shman:
+	ttm_object_file_release(&vmw_fp->tfile);
 out_no_tfile:
 	kfree(vmw_fp);
 	return ret;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index 554e7fa33082..ecaa302a6154 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -75,10 +75,14 @@
 #define VMW_RES_FENCE ttm_driver_type3
 #define VMW_RES_SHADER ttm_driver_type4
 
+struct vmw_compat_shader_manager;
+
 struct vmw_fpriv {
 	struct drm_master *locked_master;
 	struct ttm_object_file *tfile;
 	struct list_head fence_events;
+	bool gb_aware;
+	struct vmw_compat_shader_manager *shman;
 };
 
 struct vmw_dma_buffer {
@@ -272,6 +276,7 @@ struct vmw_ctx_bindinfo {
 	struct vmw_resource *ctx;
 	struct vmw_resource *res;
 	enum vmw_ctx_binding_type bt;
+	bool scrubbed;
 	union {
 		SVGA3dShaderType shader_type;
 		SVGA3dRenderTargetType rt_type;
@@ -318,7 +323,7 @@ struct vmw_sw_context{
 	struct drm_open_hash res_ht;
 	bool res_ht_initialized;
 	bool kernel; /**< is the called made from the kernel */
-	struct ttm_object_file *tfile;
+	struct vmw_fpriv *fp;
 	struct list_head validate_nodes;
 	struct vmw_relocation relocs[VMWGFX_MAX_RELOCATIONS];
 	uint32_t cur_reloc;
@@ -336,6 +341,7 @@ struct vmw_sw_context{
 	bool needs_post_query_barrier;
 	struct vmw_resource *error_resource;
 	struct vmw_ctx_binding_state staged_bindings;
+	struct list_head staged_shaders;
 };
 
 struct vmw_legacy_display;
@@ -569,6 +575,8 @@ struct vmw_user_resource_conv;
 
 extern void vmw_resource_unreference(struct vmw_resource **p_res);
 extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
+extern struct vmw_resource *
+vmw_resource_reference_unless_doomed(struct vmw_resource *res);
 extern int vmw_resource_validate(struct vmw_resource *res);
 extern int vmw_resource_reserve(struct vmw_resource *res, bool no_backup);
 extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -957,6 +965,9 @@ extern void
 vmw_context_binding_state_transfer(struct vmw_resource *res,
 				   struct vmw_ctx_binding_state *cbs);
 extern void vmw_context_binding_res_list_kill(struct list_head *head);
+extern void vmw_context_binding_res_list_scrub(struct list_head *head);
+extern int vmw_context_rebind_all(struct vmw_resource *ctx);
+extern struct list_head *vmw_context_binding_list(struct vmw_resource *ctx);
 
 /*
  * Surface management - vmwgfx_surface.c
@@ -991,6 +1002,28 @@ extern int vmw_shader_define_ioctl(struct drm_device *dev, void *data,
 				   struct drm_file *file_priv);
 extern int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data,
 				    struct drm_file *file_priv);
+extern int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man,
+				    SVGA3dShaderType shader_type,
+				    u32 *user_key);
+extern void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man,
+				      struct list_head *list);
+extern void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man,
+				      struct list_head *list);
+extern int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man,
+				    u32 user_key,
+				    SVGA3dShaderType shader_type,
+				    struct list_head *list);
+extern int vmw_compat_shader_add(struct vmw_compat_shader_manager *man,
+				 u32 user_key, const void *bytecode,
+				 SVGA3dShaderType shader_type,
+				 size_t size,
+				 struct ttm_object_file *tfile,
+				 struct list_head *list);
+extern struct vmw_compat_shader_manager *
+vmw_compat_shader_man_create(struct vmw_private *dev_priv);
+extern void
+vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man);
+
 
 /**
  * Inline helper functions
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index 7a5f1eb55c5a..269b85cc875a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -114,8 +114,10 @@ static void vmw_resource_list_unreserve(struct list_head *list,
 		 * persistent context binding tracker.
 		 */
 		if (unlikely(val->staged_bindings)) {
-			vmw_context_binding_state_transfer
-				(val->res, val->staged_bindings);
+			if (!backoff) {
+				vmw_context_binding_state_transfer
+					(val->res, val->staged_bindings);
+			}
 			kfree(val->staged_bindings);
 			val->staged_bindings = NULL;
 		}
@@ -178,6 +180,44 @@ static int vmw_resource_val_add(struct vmw_sw_context *sw_context,
 }
 
 /**
+ * vmw_resource_context_res_add - Put resources previously bound to a context on
+ * the validation list
+ *
+ * @dev_priv: Pointer to a device private structure
+ * @sw_context: Pointer to a software context used for this command submission
+ * @ctx: Pointer to the context resource
+ *
+ * This function puts all resources that were previously bound to @ctx on
+ * the resource validation list. This is part of the context state reemission
+ */
+static int vmw_resource_context_res_add(struct vmw_private *dev_priv,
+					struct vmw_sw_context *sw_context,
+					struct vmw_resource *ctx)
+{
+	struct list_head *binding_list;
+	struct vmw_ctx_binding *entry;
+	int ret = 0;
+	struct vmw_resource *res;
+
+	mutex_lock(&dev_priv->binding_mutex);
+	binding_list = vmw_context_binding_list(ctx);
+
+	list_for_each_entry(entry, binding_list, ctx_list) {
+		res = vmw_resource_reference_unless_doomed(entry->bi.res);
+		if (unlikely(res == NULL))
+			continue;
+
+		ret = vmw_resource_val_add(sw_context, entry->bi.res, NULL);
+		vmw_resource_unreference(&res);
+		if (unlikely(ret != 0))
+			break;
+	}
+
+	mutex_unlock(&dev_priv->binding_mutex);
+	return ret;
+}
+
+/**
  * vmw_resource_relocation_add - Add a relocation to the relocation list
  *
  * @list: Pointer to head of relocation list.
@@ -233,8 +273,12 @@ static void vmw_resource_relocations_apply(uint32_t *cb,
 {
 	struct vmw_resource_relocation *rel;
 
-	list_for_each_entry(rel, list, head)
-		cb[rel->offset] = rel->res->id;
+	list_for_each_entry(rel, list, head) {
+		if (likely(rel->res != NULL))
+			cb[rel->offset] = rel->res->id;
+		else
+			cb[rel->offset] = SVGA_3D_CMD_NOP;
+	}
 }
 
 static int vmw_cmd_invalid(struct vmw_private *dev_priv,
@@ -379,22 +423,27 @@ static int vmw_resources_validate(struct vmw_sw_context *sw_context)
 }
 
 /**
- * vmw_cmd_res_check - Check that a resource is present and if so, put it
+ * vmw_cmd_compat_res_check - Check that a resource is present and if so, put it
  * on the resource validate list unless it's already there.
  *
  * @dev_priv: Pointer to a device private structure.
  * @sw_context: Pointer to the software context.
  * @res_type: Resource type.
  * @converter: User-space visisble type specific information.
- * @id: Pointer to the location in the command buffer currently being
+ * @id: user-space resource id handle.
+ * @id_loc: Pointer to the location in the command buffer currently being
  * parsed from where the user-space resource id handle is located.
+ * @p_val: Pointer to pointer to resource validalidation node. Populated
+ * on exit.
  */
-static int vmw_cmd_res_check(struct vmw_private *dev_priv,
-			     struct vmw_sw_context *sw_context,
-			     enum vmw_res_type res_type,
-			     const struct vmw_user_resource_conv *converter,
-			     uint32_t *id,
-			     struct vmw_resource_val_node **p_val)
+static int
+vmw_cmd_compat_res_check(struct vmw_private *dev_priv,
+			 struct vmw_sw_context *sw_context,
+			 enum vmw_res_type res_type,
+			 const struct vmw_user_resource_conv *converter,
+			 uint32_t id,
+			 uint32_t *id_loc,
+			 struct vmw_resource_val_node **p_val)
 {
 	struct vmw_res_cache_entry *rcache =
 		&sw_context->res_cache[res_type];
@@ -402,7 +451,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv,
 	struct vmw_resource_val_node *node;
 	int ret;
 
-	if (*id == SVGA3D_INVALID_ID) {
+	if (id == SVGA3D_INVALID_ID) {
 		if (p_val)
 			*p_val = NULL;
 		if (res_type == vmw_res_context) {
@@ -417,7 +466,7 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv,
 	 * resource
 	 */
 
-	if (likely(rcache->valid && *id == rcache->handle)) {
+	if (likely(rcache->valid && id == rcache->handle)) {
 		const struct vmw_resource *res = rcache->res;
 
 		rcache->node->first_usage = false;
@@ -426,28 +475,28 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv,
 
 		return vmw_resource_relocation_add
 			(&sw_context->res_relocations, res,
-			 id - sw_context->buf_start);
+			 id_loc - sw_context->buf_start);
 	}
 
 	ret = vmw_user_resource_lookup_handle(dev_priv,
-					      sw_context->tfile,
-					      *id,
+					      sw_context->fp->tfile,
+					      id,
 					      converter,
 					      &res);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use resource 0x%08x.\n",
-			  (unsigned) *id);
+			  (unsigned) id);
 		dump_stack();
 		return ret;
 	}
 
 	rcache->valid = true;
 	rcache->res = res;
-	rcache->handle = *id;
+	rcache->handle = id;
 
 	ret = vmw_resource_relocation_add(&sw_context->res_relocations,
 					  res,
-					  id - sw_context->buf_start);
+					  id_loc - sw_context->buf_start);
 	if (unlikely(ret != 0))
 		goto out_no_reloc;
 
@@ -459,7 +508,11 @@ static int vmw_cmd_res_check(struct vmw_private *dev_priv,
 	if (p_val)
 		*p_val = node;
 
-	if (node->first_usage && res_type == vmw_res_context) {
+	if (dev_priv->has_mob && node->first_usage &&
+	    res_type == vmw_res_context) {
+		ret = vmw_resource_context_res_add(dev_priv, sw_context, res);
+		if (unlikely(ret != 0))
+			goto out_no_reloc;
 		node->staged_bindings =
 			kzalloc(sizeof(*node->staged_bindings), GFP_KERNEL);
 		if (node->staged_bindings == NULL) {
@@ -481,6 +534,59 @@ out_no_reloc:
 }
 
 /**
+ * vmw_cmd_res_check - Check that a resource is present and if so, put it
+ * on the resource validate list unless it's already there.
+ *
+ * @dev_priv: Pointer to a device private structure.
+ * @sw_context: Pointer to the software context.
+ * @res_type: Resource type.
+ * @converter: User-space visisble type specific information.
+ * @id_loc: Pointer to the location in the command buffer currently being
+ * parsed from where the user-space resource id handle is located.
+ * @p_val: Pointer to pointer to resource validalidation node. Populated
+ * on exit.
+ */
+static int
+vmw_cmd_res_check(struct vmw_private *dev_priv,
+		  struct vmw_sw_context *sw_context,
+		  enum vmw_res_type res_type,
+		  const struct vmw_user_resource_conv *converter,
+		  uint32_t *id_loc,
+		  struct vmw_resource_val_node **p_val)
+{
+	return vmw_cmd_compat_res_check(dev_priv, sw_context, res_type,
+					converter, *id_loc, id_loc, p_val);
+}
+
+/**
+ * vmw_rebind_contexts - Rebind all resources previously bound to
+ * referenced contexts.
+ *
+ * @sw_context: Pointer to the software context.
+ *
+ * Rebind context binding points that have been scrubbed because of eviction.
+ */
+static int vmw_rebind_contexts(struct vmw_sw_context *sw_context)
+{
+	struct vmw_resource_val_node *val;
+	int ret;
+
+	list_for_each_entry(val, &sw_context->resource_list, head) {
+		if (likely(!val->staged_bindings))
+			continue;
+
+		ret = vmw_context_rebind_all(val->res);
+		if (unlikely(ret != 0)) {
+			if (ret != -ERESTARTSYS)
+				DRM_ERROR("Failed to rebind context.\n");
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+/**
  * vmw_cmd_cid_check - Check a command header for valid context information.
  *
  * @dev_priv: Pointer to a device private structure.
@@ -767,7 +873,7 @@ static int vmw_translate_mob_ptr(struct vmw_private *dev_priv,
 	struct vmw_relocation *reloc;
 	int ret;
 
-	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
+	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use MOB buffer.\n");
 		return -EINVAL;
@@ -828,7 +934,7 @@ static int vmw_translate_guest_ptr(struct vmw_private *dev_priv,
 	struct vmw_relocation *reloc;
 	int ret;
 
-	ret = vmw_user_dmabuf_lookup(sw_context->tfile, handle, &vmw_bo);
+	ret = vmw_user_dmabuf_lookup(sw_context->fp->tfile, handle, &vmw_bo);
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("Could not find or use GMR region.\n");
 		return -EINVAL;
@@ -1127,7 +1233,8 @@ static int vmw_cmd_dma(struct vmw_private *dev_priv,
 
 	srf = vmw_res_to_srf(sw_context->res_cache[vmw_res_surface].res);
 
-	vmw_kms_cursor_snoop(srf, sw_context->tfile, &vmw_bo->base, header);
+	vmw_kms_cursor_snoop(srf, sw_context->fp->tfile, &vmw_bo->base,
+			     header);
 
 out_no_surface:
 	vmw_dmabuf_unreference(&vmw_bo);
@@ -1478,6 +1585,98 @@ static int vmw_cmd_invalidate_gb_surface(struct vmw_private *dev_priv,
 				 &cmd->body.sid, NULL);
 }
 
+
+/**
+ * vmw_cmd_shader_define - Validate an SVGA_3D_CMD_SHADER_DEFINE
+ * command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_shader_define(struct vmw_private *dev_priv,
+				 struct vmw_sw_context *sw_context,
+				 SVGA3dCmdHeader *header)
+{
+	struct vmw_shader_define_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDefineShader body;
+	} *cmd;
+	int ret;
+	size_t size;
+
+	cmd = container_of(header, struct vmw_shader_define_cmd,
+			   header);
+
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context,
+				user_context_converter, &cmd->body.cid,
+				NULL);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (unlikely(!dev_priv->has_mob))
+		return 0;
+
+	size = cmd->header.size - sizeof(cmd->body);
+	ret = vmw_compat_shader_add(sw_context->fp->shman,
+				    cmd->body.shid, cmd + 1,
+				    cmd->body.type, size,
+				    sw_context->fp->tfile,
+				    &sw_context->staged_shaders);
+	if (unlikely(ret != 0))
+		return ret;
+
+	return vmw_resource_relocation_add(&sw_context->res_relocations,
+					   NULL, &cmd->header.id -
+					   sw_context->buf_start);
+
+	return 0;
+}
+
+/**
+ * vmw_cmd_shader_destroy - Validate an SVGA_3D_CMD_SHADER_DESTROY
+ * command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_shader_destroy(struct vmw_private *dev_priv,
+				  struct vmw_sw_context *sw_context,
+				  SVGA3dCmdHeader *header)
+{
+	struct vmw_shader_destroy_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdDestroyShader body;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_shader_destroy_cmd,
+			   header);
+
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context,
+				user_context_converter, &cmd->body.cid,
+				NULL);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (unlikely(!dev_priv->has_mob))
+		return 0;
+
+	ret = vmw_compat_shader_remove(sw_context->fp->shman,
+				       cmd->body.shid,
+				       cmd->body.type,
+				       &sw_context->staged_shaders);
+	if (unlikely(ret != 0))
+		return ret;
+
+	return vmw_resource_relocation_add(&sw_context->res_relocations,
+					   NULL, &cmd->header.id -
+					   sw_context->buf_start);
+
+	return 0;
+}
+
 /**
  * vmw_cmd_set_shader - Validate an SVGA_3D_CMD_SET_SHADER
  * command
@@ -1509,10 +1708,18 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
 	if (dev_priv->has_mob) {
 		struct vmw_ctx_bindinfo bi;
 		struct vmw_resource_val_node *res_node;
-
-		ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_shader,
-					user_shader_converter,
-					&cmd->body.shid, &res_node);
+		u32 shid = cmd->body.shid;
+
+		if (shid != SVGA3D_INVALID_ID)
+			(void) vmw_compat_shader_lookup(sw_context->fp->shman,
+							cmd->body.type,
+							&shid);
+
+		ret = vmw_cmd_compat_res_check(dev_priv, sw_context,
+					       vmw_res_shader,
+					       user_shader_converter,
+					       shid,
+					       &cmd->body.shid, &res_node);
 		if (unlikely(ret != 0))
 			return ret;
 
@@ -1527,6 +1734,39 @@ static int vmw_cmd_set_shader(struct vmw_private *dev_priv,
 }
 
 /**
+ * vmw_cmd_set_shader_const - Validate an SVGA_3D_CMD_SET_SHADER_CONST
+ * command
+ *
+ * @dev_priv: Pointer to a device private struct.
+ * @sw_context: The software context being used for this batch.
+ * @header: Pointer to the command header in the command stream.
+ */
+static int vmw_cmd_set_shader_const(struct vmw_private *dev_priv,
+				    struct vmw_sw_context *sw_context,
+				    SVGA3dCmdHeader *header)
+{
+	struct vmw_set_shader_const_cmd {
+		SVGA3dCmdHeader header;
+		SVGA3dCmdSetShaderConst body;
+	} *cmd;
+	int ret;
+
+	cmd = container_of(header, struct vmw_set_shader_const_cmd,
+			   header);
+
+	ret = vmw_cmd_res_check(dev_priv, sw_context, vmw_res_context,
+				user_context_converter, &cmd->body.cid,
+				NULL);
+	if (unlikely(ret != 0))
+		return ret;
+
+	if (dev_priv->has_mob)
+		header->id = SVGA_3D_CMD_SET_GB_SHADERCONSTS_INLINE;
+
+	return 0;
+}
+
+/**
  * vmw_cmd_bind_gb_shader - Validate an SVGA_3D_CMD_BIND_GB_SHADER
  * command
  *
@@ -1634,14 +1874,14 @@ static const struct vmw_cmd_entry const vmw_cmd_entries[SVGA_3D_CMD_MAX] = {
 		    true, false, false),
 	VMW_CMD_DEF(SVGA_3D_CMD_PRESENT, &vmw_cmd_present_check,
 		    false, false, false),
-	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_cid_check,
-		    true, true, false),
-	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_cid_check,
-		    true, true, false),
+	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DEFINE, &vmw_cmd_shader_define,
+		    true, false, false),
+	VMW_CMD_DEF(SVGA_3D_CMD_SHADER_DESTROY, &vmw_cmd_shader_destroy,
+		    true, false, false),
 	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER, &vmw_cmd_set_shader,
 		    true, false, false),
-	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_cid_check,
-		    true, true, false),
+	VMW_CMD_DEF(SVGA_3D_CMD_SET_SHADER_CONST, &vmw_cmd_set_shader_const,
+		    true, false, false),
 	VMW_CMD_DEF(SVGA_3D_CMD_DRAW_PRIMITIVES, &vmw_cmd_draw,
 		    true, false, false),
 	VMW_CMD_DEF(SVGA_3D_CMD_SETSCISSORRECT, &vmw_cmd_cid_check,
@@ -2171,7 +2411,7 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	} else
 		sw_context->kernel = true;
 
-	sw_context->tfile = vmw_fpriv(file_priv)->tfile;
+	sw_context->fp = vmw_fpriv(file_priv);
 	sw_context->cur_reloc = 0;
 	sw_context->cur_val_buf = 0;
 	sw_context->fence_flags = 0;
@@ -2188,16 +2428,17 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 			goto out_unlock;
 		sw_context->res_ht_initialized = true;
 	}
+	INIT_LIST_HEAD(&sw_context->staged_shaders);
 
 	INIT_LIST_HEAD(&resource_list);
 	ret = vmw_cmd_check_all(dev_priv, sw_context, kernel_commands,
 				command_size);
 	if (unlikely(ret != 0))
-		goto out_err;
+		goto out_err_nores;
 
 	ret = vmw_resources_reserve(sw_context);
 	if (unlikely(ret != 0))
-		goto out_err;
+		goto out_err_nores;
 
 	ret = ttm_eu_reserve_buffers(&ticket, &sw_context->validate_nodes);
 	if (unlikely(ret != 0))
@@ -2225,6 +2466,12 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 		goto out_err;
 	}
 
+	if (dev_priv->has_mob) {
+		ret = vmw_rebind_contexts(sw_context);
+		if (unlikely(ret != 0))
+			goto out_err;
+	}
+
 	cmd = vmw_fifo_reserve(dev_priv, command_size);
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving fifo space for commands.\n");
@@ -2276,6 +2523,8 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 	}
 
 	list_splice_init(&sw_context->resource_list, &resource_list);
+	vmw_compat_shaders_commit(sw_context->fp->shman,
+				  &sw_context->staged_shaders);
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 
 	/*
@@ -2289,10 +2538,11 @@ int vmw_execbuf_process(struct drm_file *file_priv,
 out_unlock_binding:
 	mutex_unlock(&dev_priv->binding_mutex);
 out_err:
-	vmw_resource_relocations_free(&sw_context->res_relocations);
-	vmw_free_relocations(sw_context);
 	ttm_eu_backoff_reservation(&ticket, &sw_context->validate_nodes);
+out_err_nores:
 	vmw_resource_list_unreserve(&sw_context->resource_list, true);
+	vmw_resource_relocations_free(&sw_context->res_relocations);
+	vmw_free_relocations(sw_context);
 	vmw_clear_validations(sw_context);
 	if (unlikely(dev_priv->pinned_bo != NULL &&
 		     !dev_priv->query_cid_valid))
@@ -2301,6 +2551,8 @@ out_unlock:
 	list_splice_init(&sw_context->resource_list, &resource_list);
 	error_resource = sw_context->error_resource;
 	sw_context->error_resource = NULL;
+	vmw_compat_shaders_revert(sw_context->fp->shman,
+				  &sw_context->staged_shaders);
 	mutex_unlock(&dev_priv->cmdbuf_mutex);
 
 	/*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
index 116c49736763..f9881f9e62bd 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ioctl.c
@@ -29,12 +29,18 @@
 #include <drm/vmwgfx_drm.h>
 #include "vmwgfx_kms.h"
 
+struct svga_3d_compat_cap {
+	SVGA3dCapsRecordHeader header;
+	SVGA3dCapPair pairs[SVGA3D_DEVCAP_MAX];
+};
+
 int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		       struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
 	struct drm_vmw_getparam_arg *param =
 	    (struct drm_vmw_getparam_arg *)data;
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 
 	switch (param->param) {
 	case DRM_VMW_PARAM_NUM_STREAMS:
@@ -60,6 +66,11 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		__le32 __iomem *fifo_mem = dev_priv->mmio_virt;
 		const struct vmw_fifo_state *fifo = &dev_priv->fifo;
 
+		if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS)) {
+			param->value = SVGA3D_HWVERSION_WS8_B1;
+			break;
+		}
+
 		param->value =
 			ioread32(fifo_mem +
 				 ((fifo->capabilities &
@@ -69,17 +80,26 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 		break;
 	}
 	case DRM_VMW_PARAM_MAX_SURF_MEMORY:
-		param->value = dev_priv->memory_size;
+		if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) &&
+		    !vmw_fp->gb_aware)
+			param->value = dev_priv->max_mob_pages * PAGE_SIZE / 2;
+		else
+			param->value = dev_priv->memory_size;
 		break;
 	case DRM_VMW_PARAM_3D_CAPS_SIZE:
-		if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS)
-			param->value = SVGA3D_DEVCAP_MAX;
+		if ((dev_priv->capabilities & SVGA_CAP_GBOBJECTS) &&
+		    vmw_fp->gb_aware)
+			param->value = SVGA3D_DEVCAP_MAX * sizeof(uint32_t);
+		else if (dev_priv->capabilities & SVGA_CAP_GBOBJECTS)
+			param->value = sizeof(struct svga_3d_compat_cap) +
+				sizeof(uint32_t);
 		else
 			param->value = (SVGA_FIFO_3D_CAPS_LAST -
-					SVGA_FIFO_3D_CAPS + 1);
-		param->value *= sizeof(uint32_t);
+					SVGA_FIFO_3D_CAPS + 1) *
+				sizeof(uint32_t);
 		break;
 	case DRM_VMW_PARAM_MAX_MOB_MEMORY:
+		vmw_fp->gb_aware = true;
 		param->value = dev_priv->max_mob_pages * PAGE_SIZE;
 		break;
 	default:
@@ -91,6 +111,38 @@ int vmw_getparam_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }
 
+static int vmw_fill_compat_cap(struct vmw_private *dev_priv, void *bounce,
+			       size_t size)
+{
+	struct svga_3d_compat_cap *compat_cap =
+		(struct svga_3d_compat_cap *) bounce;
+	unsigned int i;
+	size_t pair_offset = offsetof(struct svga_3d_compat_cap, pairs);
+	unsigned int max_size;
+
+	if (size < pair_offset)
+		return -EINVAL;
+
+	max_size = (size - pair_offset) / sizeof(SVGA3dCapPair);
+
+	if (max_size > SVGA3D_DEVCAP_MAX)
+		max_size = SVGA3D_DEVCAP_MAX;
+
+	compat_cap->header.length =
+		(pair_offset + max_size * sizeof(SVGA3dCapPair)) / sizeof(u32);
+	compat_cap->header.type = SVGA3DCAPS_RECORD_DEVCAPS;
+
+	mutex_lock(&dev_priv->hw_mutex);
+	for (i = 0; i < max_size; ++i) {
+		vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
+		compat_cap->pairs[i][0] = i;
+		compat_cap->pairs[i][1] = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
+	}
+	mutex_unlock(&dev_priv->hw_mutex);
+
+	return 0;
+}
+
 
 int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 			 struct drm_file *file_priv)
@@ -104,41 +156,49 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 	void *bounce;
 	int ret;
 	bool gb_objects = !!(dev_priv->capabilities & SVGA_CAP_GBOBJECTS);
+	struct vmw_fpriv *vmw_fp = vmw_fpriv(file_priv);
 
 	if (unlikely(arg->pad64 != 0)) {
 		DRM_ERROR("Illegal GET_3D_CAP argument.\n");
 		return -EINVAL;
 	}
 
-	if (gb_objects)
-		size = SVGA3D_DEVCAP_MAX;
+	if (gb_objects && vmw_fp->gb_aware)
+		size = SVGA3D_DEVCAP_MAX * sizeof(uint32_t);
+	else if (gb_objects)
+		size = sizeof(struct svga_3d_compat_cap) + sizeof(uint32_t);
 	else
-		size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1);
-
-	size *= sizeof(uint32_t);
+		size = (SVGA_FIFO_3D_CAPS_LAST - SVGA_FIFO_3D_CAPS + 1) *
+			sizeof(uint32_t);
 
 	if (arg->max_size < size)
 		size = arg->max_size;
 
-	bounce = vmalloc(size);
+	bounce = vzalloc(size);
 	if (unlikely(bounce == NULL)) {
 		DRM_ERROR("Failed to allocate bounce buffer for 3D caps.\n");
 		return -ENOMEM;
 	}
 
-	if (gb_objects) {
-		int i;
+	if (gb_objects && vmw_fp->gb_aware) {
+		int i, num;
 		uint32_t *bounce32 = (uint32_t *) bounce;
 
+		num = size / sizeof(uint32_t);
+		if (num > SVGA3D_DEVCAP_MAX)
+			num = SVGA3D_DEVCAP_MAX;
+
 		mutex_lock(&dev_priv->hw_mutex);
-		for (i = 0; i < SVGA3D_DEVCAP_MAX; ++i) {
+		for (i = 0; i < num; ++i) {
 			vmw_write(dev_priv, SVGA_REG_DEV_CAP, i);
 			*bounce32++ = vmw_read(dev_priv, SVGA_REG_DEV_CAP);
 		}
 		mutex_unlock(&dev_priv->hw_mutex);
-
+	} else if (gb_objects) {
+		ret = vmw_fill_compat_cap(dev_priv, bounce, size);
+		if (unlikely(ret != 0))
+			goto out_err;
 	} else {
-
 		fifo_mem = dev_priv->mmio_virt;
 		memcpy_fromio(bounce, &fifo_mem[SVGA_FIFO_3D_CAPS], size);
 	}
@@ -146,6 +206,7 @@ int vmw_get_cap_3d_ioctl(struct drm_device *dev, void *data,
 	ret = copy_to_user(buffer, bounce, size);
 	if (ret)
 		ret = -EFAULT;
+out_err:
 	vfree(bounce);
 
 	if (unlikely(ret != 0))
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
index 4910e7b81811..d4a5a19cb8c3 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_mob.c
@@ -134,6 +134,7 @@ static int vmw_setup_otable_base(struct vmw_private *dev_priv,
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {
 		DRM_ERROR("Failed reserving FIFO space for OTable setup.\n");
+		ret = -ENOMEM;
 		goto out_no_fifo;
 	}
 
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6fdd82d42f65..2aa4bc6a4d60 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -88,6 +88,11 @@ struct vmw_resource *vmw_resource_reference(struct vmw_resource *res)
 	return res;
 }
 
+struct vmw_resource *
+vmw_resource_reference_unless_doomed(struct vmw_resource *res)
+{
+	return kref_get_unless_zero(&res->kref) ? res : NULL;
+}
 
 /**
  * vmw_resource_release_id - release a resource id to the id manager.
@@ -136,8 +141,12 @@ static void vmw_resource_release(struct kref *kref)
 		vmw_dmabuf_unreference(&res->backup);
 	}
 
-	if (likely(res->hw_destroy != NULL))
+	if (likely(res->hw_destroy != NULL)) {
 		res->hw_destroy(res);
+		mutex_lock(&dev_priv->binding_mutex);
+		vmw_context_binding_res_list_kill(&res->binding_head);
+		mutex_unlock(&dev_priv->binding_mutex);
+	}
 
 	id = res->id;
 	if (res->res_free != NULL)
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
index 1457ec4b7125..217d941b8176 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_shader.c
@@ -29,6 +29,8 @@
 #include "vmwgfx_resource_priv.h"
 #include "ttm/ttm_placement.h"
 
+#define VMW_COMPAT_SHADER_HT_ORDER 12
+
 struct vmw_shader {
 	struct vmw_resource res;
 	SVGA3dShaderType type;
@@ -40,6 +42,50 @@ struct vmw_user_shader {
 	struct vmw_shader shader;
 };
 
+/**
+ * enum vmw_compat_shader_state - Staging state for compat shaders
+ */
+enum vmw_compat_shader_state {
+	VMW_COMPAT_COMMITED,
+	VMW_COMPAT_ADD,
+	VMW_COMPAT_DEL
+};
+
+/**
+ * struct vmw_compat_shader - Metadata for compat shaders.
+ *
+ * @handle: The TTM handle of the guest backed shader.
+ * @tfile: The struct ttm_object_file the guest backed shader is registered
+ * with.
+ * @hash: Hash item for lookup.
+ * @head: List head for staging lists or the compat shader manager list.
+ * @state: Staging state.
+ *
+ * The structure is protected by the cmdbuf lock.
+ */
+struct vmw_compat_shader {
+	u32 handle;
+	struct ttm_object_file *tfile;
+	struct drm_hash_item hash;
+	struct list_head head;
+	enum vmw_compat_shader_state state;
+};
+
+/**
+ * struct vmw_compat_shader_manager - Compat shader manager.
+ *
+ * @shaders: Hash table containing staged and commited compat shaders
+ * @list: List of commited shaders.
+ * @dev_priv: Pointer to a device private structure.
+ *
+ * @shaders and @list are protected by the cmdbuf mutex for now.
+ */
+struct vmw_compat_shader_manager {
+	struct drm_open_hash shaders;
+	struct list_head list;
+	struct vmw_private *dev_priv;
+};
+
 static void vmw_user_shader_free(struct vmw_resource *res);
 static struct vmw_resource *
 vmw_user_shader_base_to_res(struct ttm_base_object *base);
@@ -258,7 +304,7 @@ static int vmw_gb_shader_destroy(struct vmw_resource *res)
 		return 0;
 
 	mutex_lock(&dev_priv->binding_mutex);
-	vmw_context_binding_res_list_kill(&res->binding_head);
+	vmw_context_binding_res_list_scrub(&res->binding_head);
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {
@@ -325,13 +371,81 @@ int vmw_shader_destroy_ioctl(struct drm_device *dev, void *data,
 					 TTM_REF_USAGE);
 }
 
+int vmw_shader_alloc(struct vmw_private *dev_priv,
+		     struct vmw_dma_buffer *buffer,
+		     size_t shader_size,
+		     size_t offset,
+		     SVGA3dShaderType shader_type,
+		     struct ttm_object_file *tfile,
+		     u32 *handle)
+{
+	struct vmw_user_shader *ushader;
+	struct vmw_resource *res, *tmp;
+	int ret;
+
+	/*
+	 * Approximate idr memory usage with 128 bytes. It will be limited
+	 * by maximum number_of shaders anyway.
+	 */
+	if (unlikely(vmw_user_shader_size == 0))
+		vmw_user_shader_size =
+			ttm_round_pot(sizeof(struct vmw_user_shader)) + 128;
+
+	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
+				   vmw_user_shader_size,
+				   false, true);
+	if (unlikely(ret != 0)) {
+		if (ret != -ERESTARTSYS)
+			DRM_ERROR("Out of graphics memory for shader "
+				  "creation.\n");
+		goto out;
+	}
+
+	ushader = kzalloc(sizeof(*ushader), GFP_KERNEL);
+	if (unlikely(ushader == NULL)) {
+		ttm_mem_global_free(vmw_mem_glob(dev_priv),
+				    vmw_user_shader_size);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	res = &ushader->shader.res;
+	ushader->base.shareable = false;
+	ushader->base.tfile = NULL;
+
+	/*
+	 * From here on, the destructor takes over resource freeing.
+	 */
+
+	ret = vmw_gb_shader_init(dev_priv, res, shader_size,
+				 offset, shader_type, buffer,
+				 vmw_user_shader_free);
+	if (unlikely(ret != 0))
+		goto out;
+
+	tmp = vmw_resource_reference(res);
+	ret = ttm_base_object_init(tfile, &ushader->base, false,
+				   VMW_RES_SHADER,
+				   &vmw_user_shader_base_release, NULL);
+
+	if (unlikely(ret != 0)) {
+		vmw_resource_unreference(&tmp);
+		goto out_err;
+	}
+
+	if (handle)
+		*handle = ushader->base.hash.key;
+out_err:
+	vmw_resource_unreference(&res);
+out:
+	return ret;
+}
+
+
 int vmw_shader_define_ioctl(struct drm_device *dev, void *data,
 			     struct drm_file *file_priv)
 {
 	struct vmw_private *dev_priv = vmw_priv(dev);
-	struct vmw_user_shader *ushader;
-	struct vmw_resource *res;
-	struct vmw_resource *tmp;
 	struct drm_vmw_shader_create_arg *arg =
 		(struct drm_vmw_shader_create_arg *)data;
 	struct ttm_object_file *tfile = vmw_fpriv(file_priv)->tfile;
@@ -373,69 +487,324 @@ int vmw_shader_define_ioctl(struct drm_device *dev, void *data,
 		goto out_bad_arg;
 	}
 
-	/*
-	 * Approximate idr memory usage with 128 bytes. It will be limited
-	 * by maximum number_of shaders anyway.
-	 */
+	ret = ttm_read_lock(&vmaster->lock, true);
+	if (unlikely(ret != 0))
+		goto out_bad_arg;
 
-	if (unlikely(vmw_user_shader_size == 0))
-		vmw_user_shader_size = ttm_round_pot(sizeof(*ushader))
-			+ 128;
+	ret = vmw_shader_alloc(dev_priv, buffer, arg->size, arg->offset,
+			       shader_type, tfile, &arg->shader_handle);
 
-	ret = ttm_read_lock(&vmaster->lock, true);
+	ttm_read_unlock(&vmaster->lock);
+out_bad_arg:
+	vmw_dmabuf_unreference(&buffer);
+	return ret;
+}
+
+/**
+ * vmw_compat_shader_lookup - Look up a compat shader
+ *
+ * @man: Pointer to the compat shader manager.
+ * @shader_type: The shader type, that combined with the user_key identifies
+ * the shader.
+ * @user_key: On entry, this should be a pointer to the user_key.
+ * On successful exit, it will contain the guest-backed shader's TTM handle.
+ *
+ * Returns 0 on success. Non-zero on failure, in which case the value pointed
+ * to by @user_key is unmodified.
+ */
+int vmw_compat_shader_lookup(struct vmw_compat_shader_manager *man,
+			     SVGA3dShaderType shader_type,
+			     u32 *user_key)
+{
+	struct drm_hash_item *hash;
+	int ret;
+	unsigned long key = *user_key | (shader_type << 24);
+
+	ret = drm_ht_find_item(&man->shaders, key, &hash);
 	if (unlikely(ret != 0))
 		return ret;
 
-	ret = ttm_mem_global_alloc(vmw_mem_glob(dev_priv),
-				   vmw_user_shader_size,
-				   false, true);
-	if (unlikely(ret != 0)) {
-		if (ret != -ERESTARTSYS)
-			DRM_ERROR("Out of graphics memory for shader"
-				  " creation.\n");
-		goto out_unlock;
+	*user_key = drm_hash_entry(hash, struct vmw_compat_shader,
+				   hash)->handle;
+
+	return 0;
+}
+
+/**
+ * vmw_compat_shader_free - Free a compat shader.
+ *
+ * @man: Pointer to the compat shader manager.
+ * @entry: Pointer to a struct vmw_compat_shader.
+ *
+ * Frees a struct vmw_compat_shder entry and drops its reference to the
+ * guest backed shader.
+ */
+static void vmw_compat_shader_free(struct vmw_compat_shader_manager *man,
+				   struct vmw_compat_shader *entry)
+{
+	list_del(&entry->head);
+	WARN_ON(drm_ht_remove_item(&man->shaders, &entry->hash));
+	WARN_ON(ttm_ref_object_base_unref(entry->tfile, entry->handle,
+					  TTM_REF_USAGE));
+	kfree(entry);
+}
+
+/**
+ * vmw_compat_shaders_commit - Commit a list of compat shader actions.
+ *
+ * @man: Pointer to the compat shader manager.
+ * @list: Caller's list of compat shader actions.
+ *
+ * This function commits a list of compat shader additions or removals.
+ * It is typically called when the execbuf ioctl call triggering these
+ * actions has commited the fifo contents to the device.
+ */
+void vmw_compat_shaders_commit(struct vmw_compat_shader_manager *man,
+			       struct list_head *list)
+{
+	struct vmw_compat_shader *entry, *next;
+
+	list_for_each_entry_safe(entry, next, list, head) {
+		list_del(&entry->head);
+		switch (entry->state) {
+		case VMW_COMPAT_ADD:
+			entry->state = VMW_COMPAT_COMMITED;
+			list_add_tail(&entry->head, &man->list);
+			break;
+		case VMW_COMPAT_DEL:
+			ttm_ref_object_base_unref(entry->tfile, entry->handle,
+						  TTM_REF_USAGE);
+			kfree(entry);
+			break;
+		default:
+			BUG();
+			break;
+		}
 	}
+}
 
-	ushader = kzalloc(sizeof(*ushader), GFP_KERNEL);
-	if (unlikely(ushader == NULL)) {
-		ttm_mem_global_free(vmw_mem_glob(dev_priv),
-				    vmw_user_shader_size);
-		ret = -ENOMEM;
-		goto out_unlock;
+/**
+ * vmw_compat_shaders_revert - Revert a list of compat shader actions
+ *
+ * @man: Pointer to the compat shader manager.
+ * @list: Caller's list of compat shader actions.
+ *
+ * This function reverts a list of compat shader additions or removals.
+ * It is typically called when the execbuf ioctl call triggering these
+ * actions failed for some reason, and the command stream was never
+ * submitted.
+ */
+void vmw_compat_shaders_revert(struct vmw_compat_shader_manager *man,
+			       struct list_head *list)
+{
+	struct vmw_compat_shader *entry, *next;
+	int ret;
+
+	list_for_each_entry_safe(entry, next, list, head) {
+		switch (entry->state) {
+		case VMW_COMPAT_ADD:
+			vmw_compat_shader_free(man, entry);
+			break;
+		case VMW_COMPAT_DEL:
+			ret = drm_ht_insert_item(&man->shaders, &entry->hash);
+			list_del(&entry->head);
+			list_add_tail(&entry->head, &man->list);
+			entry->state = VMW_COMPAT_COMMITED;
+			break;
+		default:
+			BUG();
+			break;
+		}
 	}
+}
 
-	res = &ushader->shader.res;
-	ushader->base.shareable = false;
-	ushader->base.tfile = NULL;
+/**
+ * vmw_compat_shader_remove - Stage a compat shader for removal.
+ *
+ * @man: Pointer to the compat shader manager
+ * @user_key: The key that is used to identify the shader. The key is
+ * unique to the shader type.
+ * @shader_type: Shader type.
+ * @list: Caller's list of staged shader actions.
+ *
+ * This function stages a compat shader for removal and removes the key from
+ * the shader manager's hash table. If the shader was previously only staged
+ * for addition it is completely removed (But the execbuf code may keep a
+ * reference if it was bound to a context between addition and removal). If
+ * it was previously commited to the manager, it is staged for removal.
+ */
+int vmw_compat_shader_remove(struct vmw_compat_shader_manager *man,
+			     u32 user_key, SVGA3dShaderType shader_type,
+			     struct list_head *list)
+{
+	struct vmw_compat_shader *entry;
+	struct drm_hash_item *hash;
+	int ret;
 
-	/*
-	 * From here on, the destructor takes over resource freeing.
-	 */
+	ret = drm_ht_find_item(&man->shaders, user_key | (shader_type << 24),
+			       &hash);
+	if (likely(ret != 0))
+		return -EINVAL;
 
-	ret = vmw_gb_shader_init(dev_priv, res, arg->size,
-				 arg->offset, shader_type, buffer,
-				 vmw_user_shader_free);
+	entry = drm_hash_entry(hash, struct vmw_compat_shader, hash);
+
+	switch (entry->state) {
+	case VMW_COMPAT_ADD:
+		vmw_compat_shader_free(man, entry);
+		break;
+	case VMW_COMPAT_COMMITED:
+		(void) drm_ht_remove_item(&man->shaders, &entry->hash);
+		list_del(&entry->head);
+		entry->state = VMW_COMPAT_DEL;
+		list_add_tail(&entry->head, list);
+		break;
+	default:
+		BUG();
+		break;
+	}
+
+	return 0;
+}
+
+/**
+ * vmw_compat_shader_add - Create a compat shader and add the
+ * key to the manager
+ *
+ * @man: Pointer to the compat shader manager
+ * @user_key: The key that is used to identify the shader. The key is
+ * unique to the shader type.
+ * @bytecode: Pointer to the bytecode of the shader.
+ * @shader_type: Shader type.
+ * @tfile: Pointer to a struct ttm_object_file that the guest-backed shader is
+ * to be created with.
+ * @list: Caller's list of staged shader actions.
+ *
+ * Note that only the key is added to the shader manager's hash table.
+ * The shader is not yet added to the shader manager's list of shaders.
+ */
+int vmw_compat_shader_add(struct vmw_compat_shader_manager *man,
+			  u32 user_key, const void *bytecode,
+			  SVGA3dShaderType shader_type,
+			  size_t size,
+			  struct ttm_object_file *tfile,
+			  struct list_head *list)
+{
+	struct vmw_dma_buffer *buf;
+	struct ttm_bo_kmap_obj map;
+	bool is_iomem;
+	struct vmw_compat_shader *compat;
+	u32 handle;
+	int ret;
+
+	if (user_key > ((1 << 24) - 1) || (unsigned) shader_type > 16)
+		return -EINVAL;
+
+	/* Allocate and pin a DMA buffer */
+	buf = kzalloc(sizeof(*buf), GFP_KERNEL);
+	if (unlikely(buf == NULL))
+		return -ENOMEM;
+
+	ret = vmw_dmabuf_init(man->dev_priv, buf, size, &vmw_sys_ne_placement,
+			      true, vmw_dmabuf_bo_free);
 	if (unlikely(ret != 0))
-		goto out_unlock;
+		goto out;
 
-	tmp = vmw_resource_reference(res);
-	ret = ttm_base_object_init(tfile, &ushader->base, false,
-				   VMW_RES_SHADER,
-				   &vmw_user_shader_base_release, NULL);
+	ret = ttm_bo_reserve(&buf->base, false, true, false, NULL);
+	if (unlikely(ret != 0))
+		goto no_reserve;
 
+	/* Map and copy shader bytecode. */
+	ret = ttm_bo_kmap(&buf->base, 0, PAGE_ALIGN(size) >> PAGE_SHIFT,
+			  &map);
 	if (unlikely(ret != 0)) {
-		vmw_resource_unreference(&tmp);
-		goto out_err;
+		ttm_bo_unreserve(&buf->base);
+		goto no_reserve;
 	}
 
-	arg->shader_handle = ushader->base.hash.key;
-out_err:
-	vmw_resource_unreference(&res);
-out_unlock:
-	ttm_read_unlock(&vmaster->lock);
-out_bad_arg:
-	vmw_dmabuf_unreference(&buffer);
+	memcpy(ttm_kmap_obj_virtual(&map, &is_iomem), bytecode, size);
+	WARN_ON(is_iomem);
+
+	ttm_bo_kunmap(&map);
+	ret = ttm_bo_validate(&buf->base, &vmw_sys_placement, false, true);
+	WARN_ON(ret != 0);
+	ttm_bo_unreserve(&buf->base);
+
+	/* Create a guest-backed shader container backed by the dma buffer */
+	ret = vmw_shader_alloc(man->dev_priv, buf, size, 0, shader_type,
+			       tfile, &handle);
+	vmw_dmabuf_unreference(&buf);
+	if (unlikely(ret != 0))
+		goto no_reserve;
+	/*
+	 * Create a compat shader structure and stage it for insertion
+	 * in the manager
+	 */
+	compat = kzalloc(sizeof(*compat), GFP_KERNEL);
+	if (compat == NULL)
+		goto no_compat;
+
+	compat->hash.key = user_key |  (shader_type << 24);
+	ret = drm_ht_insert_item(&man->shaders, &compat->hash);
+	if (unlikely(ret != 0))
+		goto out_invalid_key;
+
+	compat->state = VMW_COMPAT_ADD;
+	compat->handle = handle;
+	compat->tfile = tfile;
+	list_add_tail(&compat->head, list);
 
+	return 0;
+
+out_invalid_key:
+	kfree(compat);
+no_compat:
+	ttm_ref_object_base_unref(tfile, handle, TTM_REF_USAGE);
+no_reserve:
+out:
 	return ret;
+}
+
+/**
+ * vmw_compat_shader_man_create - Create a compat shader manager
+ *
+ * @dev_priv: Pointer to a device private structure.
+ *
+ * Typically done at file open time. If successful returns a pointer to a
+ * compat shader manager. Otherwise returns an error pointer.
+ */
+struct vmw_compat_shader_manager *
+vmw_compat_shader_man_create(struct vmw_private *dev_priv)
+{
+	struct vmw_compat_shader_manager *man;
+	int ret;
+
+	man = kzalloc(sizeof(*man), GFP_KERNEL);
+
+	man->dev_priv = dev_priv;
+	INIT_LIST_HEAD(&man->list);
+	ret = drm_ht_create(&man->shaders, VMW_COMPAT_SHADER_HT_ORDER);
+	if (ret == 0)
+		return man;
+
+	kfree(man);
+	return ERR_PTR(ret);
+}
+
+/**
+ * vmw_compat_shader_man_destroy - Destroy a compat shader manager
+ *
+ * @man: Pointer to the shader manager to destroy.
+ *
+ * Typically done at file close time.
+ */
+void vmw_compat_shader_man_destroy(struct vmw_compat_shader_manager *man)
+{
+	struct vmw_compat_shader *entry, *next;
+
+	mutex_lock(&man->dev_priv->cmdbuf_mutex);
+	list_for_each_entry_safe(entry, next, &man->list, head)
+		vmw_compat_shader_free(man, entry);
 
+	mutex_unlock(&man->dev_priv->cmdbuf_mutex);
+	kfree(man);
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index 979da1c246a5..82468d902915 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -908,8 +908,8 @@ int vmw_surface_reference_ioctl(struct drm_device *dev, void *data,
 	    rep->size_addr;
 
 	if (user_sizes)
-		ret = copy_to_user(user_sizes, srf->sizes,
-				   srf->num_sizes * sizeof(*srf->sizes));
+		ret = copy_to_user(user_sizes, &srf->base_size,
+				   sizeof(srf->base_size));
 	if (unlikely(ret != 0)) {
 		DRM_ERROR("copy_to_user failed %p %u\n",
 			  user_sizes, srf->num_sizes);
@@ -1111,7 +1111,7 @@ static int vmw_gb_surface_destroy(struct vmw_resource *res)
 		return 0;
 
 	mutex_lock(&dev_priv->binding_mutex);
-	vmw_context_binding_res_list_kill(&res->binding_head);
+	vmw_context_binding_res_list_scrub(&res->binding_head);
 
 	cmd = vmw_fifo_reserve(dev_priv, sizeof(*cmd));
 	if (unlikely(cmd == NULL)) {