88 files changed, 9128 insertions, 3997 deletions
diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
index adc9358c9bec..0a9357c66ff8 100644
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@@ -1413,6 +1413,64 @@ end:
 EXPORT_SYMBOL(drm_detect_monitor_audio);
 
 /**
+ * drm_add_display_info - pull display info out if present
+ * @edid: EDID data
+ * @info: display info (attached to connector)
+ *
+ * Grab any available display info and stuff it into the drm_display_info
+ * structure that's part of the connector.  Useful for tracking bpp and
+ * color spaces.
+ */
+static void drm_add_display_info(struct edid *edid,
+				 struct drm_display_info *info)
+{
+	info->width_mm = edid->width_cm * 10;
+	info->height_mm = edid->height_cm * 10;
+
+	/* driver figures it out in this case */
+	info->bpc = 0;
+	info->color_formats = 0;
+
+	/* Only defined for 1.4 with digital displays */
+	if (edid->revision < 4)
+		return;
+
+	if (!(edid->input & DRM_EDID_INPUT_DIGITAL))
+		return;
+
+	switch (edid->input & DRM_EDID_DIGITAL_DEPTH_MASK) {
+	case DRM_EDID_DIGITAL_DEPTH_6:
+		info->bpc = 6;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_8:
+		info->bpc = 8;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_10:
+		info->bpc = 10;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_12:
+		info->bpc = 12;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_14:
+		info->bpc = 14;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_16:
+		info->bpc = 16;
+		break;
+	case DRM_EDID_DIGITAL_DEPTH_UNDEF:
+	default:
+		info->bpc = 0;
+		break;
+	}
+
+	info->color_formats = DRM_COLOR_FORMAT_RGB444;
+	if (info->color_formats & DRM_EDID_FEATURE_RGB_YCRCB444)
+		info->color_formats = DRM_COLOR_FORMAT_YCRCB444;
+	if (info->color_formats & DRM_EDID_FEATURE_RGB_YCRCB422)
+		info->color_formats = DRM_COLOR_FORMAT_YCRCB422;
+}
+
+/**
  * drm_add_edid_modes - add modes from EDID data, if available
  * @connector: connector we're probing
  * @edid: edid data
@@ -1460,8 +1518,7 @@ int drm_add_edid_modes(struct drm_connector *connector, struct edid *edid)
 	if (quirks & (EDID_QUIRK_PREFER_LARGE_60 | EDID_QUIRK_PREFER_LARGE_75))
 		edid_fixup_preferred(connector, quirks);
 
-	connector->display_info.width_mm = edid->width_cm * 10;
-	connector->display_info.height_mm = edid->height_cm * 10;
+	drm_add_display_info(edid, &connector->display_info);
 
 	return num_modes;
 }
diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c
index 140b9525b48a..802b61ac3139 100644
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@@ -70,174 +70,50 @@ fail:
 }
 EXPORT_SYMBOL(drm_fb_helper_single_add_all_connectors);
 
-/**
- * drm_fb_helper_connector_parse_command_line - parse command line for connector
- * @connector - connector to parse line for
- * @mode_option - per connector mode option
- *
- * This parses the connector specific then generic command lines for
- * modes and options to configure the connector.
- *
- * This uses the same parameters as the fb modedb.c, except for extra
- *	<xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
- *
- * enable/enable Digital/disable bit at the end
- */
-static bool drm_fb_helper_connector_parse_command_line(struct drm_fb_helper_connector *fb_helper_conn,
-						       const char *mode_option)
-{
-	const char *name;
-	unsigned int namelen;
-	int res_specified = 0, bpp_specified = 0, refresh_specified = 0;
-	unsigned int xres = 0, yres = 0, bpp = 32, refresh = 0;
-	int yres_specified = 0, cvt = 0, rb = 0, interlace = 0, margins = 0;
-	int i;
-	enum drm_connector_force force = DRM_FORCE_UNSPECIFIED;
-	struct drm_fb_helper_cmdline_mode *cmdline_mode;
-	struct drm_connector *connector;
-
-	if (!fb_helper_conn)
-		return false;
-	connector = fb_helper_conn->connector;
-
-	cmdline_mode = &fb_helper_conn->cmdline_mode;
-	if (!mode_option)
-		mode_option = fb_mode_option;
-
-	if (!mode_option) {
-		cmdline_mode->specified = false;
-		return false;
-	}
-
-	name = mode_option;
-	namelen = strlen(name);
-	for (i = namelen-1; i >= 0; i--) {
-		switch (name[i]) {
-		case '@':
-			namelen = i;
-			if (!refresh_specified && !bpp_specified &&
-			    !yres_specified) {
-				refresh = simple_strtol(&name[i+1], NULL, 10);
-				refresh_specified = 1;
-				if (cvt || rb)
-					cvt = 0;
-			} else
-				goto done;
-			break;
-		case '-':
-			namelen = i;
-			if (!bpp_specified && !yres_specified) {
-				bpp = simple_strtol(&name[i+1], NULL, 10);
-				bpp_specified = 1;
-				if (cvt || rb)
-					cvt = 0;
-			} else
-				goto done;
-			break;
-		case 'x':
-			if (!yres_specified) {
-				yres = simple_strtol(&name[i+1], NULL, 10);
-				yres_specified = 1;
-			} else
-				goto done;
-		case '0' ... '9':
-			break;
-		case 'M':
-			if (!yres_specified)
-				cvt = 1;
-			break;
-		case 'R':
-			if (cvt)
-				rb = 1;
-			break;
-		case 'm':
-			if (!cvt)
-				margins = 1;
-			break;
-		case 'i':
-			if (!cvt)
-				interlace = 1;
-			break;
-		case 'e':
-			force = DRM_FORCE_ON;
-			break;
-		case 'D':
-			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) &&
-			    (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB))
-				force = DRM_FORCE_ON;
-			else
-				force = DRM_FORCE_ON_DIGITAL;
-			break;
-		case 'd':
-			force = DRM_FORCE_OFF;
-			break;
-		default:
-			goto done;
-		}
-	}
-	if (i < 0 && yres_specified) {
-		xres = simple_strtol(name, NULL, 10);
-		res_specified = 1;
-	}
-done:
-
-	DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
-		drm_get_connector_name(connector), xres, yres,
-		(refresh) ? refresh : 60, (rb) ? " reduced blanking" :
-		"", (margins) ? " with margins" : "", (interlace) ?
-		" interlaced" : "");
-
-	if (force) {
-		const char *s;
-		switch (force) {
-		case DRM_FORCE_OFF: s = "OFF"; break;
-		case DRM_FORCE_ON_DIGITAL: s = "ON - dig"; break;
-		default:
-		case DRM_FORCE_ON: s = "ON"; break;
-		}
-
-		DRM_INFO("forcing %s connector %s\n",
-			 drm_get_connector_name(connector), s);
-		connector->force = force;
-	}
-
-	if (res_specified) {
-		cmdline_mode->specified = true;
-		cmdline_mode->xres = xres;
-		cmdline_mode->yres = yres;
-	}
-
-	if (refresh_specified) {
-		cmdline_mode->refresh_specified = true;
-		cmdline_mode->refresh = refresh;
-	}
-
-	if (bpp_specified) {
-		cmdline_mode->bpp_specified = true;
-		cmdline_mode->bpp = bpp;
-	}
-	cmdline_mode->rb = rb ? true : false;
-	cmdline_mode->cvt = cvt  ? true : false;
-	cmdline_mode->interlace = interlace ? true : false;
-
-	return true;
-}
-
 static int drm_fb_helper_parse_command_line(struct drm_fb_helper *fb_helper)
 {
 	struct drm_fb_helper_connector *fb_helper_conn;
 	int i;
 
 	for (i = 0; i < fb_helper->connector_count; i++) {
+		struct drm_cmdline_mode *mode;
+		struct drm_connector *connector;
 		char *option = NULL;
 
 		fb_helper_conn = fb_helper->connector_info[i];
+		connector = fb_helper_conn->connector;
+		mode = &fb_helper_conn->cmdline_mode;
 
 		/* do something on return - turn off connector maybe */
-		if (fb_get_options(drm_get_connector_name(fb_helper_conn->connector), &option))
+		if (fb_get_options(drm_get_connector_name(connector), &option))
 			continue;
 
-		drm_fb_helper_connector_parse_command_line(fb_helper_conn, option);
+		if (drm_mode_parse_command_line_for_connector(option,
+							      connector,
+							      mode)) {
+			if (mode->force) {
+				const char *s;
+				switch (mode->force) {
+				case DRM_FORCE_OFF: s = "OFF"; break;
+				case DRM_FORCE_ON_DIGITAL: s = "ON - dig"; break;
+				default:
+				case DRM_FORCE_ON: s = "ON"; break;
+				}
+
+				DRM_INFO("forcing %s connector %s\n",
+					 drm_get_connector_name(connector), s);
+				connector->force = mode->force;
+			}
+
+			DRM_DEBUG_KMS("cmdline mode for connector %s %dx%d@%dHz%s%s%s\n",
+				      drm_get_connector_name(connector),
+				      mode->xres, mode->yres,
+				      mode->refresh_specified ? mode->refresh : 60,
+				      mode->rb ? " reduced blanking" : "",
+				      mode->margins ? " with margins" : "",
+				      mode->interlace ?  " interlaced" : "");
+		}
+
 	}
 	return 0;
 }
@@ -901,7 +777,7 @@ int drm_fb_helper_single_fb_probe(struct drm_fb_helper *fb_helper,
 	/* first up get a count of crtcs now in use and new min/maxes width/heights */
 	for (i = 0; i < fb_helper->connector_count; i++) {
 		struct drm_fb_helper_connector *fb_helper_conn = fb_helper->connector_info[i];
-		struct drm_fb_helper_cmdline_mode *cmdline_mode;
+		struct drm_cmdline_mode *cmdline_mode;
 
 		cmdline_mode = &fb_helper_conn->cmdline_mode;
 
@@ -1123,7 +999,7 @@ static struct drm_display_mode *drm_has_preferred_mode(struct drm_fb_helper_conn
 
 static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector)
 {
-	struct drm_fb_helper_cmdline_mode *cmdline_mode;
+	struct drm_cmdline_mode *cmdline_mode;
 	cmdline_mode = &fb_connector->cmdline_mode;
 	return cmdline_mode->specified;
 }
@@ -1131,7 +1007,7 @@ static bool drm_has_cmdline_mode(struct drm_fb_helper_connector *fb_connector)
 static struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_connector *fb_helper_conn,
 						      int width, int height)
 {
-	struct drm_fb_helper_cmdline_mode *cmdline_mode;
+	struct drm_cmdline_mode *cmdline_mode;
 	struct drm_display_mode *mode = NULL;
 
 	cmdline_mode = &fb_helper_conn->cmdline_mode;
@@ -1163,19 +1039,8 @@ static struct drm_display_mode *drm_pick_cmdline_mode(struct drm_fb_helper_conne
 	}
 
 create_mode:
-	if (cmdline_mode->cvt)
-		mode = drm_cvt_mode(fb_helper_conn->connector->dev,
-				    cmdline_mode->xres, cmdline_mode->yres,
-				    cmdline_mode->refresh_specified ? cmdline_mode->refresh : 60,
-				    cmdline_mode->rb, cmdline_mode->interlace,
-				    cmdline_mode->margins);
-	else
-		mode = drm_gtf_mode(fb_helper_conn->connector->dev,
-				    cmdline_mode->xres, cmdline_mode->yres,
-				    cmdline_mode->refresh_specified ? cmdline_mode->refresh : 60,
-				    cmdline_mode->interlace,
-				    cmdline_mode->margins);
-	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+	mode = drm_mode_create_from_cmdline_mode(fb_helper_conn->connector->dev,
+						 cmdline_mode);
 	list_add(&mode->head, &fb_helper_conn->connector->modes);
 	return mode;
 }
diff --git a/drivers/gpu/drm/drm_irq.c b/drivers/gpu/drm/drm_irq.c
index a1f12cb043de..2022a5c966bb 100644
--- a/drivers/gpu/drm/drm_irq.c
+++ b/drivers/gpu/drm/drm_irq.c
@@ -684,10 +684,11 @@ int drm_calc_vbltimestamp_from_scanoutpos(struct drm_device *dev, int crtc,
 	 */
 	*vblank_time = ns_to_timeval(timeval_to_ns(&raw_time) - delta_ns);
 
-	DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %d.%d -> %d.%d [e %d us, %d rep]\n",
-		  crtc, (int) vbl_status, hpos, vpos, raw_time.tv_sec,
-		  raw_time.tv_usec, vblank_time->tv_sec, vblank_time->tv_usec,
-		  (int) duration_ns/1000, i);
+	DRM_DEBUG("crtc %d : v %d p(%d,%d)@ %ld.%ld -> %ld.%ld [e %d us, %d rep]\n",
+		  crtc, (int)vbl_status, hpos, vpos,
+		  (long)raw_time.tv_sec, (long)raw_time.tv_usec,
+		  (long)vblank_time->tv_sec, (long)vblank_time->tv_usec,
+		  (int)duration_ns/1000, i);
 
 	vbl_status = DRM_VBLANKTIME_SCANOUTPOS_METHOD;
 	if (invbl)
diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c
index 25bf87390f53..c2d32f20e2fb 100644
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@@ -974,3 +974,159 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
 	}
 }
 EXPORT_SYMBOL(drm_mode_connector_list_update);
+
+/**
+ * drm_mode_parse_command_line_for_connector - parse command line for connector
+ * @mode_option - per connector mode option
+ * @connector - connector to parse line for
+ *
+ * This parses the connector specific then generic command lines for
+ * modes and options to configure the connector.
+ *
+ * This uses the same parameters as the fb modedb.c, except for extra
+ *	<xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
+ *
+ * enable/enable Digital/disable bit at the end
+ */
+bool drm_mode_parse_command_line_for_connector(const char *mode_option,
+					       struct drm_connector *connector,
+					       struct drm_cmdline_mode *mode)
+{
+	const char *name;
+	unsigned int namelen;
+	int res_specified = 0, bpp_specified = 0, refresh_specified = 0;
+	unsigned int xres = 0, yres = 0, bpp = 32, refresh = 0;
+	int yres_specified = 0, cvt = 0, rb = 0, interlace = 0, margins = 0;
+	int i;
+	enum drm_connector_force force = DRM_FORCE_UNSPECIFIED;
+
+#ifdef CONFIG_FB
+	if (!mode_option)
+		mode_option = fb_mode_option;
+#endif
+
+	if (!mode_option) {
+		mode->specified = false;
+		return false;
+	}
+
+	name = mode_option;
+	namelen = strlen(name);
+	for (i = namelen-1; i >= 0; i--) {
+		switch (name[i]) {
+		case '@':
+			namelen = i;
+			if (!refresh_specified && !bpp_specified &&
+			    !yres_specified) {
+				refresh = simple_strtol(&name[i+1], NULL, 10);
+				refresh_specified = 1;
+				if (cvt || rb)
+					cvt = 0;
+			} else
+				goto done;
+			break;
+		case '-':
+			namelen = i;
+			if (!bpp_specified && !yres_specified) {
+				bpp = simple_strtol(&name[i+1], NULL, 10);
+				bpp_specified = 1;
+				if (cvt || rb)
+					cvt = 0;
+			} else
+				goto done;
+			break;
+		case 'x':
+			if (!yres_specified) {
+				yres = simple_strtol(&name[i+1], NULL, 10);
+				yres_specified = 1;
+			} else
+				goto done;
+		case '0' ... '9':
+			break;
+		case 'M':
+			if (!yres_specified)
+				cvt = 1;
+			break;
+		case 'R':
+			if (cvt)
+				rb = 1;
+			break;
+		case 'm':
+			if (!cvt)
+				margins = 1;
+			break;
+		case 'i':
+			if (!cvt)
+				interlace = 1;
+			break;
+		case 'e':
+			force = DRM_FORCE_ON;
+			break;
+		case 'D':
+			if ((connector->connector_type != DRM_MODE_CONNECTOR_DVII) &&
+			    (connector->connector_type != DRM_MODE_CONNECTOR_HDMIB))
+				force = DRM_FORCE_ON;
+			else
+				force = DRM_FORCE_ON_DIGITAL;
+			break;
+		case 'd':
+			force = DRM_FORCE_OFF;
+			break;
+		default:
+			goto done;
+		}
+	}
+	if (i < 0 && yres_specified) {
+		xres = simple_strtol(name, NULL, 10);
+		res_specified = 1;
+	}
+done:
+	if (res_specified) {
+		mode->specified = true;
+		mode->xres = xres;
+		mode->yres = yres;
+	}
+
+	if (refresh_specified) {
+		mode->refresh_specified = true;
+		mode->refresh = refresh;
+	}
+
+	if (bpp_specified) {
+		mode->bpp_specified = true;
+		mode->bpp = bpp;
+	}
+	mode->rb = rb ? true : false;
+	mode->cvt = cvt  ? true : false;
+	mode->interlace = interlace ? true : false;
+	mode->force = force;
+
+	return true;
+}
+EXPORT_SYMBOL(drm_mode_parse_command_line_for_connector);
+
+struct drm_display_mode *
+drm_mode_create_from_cmdline_mode(struct drm_device *dev,
+				  struct drm_cmdline_mode *cmd)
+{
+	struct drm_display_mode *mode;
+
+	if (cmd->cvt)
+		mode = drm_cvt_mode(dev,
+				    cmd->xres, cmd->yres,
+				    cmd->refresh_specified ? cmd->refresh : 60,
+				    cmd->rb, cmd->interlace,
+				    cmd->margins);
+	else
+		mode = drm_gtf_mode(dev,
+				    cmd->xres, cmd->yres,
+				    cmd->refresh_specified ? cmd->refresh : 60,
+				    cmd->interlace,
+				    cmd->margins);
+	if (!mode)
+		return NULL;
+
+	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+	return mode;
+}
+EXPORT_SYMBOL(drm_mode_create_from_cmdline_mode);
diff --git a/drivers/gpu/drm/drm_stub.c b/drivers/gpu/drm/drm_stub.c
index 001273d57f2d..6d7b083c5b77 100644
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@@ -62,6 +62,26 @@ struct idr drm_minors_idr;
 struct class *drm_class;
 struct proc_dir_entry *drm_proc_root;
 struct dentry *drm_debugfs_root;
+
+int drm_err(const char *func, const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+	int r;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	r = printk(KERN_ERR "[" DRM_NAME ":%s] *ERROR* %pV", func, &vaf);
+
+	va_end(args);
+
+	return r;
+}
+EXPORT_SYMBOL(drm_err);
+
 void drm_ut_debug_printk(unsigned int request_level,
 			 const char *prefix,
 			 const char *function_name,
@@ -78,6 +98,7 @@ void drm_ut_debug_printk(unsigned int request_level,
 	}
 }
 EXPORT_SYMBOL(drm_ut_debug_printk);
+
 static int drm_minor_get_id(struct drm_device *dev, int type)
 {
 	int new_id;
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 87c8e29465e3..51c2257b11e6 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -106,11 +106,12 @@ static const char *get_tiling_flag(struct drm_i915_gem_object *obj)
     }
 }
 
-static const char *agp_type_str(int type)
+static const char *cache_level_str(int type)
 {
 	switch (type) {
-	case 0: return " uncached";
-	case 1: return " snooped";
+	case I915_CACHE_NONE: return " uncached";
+	case I915_CACHE_LLC: return " snooped (LLC)";
+	case I915_CACHE_LLC_MLC: return " snooped (LLC+MLC)";
 	default: return "";
 	}
 }
@@ -127,7 +128,7 @@ describe_obj(struct seq_file *m, struct drm_i915_gem_object *obj)
 		   obj->base.write_domain,
 		   obj->last_rendering_seqno,
 		   obj->last_fenced_seqno,
-		   agp_type_str(obj->agp_type == AGP_USER_CACHED_MEMORY),
+		   cache_level_str(obj->cache_level),
 		   obj->dirty ? " dirty" : "",
 		   obj->madv == I915_MADV_DONTNEED ? " purgeable" : "");
 	if (obj->base.name)
@@ -714,7 +715,7 @@ static void print_error_buffers(struct seq_file *m,
 			   dirty_flag(err->dirty),
 			   purgeable_flag(err->purgeable),
 			   ring_str(err->ring),
-			   agp_type_str(err->agp_type));
+			   cache_level_str(err->cache_level));
 
 		if (err->name)
 			seq_printf(m, " (name: %d)", err->name);
@@ -852,6 +853,7 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 	struct drm_info_node *node = (struct drm_info_node *) m->private;
 	struct drm_device *dev = node->minor->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
 
 	if (IS_GEN5(dev)) {
 		u16 rgvswctl = I915_READ16(MEMSWCTL);
@@ -873,7 +875,11 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		int max_freq;
 
 		/* RPSTAT1 is in the GT power well */
-		__gen6_gt_force_wake_get(dev_priv);
+		ret = mutex_lock_interruptible(&dev->struct_mutex);
+		if (ret)
+			return ret;
+
+		gen6_gt_force_wake_get(dev_priv);
 
 		rpstat = I915_READ(GEN6_RPSTAT1);
 		rpupei = I915_READ(GEN6_RP_CUR_UP_EI);
@@ -883,6 +889,9 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		rpcurdown = I915_READ(GEN6_RP_CUR_DOWN);
 		rpprevdown = I915_READ(GEN6_RP_PREV_DOWN);
 
+		gen6_gt_force_wake_put(dev_priv);
+		mutex_unlock(&dev->struct_mutex);
+
 		seq_printf(m, "GT_PERF_STATUS: 0x%08x\n", gt_perf_status);
 		seq_printf(m, "RPSTAT1: 0x%08x\n", rpstat);
 		seq_printf(m, "Render p-state ratio: %d\n",
@@ -917,8 +926,6 @@ static int i915_cur_delayinfo(struct seq_file *m, void *unused)
 		max_freq = rp_state_cap & 0xff;
 		seq_printf(m, "Max non-overclocked (RP0) frequency: %dMHz\n",
 			   max_freq * 50);
-
-		__gen6_gt_force_wake_put(dev_priv);
 	} else {
 		seq_printf(m, "no P-state info available\n");
 	}
@@ -1058,6 +1065,9 @@ static int i915_fbc_status(struct seq_file *m, void *unused)
 		case FBC_MULTIPLE_PIPES:
 			seq_printf(m, "multiple pipes are enabled");
 			break;
+		case FBC_MODULE_PARAM:
+			seq_printf(m, "disabled per module param (default off)");
+			break;
 		default:
 			seq_printf(m, "unknown reason");
 		}
@@ -1186,6 +1196,42 @@ static int i915_gem_framebuffer_info(struct seq_file *m, void *data)
 	return 0;
 }
 
+static int i915_context_status(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	drm_i915_private_t *dev_priv = dev->dev_private;
+	int ret;
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (ret)
+		return ret;
+
+	seq_printf(m, "power context ");
+	describe_obj(m, dev_priv->pwrctx);
+	seq_printf(m, "\n");
+
+	seq_printf(m, "render context ");
+	describe_obj(m, dev_priv->renderctx);
+	seq_printf(m, "\n");
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
+static int i915_gen6_forcewake_count_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	seq_printf(m, "forcewake count = %d\n",
+		   atomic_read(&dev_priv->forcewake_count));
+
+	return 0;
+}
+
 static int
 i915_wedged_open(struct inode *inode,
 		 struct file *filp)
@@ -1288,6 +1334,67 @@ static int i915_wedged_create(struct dentry *root, struct drm_minor *minor)
 	return drm_add_fake_info_node(minor, ent, &i915_wedged_fops);
 }
 
+static int i915_forcewake_open(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	ret = mutex_lock_interruptible(&dev->struct_mutex);
+	if (ret)
+		return ret;
+	gen6_gt_force_wake_get(dev_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+int i915_forcewake_release(struct inode *inode, struct file *file)
+{
+	struct drm_device *dev = inode->i_private;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	if (!IS_GEN6(dev))
+		return 0;
+
+	/*
+	 * It's bad that we can potentially hang userspace if struct_mutex gets
+	 * forever stuck.  However, if we cannot acquire this lock it means that
+	 * almost certainly the driver has hung, is not unload-able. Therefore
+	 * hanging here is probably a minor inconvenience not to be seen my
+	 * almost every user.
+	 */
+	mutex_lock(&dev->struct_mutex);
+	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+static const struct file_operations i915_forcewake_fops = {
+	.owner = THIS_MODULE,
+	.open = i915_forcewake_open,
+	.release = i915_forcewake_release,
+};
+
+static int i915_forcewake_create(struct dentry *root, struct drm_minor *minor)
+{
+	struct drm_device *dev = minor->dev;
+	struct dentry *ent;
+
+	ent = debugfs_create_file("i915_forcewake_user",
+				  S_IRUSR,
+				  root, dev,
+				  &i915_forcewake_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	return drm_add_fake_info_node(minor, ent, &i915_forcewake_fops);
+}
+
 static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_capabilities", i915_capabilities, 0},
 	{"i915_gem_objects", i915_gem_object_info, 0},
@@ -1324,6 +1431,8 @@ static struct drm_info_list i915_debugfs_list[] = {
 	{"i915_sr_status", i915_sr_status, 0},
 	{"i915_opregion", i915_opregion, 0},
 	{"i915_gem_framebuffer", i915_gem_framebuffer_info, 0},
+	{"i915_context_status", i915_context_status, 0},
+	{"i915_gen6_forcewake_count", i915_gen6_forcewake_count_info, 0},
 };
 #define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
 
@@ -1335,6 +1444,10 @@ int i915_debugfs_init(struct drm_minor *minor)
 	if (ret)
 		return ret;
 
+	ret = i915_forcewake_create(minor->debugfs_root, minor);
+	if (ret)
+		return ret;
+
 	return drm_debugfs_create_files(i915_debugfs_list,
 					I915_DEBUGFS_ENTRIES,
 					minor->debugfs_root, minor);
@@ -1344,6 +1457,8 @@ void i915_debugfs_cleanup(struct drm_minor *minor)
 {
 	drm_debugfs_remove_files(i915_debugfs_list,
 				 I915_DEBUGFS_ENTRIES, minor);
+	drm_debugfs_remove_files((struct drm_info_list *) &i915_forcewake_fops,
+				 1, minor);
 	drm_debugfs_remove_files((struct drm_info_list *) &i915_wedged_fops,
 				 1, minor);
 }
diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
index 12876f2795d2..0239e9974bf2 100644
--- a/drivers/gpu/drm/i915/i915_dma.c
+++ b/drivers/gpu/drm/i915/i915_dma.c
@@ -571,7 +571,7 @@ static int i915_quiescent(struct drm_device *dev)
 	struct intel_ring_buffer *ring = LP_RING(dev->dev_private);
 
 	i915_kernel_lost_context(dev);
-	return intel_wait_ring_buffer(ring, ring->size - 8);
+	return intel_wait_ring_idle(ring);
 }
 
 static int i915_flush_ioctl(struct drm_device *dev, void *data,
@@ -1176,11 +1176,11 @@ static bool i915_switcheroo_can_switch(struct pci_dev *pdev)
 	return can_switch;
 }
 
-static int i915_load_modeset_init(struct drm_device *dev)
+static int i915_load_gem_init(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	unsigned long prealloc_size, gtt_size, mappable_size;
-	int ret = 0;
+	int ret;
 
 	prealloc_size = dev_priv->mm.gtt->stolen_size;
 	gtt_size = dev_priv->mm.gtt->gtt_total_entries << PAGE_SHIFT;
@@ -1204,7 +1204,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	ret = i915_gem_init_ringbuffer(dev);
 	mutex_unlock(&dev->struct_mutex);
 	if (ret)
-		goto out;
+		return ret;
 
 	/* Try to set up FBC with a reasonable compressed buffer size */
 	if (I915_HAS_FBC(dev) && i915_powersave) {
@@ -1222,6 +1222,13 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	/* Allow hardware batchbuffers unless told otherwise. */
 	dev_priv->allow_batchbuffer = 1;
+	return 0;
+}
+
+static int i915_load_modeset_init(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int ret;
 
 	ret = intel_parse_bios(dev);
 	if (ret)
@@ -1236,7 +1243,7 @@ static int i915_load_modeset_init(struct drm_device *dev)
 	 */
 	ret = vga_client_register(dev->pdev, dev, NULL, i915_vga_set_decode);
 	if (ret && ret != -ENODEV)
-		goto cleanup_ringbuffer;
+		goto out;
 
 	intel_register_dsm_handler();
 
@@ -1253,10 +1260,40 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 	intel_modeset_init(dev);
 
-	ret = drm_irq_install(dev);
+	ret = i915_load_gem_init(dev);
 	if (ret)
 		goto cleanup_vga_switcheroo;
 
+	intel_modeset_gem_init(dev);
+
+	if (IS_IVYBRIDGE(dev)) {
+		/* Share pre & uninstall handlers with ILK/SNB */
+		dev->driver->irq_handler = ivybridge_irq_handler;
+		dev->driver->irq_preinstall = ironlake_irq_preinstall;
+		dev->driver->irq_postinstall = ivybridge_irq_postinstall;
+		dev->driver->irq_uninstall = ironlake_irq_uninstall;
+		dev->driver->enable_vblank = ivybridge_enable_vblank;
+		dev->driver->disable_vblank = ivybridge_disable_vblank;
+	} else if (HAS_PCH_SPLIT(dev)) {
+		dev->driver->irq_handler = ironlake_irq_handler;
+		dev->driver->irq_preinstall = ironlake_irq_preinstall;
+		dev->driver->irq_postinstall = ironlake_irq_postinstall;
+		dev->driver->irq_uninstall = ironlake_irq_uninstall;
+		dev->driver->enable_vblank = ironlake_enable_vblank;
+		dev->driver->disable_vblank = ironlake_disable_vblank;
+	} else {
+		dev->driver->irq_preinstall = i915_driver_irq_preinstall;
+		dev->driver->irq_postinstall = i915_driver_irq_postinstall;
+		dev->driver->irq_uninstall = i915_driver_irq_uninstall;
+		dev->driver->irq_handler = i915_driver_irq_handler;
+		dev->driver->enable_vblank = i915_enable_vblank;
+		dev->driver->disable_vblank = i915_disable_vblank;
+	}
+
+	ret = drm_irq_install(dev);
+	if (ret)
+		goto cleanup_gem;
+
 	/* Always safe in the mode setting case. */
 	/* FIXME: do pre/post-mode set stuff in core KMS code */
 	dev->vblank_disable_allowed = 1;
@@ -1274,14 +1311,14 @@ static int i915_load_modeset_init(struct drm_device *dev)
 
 cleanup_irq:
 	drm_irq_uninstall(dev);
+cleanup_gem:
+	mutex_lock(&dev->struct_mutex);
+	i915_gem_cleanup_ringbuffer(dev);
+	mutex_unlock(&dev->struct_mutex);
 cleanup_vga_switcheroo:
 	vga_switcheroo_unregister_client(dev->pdev);
 cleanup_vga_client:
 	vga_client_register(dev->pdev, NULL, NULL, NULL);
-cleanup_ringbuffer:
-	mutex_lock(&dev->struct_mutex);
-	i915_gem_cleanup_ringbuffer(dev);
-	mutex_unlock(&dev->struct_mutex);
 out:
 	return ret;
 }
@@ -1982,7 +2019,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	dev->driver->get_vblank_counter = i915_get_vblank_counter;
 	dev->max_vblank_count = 0xffffff; /* only 24 bits of frame count */
-	if (IS_G4X(dev) || IS_GEN5(dev) || IS_GEN6(dev)) {
+	if (IS_G4X(dev) || IS_GEN5(dev) || IS_GEN6(dev) || IS_IVYBRIDGE(dev)) {
 		dev->max_vblank_count = 0xffffffff; /* full 32 bit counter */
 		dev->driver->get_vblank_counter = gm45_get_vblank_counter;
 	}
@@ -2025,6 +2062,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)
 
 	spin_lock_init(&dev_priv->irq_lock);
 	spin_lock_init(&dev_priv->error_lock);
+	spin_lock_init(&dev_priv->rps_lock);
 
 	if (IS_MOBILE(dev) || !IS_GEN2(dev))
 		dev_priv->num_pipe = 2;
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index 32d1b3e829c8..0defd4270594 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -52,9 +52,12 @@ module_param_named(powersave, i915_powersave, int, 0600);
 unsigned int i915_semaphores = 0;
 module_param_named(semaphores, i915_semaphores, int, 0600);
 
-unsigned int i915_enable_rc6 = 0;
+unsigned int i915_enable_rc6 = 1;
 module_param_named(i915_enable_rc6, i915_enable_rc6, int, 0600);
 
+unsigned int i915_enable_fbc = 0;
+module_param_named(i915_enable_fbc, i915_enable_fbc, int, 0600);
+
 unsigned int i915_lvds_downclock = 0;
 module_param_named(lvds_downclock, i915_lvds_downclock, int, 0400);
 
@@ -169,7 +172,7 @@ static const struct intel_device_info intel_ironlake_d_info = {
 static const struct intel_device_info intel_ironlake_m_info = {
 	.gen = 5, .is_mobile = 1,
 	.need_gfx_hws = 1, .has_hotplug = 1,
-	.has_fbc = 0, /* disabled due to buggy hardware */
+	.has_fbc = 1,
 	.has_bsd_ring = 1,
 };
 
@@ -188,6 +191,21 @@ static const struct intel_device_info intel_sandybridge_m_info = {
 	.has_blt_ring = 1,
 };
 
+static const struct intel_device_info intel_ivybridge_d_info = {
+	.is_ivybridge = 1, .gen = 7,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.has_bsd_ring = 1,
+	.has_blt_ring = 1,
+};
+
+static const struct intel_device_info intel_ivybridge_m_info = {
+	.is_ivybridge = 1, .gen = 7, .is_mobile = 1,
+	.need_gfx_hws = 1, .has_hotplug = 1,
+	.has_fbc = 0,	/* FBC is not enabled on Ivybridge mobile yet */
+	.has_bsd_ring = 1,
+	.has_blt_ring = 1,
+};
+
 static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_VGA_DEVICE(0x3577, &intel_i830_info),		/* I830_M */
 	INTEL_VGA_DEVICE(0x2562, &intel_845g_info),		/* 845_G */
@@ -227,6 +245,11 @@ static const struct pci_device_id pciidlist[] = {		/* aka */
 	INTEL_VGA_DEVICE(0x0116, &intel_sandybridge_m_info),
 	INTEL_VGA_DEVICE(0x0126, &intel_sandybridge_m_info),
 	INTEL_VGA_DEVICE(0x010A, &intel_sandybridge_d_info),
+	INTEL_VGA_DEVICE(0x0156, &intel_ivybridge_m_info), /* GT1 mobile */
+	INTEL_VGA_DEVICE(0x0166, &intel_ivybridge_m_info), /* GT2 mobile */
+	INTEL_VGA_DEVICE(0x0152, &intel_ivybridge_d_info), /* GT1 desktop */
+	INTEL_VGA_DEVICE(0x0162, &intel_ivybridge_d_info), /* GT2 desktop */
+	INTEL_VGA_DEVICE(0x015a, &intel_ivybridge_d_info), /* GT1 server */
 	{0, 0, 0}
 };
 
@@ -235,7 +258,9 @@ MODULE_DEVICE_TABLE(pci, pciidlist);
 #endif
 
 #define INTEL_PCH_DEVICE_ID_MASK	0xff00
+#define INTEL_PCH_IBX_DEVICE_ID_TYPE	0x3b00
 #define INTEL_PCH_CPT_DEVICE_ID_TYPE	0x1c00
+#define INTEL_PCH_PPT_DEVICE_ID_TYPE	0x1e00
 
 void intel_detect_pch (struct drm_device *dev)
 {
@@ -254,16 +279,23 @@ void intel_detect_pch (struct drm_device *dev)
 			int id;
 			id = pch->device & INTEL_PCH_DEVICE_ID_MASK;
 
-			if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) {
+			if (id == INTEL_PCH_IBX_DEVICE_ID_TYPE) {
+				dev_priv->pch_type = PCH_IBX;
+				DRM_DEBUG_KMS("Found Ibex Peak PCH\n");
+			} else if (id == INTEL_PCH_CPT_DEVICE_ID_TYPE) {
 				dev_priv->pch_type = PCH_CPT;
 				DRM_DEBUG_KMS("Found CougarPoint PCH\n");
+			} else if (id == INTEL_PCH_PPT_DEVICE_ID_TYPE) {
+				/* PantherPoint is CPT compatible */
+				dev_priv->pch_type = PCH_CPT;
+				DRM_DEBUG_KMS("Found PatherPoint PCH\n");
 			}
 		}
 		pci_dev_put(pch);
 	}
 }
 
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+static void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 {
 	int count;
 
@@ -279,12 +311,38 @@ void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
 		udelay(10);
 }
 
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+/*
+ * Generally this is called implicitly by the register read function. However,
+ * if some sequence requires the GT to not power down then this function should
+ * be called at the beginning of the sequence followed by a call to
+ * gen6_gt_force_wake_put() at the end of the sequence.
+ */
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+
+	/* Forcewake is atomic in case we get in here without the lock */
+	if (atomic_add_return(1, &dev_priv->forcewake_count) == 1)
+		__gen6_gt_force_wake_get(dev_priv);
+}
+
+static void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE_NOTRACE(FORCEWAKE, 0);
 	POSTING_READ(FORCEWAKE);
 }
 
+/*
+ * see gen6_gt_force_wake_get()
+ */
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv)
+{
+	WARN_ON(!mutex_is_locked(&dev_priv->dev->struct_mutex));
+
+	if (atomic_dec_and_test(&dev_priv->forcewake_count))
+		__gen6_gt_force_wake_put(dev_priv);
+}
+
 void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv)
 {
 	int loop = 500;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 1c1b27c97e5c..ee660355ae68 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -188,7 +188,7 @@ struct drm_i915_error_state {
 		u32 dirty:1;
 		u32 purgeable:1;
 		u32 ring:4;
-		u32 agp_type:1;
+		u32 cache_level:2;
 	} *active_bo, *pinned_bo;
 	u32 active_bo_count, pinned_bo_count;
 	struct intel_overlay_error_state *overlay;
@@ -203,12 +203,19 @@ struct drm_i915_display_funcs {
 	int (*get_display_clock_speed)(struct drm_device *dev);
 	int (*get_fifo_size)(struct drm_device *dev, int plane);
 	void (*update_wm)(struct drm_device *dev);
+	int (*crtc_mode_set)(struct drm_crtc *crtc,
+			     struct drm_display_mode *mode,
+			     struct drm_display_mode *adjusted_mode,
+			     int x, int y,
+			     struct drm_framebuffer *old_fb);
+	void (*fdi_link_train)(struct drm_crtc *crtc);
+	void (*init_clock_gating)(struct drm_device *dev);
+	void (*init_pch_clock_gating)(struct drm_device *dev);
 	/* clock updates for mode set */
 	/* cursor updates */
 	/* render clock increase/decrease */
 	/* display clock increase/decrease */
 	/* pll clock increase/decrease */
-	/* clock gating init */
 };
 
 struct intel_device_info {
@@ -223,6 +230,7 @@ struct intel_device_info {
 	u8 is_pineview : 1;
 	u8 is_broadwater : 1;
 	u8 is_crestline : 1;
+	u8 is_ivybridge : 1;
 	u8 has_fbc : 1;
 	u8 has_pipe_cxsr : 1;
 	u8 has_hotplug : 1;
@@ -242,6 +250,7 @@ enum no_fbc_reason {
 	FBC_BAD_PLANE, /* fbc not supported on plane */
 	FBC_NOT_TILED, /* buffer not tiled */
 	FBC_MULTIPLE_PIPES, /* more than one pipe active */
+	FBC_MODULE_PARAM,
 };
 
 enum intel_pch {
@@ -676,6 +685,10 @@ typedef struct drm_i915_private {
 
 	bool mchbar_need_disable;
 
+	struct work_struct rps_work;
+	spinlock_t rps_lock;
+	u32 pm_iir;
+
 	u8 cur_delay;
 	u8 min_delay;
 	u8 max_delay;
@@ -703,8 +716,16 @@ typedef struct drm_i915_private {
 	struct intel_fbdev *fbdev;
 
 	struct drm_property *broadcast_rgb_property;
+
+	atomic_t forcewake_count;
 } drm_i915_private_t;
 
+enum i915_cache_level {
+	I915_CACHE_NONE,
+	I915_CACHE_LLC,
+	I915_CACHE_LLC_MLC, /* gen6+ */
+};
+
 struct drm_i915_gem_object {
 	struct drm_gem_object base;
 
@@ -791,6 +812,8 @@ struct drm_i915_gem_object {
 	unsigned int pending_fenced_gpu_access:1;
 	unsigned int fenced_gpu_access:1;
 
+	unsigned int cache_level:2;
+
 	struct page **pages;
 
 	/**
@@ -827,8 +850,6 @@ struct drm_i915_gem_object {
 	/** Record of address bit 17 of each page at last unbind. */
 	unsigned long *bit_17;
 
-	/** AGP mapping type (AGP_USER_MEMORY or AGP_USER_CACHED_MEMORY */
-	uint32_t agp_type;
 
 	/**
 	 * If present, while GEM_DOMAIN_CPU is in the read domain this array
@@ -915,13 +936,21 @@ enum intel_chip_family {
 #define IS_G33(dev)		(INTEL_INFO(dev)->is_g33)
 #define IS_IRONLAKE_D(dev)	((dev)->pci_device == 0x0042)
 #define IS_IRONLAKE_M(dev)	((dev)->pci_device == 0x0046)
+#define IS_IVYBRIDGE(dev)	(INTEL_INFO(dev)->is_ivybridge)
 #define IS_MOBILE(dev)		(INTEL_INFO(dev)->is_mobile)
 
+/*
+ * The genX designation typically refers to the render engine, so render
+ * capability related checks should use IS_GEN, while display and other checks
+ * have their own (e.g. HAS_PCH_SPLIT for ILK+ display, IS_foo for particular
+ * chips, etc.).
+ */
 #define IS_GEN2(dev)	(INTEL_INFO(dev)->gen == 2)
 #define IS_GEN3(dev)	(INTEL_INFO(dev)->gen == 3)
 #define IS_GEN4(dev)	(INTEL_INFO(dev)->gen == 4)
 #define IS_GEN5(dev)	(INTEL_INFO(dev)->gen == 5)
 #define IS_GEN6(dev)	(INTEL_INFO(dev)->gen == 6)
+#define IS_GEN7(dev)	(INTEL_INFO(dev)->gen == 7)
 
 #define HAS_BSD(dev)            (INTEL_INFO(dev)->has_bsd_ring)
 #define HAS_BLT(dev)            (INTEL_INFO(dev)->has_blt_ring)
@@ -948,8 +977,8 @@ enum intel_chip_family {
 #define HAS_PIPE_CXSR(dev) (INTEL_INFO(dev)->has_pipe_cxsr)
 #define I915_HAS_FBC(dev) (INTEL_INFO(dev)->has_fbc)
 
-#define HAS_PCH_SPLIT(dev) (IS_GEN5(dev) || IS_GEN6(dev))
-#define HAS_PIPE_CONTROL(dev) (IS_GEN5(dev) || IS_GEN6(dev))
+#define HAS_PCH_SPLIT(dev) (IS_GEN5(dev) || IS_GEN6(dev) || IS_IVYBRIDGE(dev))
+#define HAS_PIPE_CONTROL(dev) (INTEL_INFO(dev)->gen >= 5)
 
 #define INTEL_PCH_TYPE(dev) (((struct drm_i915_private *)(dev)->dev_private)->pch_type)
 #define HAS_PCH_CPT(dev) (INTEL_PCH_TYPE(dev) == PCH_CPT)
@@ -967,6 +996,7 @@ extern unsigned int i915_lvds_downclock;
 extern unsigned int i915_panel_use_ssc;
 extern int i915_vbt_sdvo_panel_type;
 extern unsigned int i915_enable_rc6;
+extern unsigned int i915_enable_fbc;
 
 extern int i915_suspend(struct drm_device *dev, pm_message_t state);
 extern int i915_resume(struct drm_device *dev);
@@ -1010,12 +1040,27 @@ extern irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS);
 extern void i915_driver_irq_preinstall(struct drm_device * dev);
 extern int i915_driver_irq_postinstall(struct drm_device *dev);
 extern void i915_driver_irq_uninstall(struct drm_device * dev);
+
+extern irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS);
+extern void ironlake_irq_preinstall(struct drm_device *dev);
+extern int ironlake_irq_postinstall(struct drm_device *dev);
+extern void ironlake_irq_uninstall(struct drm_device *dev);
+
+extern irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS);
+extern void ivybridge_irq_preinstall(struct drm_device *dev);
+extern int ivybridge_irq_postinstall(struct drm_device *dev);
+extern void ivybridge_irq_uninstall(struct drm_device *dev);
+
 extern int i915_vblank_pipe_set(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 extern int i915_vblank_pipe_get(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 extern int i915_enable_vblank(struct drm_device *dev, int crtc);
 extern void i915_disable_vblank(struct drm_device *dev, int crtc);
+extern int ironlake_enable_vblank(struct drm_device *dev, int crtc);
+extern void ironlake_disable_vblank(struct drm_device *dev, int crtc);
+extern int ivybridge_enable_vblank(struct drm_device *dev, int crtc);
+extern void ivybridge_disable_vblank(struct drm_device *dev, int crtc);
 extern u32 i915_get_vblank_counter(struct drm_device *dev, int crtc);
 extern u32 gm45_get_vblank_counter(struct drm_device *dev, int crtc);
 extern int i915_vblank_swap(struct drm_device *dev, void *data,
@@ -1265,6 +1310,7 @@ static inline void intel_unregister_dsm_handler(void) { return; }
 
 /* modesetting */
 extern void intel_modeset_init(struct drm_device *dev);
+extern void intel_modeset_gem_init(struct drm_device *dev);
 extern void intel_modeset_cleanup(struct drm_device *dev);
 extern int intel_modeset_vga_set_state(struct drm_device *dev, bool state);
 extern void i8xx_disable_fbc(struct drm_device *dev);
@@ -1312,13 +1358,34 @@ extern void intel_display_print_error_state(struct seq_file *m,
 		LOCK_TEST_WITH_RETURN(dev, file);			\
 } while (0)
 
+/* On SNB platform, before reading ring registers forcewake bit
+ * must be set to prevent GT core from power down and stale values being
+ * returned.
+ */
+void gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
+void gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
+void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
+
+/* We give fast paths for the really cool registers */
+#define NEEDS_FORCE_WAKE(dev_priv, reg) \
+	(((dev_priv)->info->gen >= 6) && \
+	((reg) < 0x40000) && \
+	((reg) != FORCEWAKE))
 
 #define __i915_read(x, y) \
 static inline u##x i915_read##x(struct drm_i915_private *dev_priv, u32 reg) { \
-	u##x val = read##y(dev_priv->regs + reg); \
+	u##x val = 0; \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		gen6_gt_force_wake_get(dev_priv); \
+		val = read##y(dev_priv->regs + reg); \
+		gen6_gt_force_wake_put(dev_priv); \
+	} else { \
+		val = read##y(dev_priv->regs + reg); \
+	} \
 	trace_i915_reg_rw(false, reg, val, sizeof(val)); \
 	return val; \
 }
+
 __i915_read(8, b)
 __i915_read(16, w)
 __i915_read(32, l)
@@ -1328,6 +1395,9 @@ __i915_read(64, q)
 #define __i915_write(x, y) \
 static inline void i915_write##x(struct drm_i915_private *dev_priv, u32 reg, u##x val) { \
 	trace_i915_reg_rw(true, reg, val, sizeof(val)); \
+	if (NEEDS_FORCE_WAKE((dev_priv), (reg))) { \
+		__gen6_gt_wait_for_fifo(dev_priv); \
+	} \
 	write##y(val, dev_priv->regs + reg); \
 }
 __i915_write(8, b)
@@ -1356,33 +1426,4 @@ __i915_write(64, q)
 #define POSTING_READ16(reg)	(void)I915_READ16_NOTRACE(reg)
 
 
-/* On SNB platform, before reading ring registers forcewake bit
- * must be set to prevent GT core from power down and stale values being
- * returned.
- */
-void __gen6_gt_force_wake_get(struct drm_i915_private *dev_priv);
-void __gen6_gt_force_wake_put(struct drm_i915_private *dev_priv);
-void __gen6_gt_wait_for_fifo(struct drm_i915_private *dev_priv);
-
-static inline u32 i915_gt_read(struct drm_i915_private *dev_priv, u32 reg)
-{
-	u32 val;
-
-	if (dev_priv->info->gen >= 6) {
-		__gen6_gt_force_wake_get(dev_priv);
-		val = I915_READ(reg);
-		__gen6_gt_force_wake_put(dev_priv);
-	} else
-		val = I915_READ(reg);
-
-	return val;
-}
-
-static inline void i915_gt_write(struct drm_i915_private *dev_priv,
-				u32 reg, u32 val)
-{
-	if (dev_priv->info->gen >= 6)
-		__gen6_gt_wait_for_fifo(dev_priv);
-	I915_WRITE(reg, val);
-}
 #endif
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 7ce3f353af33..c6289034e29a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2673,6 +2673,7 @@ i915_gem_object_get_fence(struct drm_i915_gem_object *obj,
 update:
 	obj->tiling_changed = false;
 	switch (INTEL_INFO(dev)->gen) {
+	case 7:
 	case 6:
 		ret = sandybridge_write_fence_reg(obj, pipelined);
 		break;
@@ -2706,6 +2707,7 @@ i915_gem_clear_fence_reg(struct drm_device *dev,
 	uint32_t fence_reg = reg - dev_priv->fence_regs;
 
 	switch (INTEL_INFO(dev)->gen) {
+	case 7:
 	case 6:
 		I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + fence_reg*8, 0);
 		break;
@@ -2878,6 +2880,17 @@ i915_gem_clflush_object(struct drm_i915_gem_object *obj)
 	if (obj->pages == NULL)
 		return;
 
+	/* If the GPU is snooping the contents of the CPU cache,
+	 * we do not need to manually clear the CPU cache lines.  However,
+	 * the caches are only snooped when the render cache is
+	 * flushed/invalidated.  As we always have to emit invalidations
+	 * and flushes when moving into and out of the RENDER domain, correct
+	 * snooping behaviour occurs naturally as the result of our domain
+	 * tracking.
+	 */
+	if (obj->cache_level != I915_CACHE_NONE)
+		return;
+
 	trace_i915_gem_object_clflush(obj);
 
 	drm_clflush_pages(obj->pages, obj->base.size / PAGE_SIZE);
@@ -3569,7 +3582,7 @@ struct drm_i915_gem_object *i915_gem_alloc_object(struct drm_device *dev,
 	obj->base.write_domain = I915_GEM_DOMAIN_CPU;
 	obj->base.read_domains = I915_GEM_DOMAIN_CPU;
 
-	obj->agp_type = AGP_USER_MEMORY;
+	obj->cache_level = I915_CACHE_NONE;
 	obj->base.driver_private = NULL;
 	obj->fence_reg = I915_FENCE_REG_NONE;
 	INIT_LIST_HEAD(&obj->mm_list);
@@ -3845,25 +3858,10 @@ i915_gem_load(struct drm_device *dev)
 		dev_priv->num_fence_regs = 8;
 
 	/* Initialize fence registers to zero */
-	switch (INTEL_INFO(dev)->gen) {
-	case 6:
-		for (i = 0; i < 16; i++)
-			I915_WRITE64(FENCE_REG_SANDYBRIDGE_0 + (i * 8), 0);
-		break;
-	case 5:
-	case 4:
-		for (i = 0; i < 16; i++)
-			I915_WRITE64(FENCE_REG_965_0 + (i * 8), 0);
-		break;
-	case 3:
-		if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
-			for (i = 0; i < 8; i++)
-				I915_WRITE(FENCE_REG_945_8 + (i * 4), 0);
-	case 2:
-		for (i = 0; i < 8; i++)
-			I915_WRITE(FENCE_REG_830_0 + (i * 4), 0);
-		break;
+	for (i = 0; i < dev_priv->num_fence_regs; i++) {
+		i915_gem_clear_fence_reg(dev, &dev_priv->fence_regs[i]);
 	}
+
 	i915_gem_detect_bit_6_swizzle(dev);
 	init_waitqueue_head(&dev_priv->pending_flip_queue);
 
diff --git a/drivers/gpu/drm/i915/i915_gem_gtt.c b/drivers/gpu/drm/i915/i915_gem_gtt.c
index b0abdc64aa9f..e46b645773cf 100644
--- a/drivers/gpu/drm/i915/i915_gem_gtt.c
+++ b/drivers/gpu/drm/i915/i915_gem_gtt.c
@@ -29,6 +29,26 @@
 #include "i915_trace.h"
 #include "intel_drv.h"
 
+/* XXX kill agp_type! */
+static unsigned int cache_level_to_agp_type(struct drm_device *dev,
+					    enum i915_cache_level cache_level)
+{
+	switch (cache_level) {
+	case I915_CACHE_LLC_MLC:
+		if (INTEL_INFO(dev)->gen >= 6)
+			return AGP_USER_CACHED_MEMORY_LLC_MLC;
+		/* Older chipsets do not have this extra level of CPU
+		 * cacheing, so fallthrough and request the PTE simply
+		 * as cached.
+		 */
+	case I915_CACHE_LLC:
+		return AGP_USER_CACHED_MEMORY;
+	default:
+	case I915_CACHE_NONE:
+		return AGP_USER_MEMORY;
+	}
+}
+
 void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -39,6 +59,9 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 			      (dev_priv->mm.gtt_end - dev_priv->mm.gtt_start) / PAGE_SIZE);
 
 	list_for_each_entry(obj, &dev_priv->mm.gtt_list, gtt_list) {
+		unsigned int agp_type =
+			cache_level_to_agp_type(dev, obj->cache_level);
+
 		i915_gem_clflush_object(obj);
 
 		if (dev_priv->mm.gtt->needs_dmar) {
@@ -46,15 +69,14 @@ void i915_gem_restore_gtt_mappings(struct drm_device *dev)
 
 			intel_gtt_insert_sg_entries(obj->sg_list,
 						    obj->num_sg,
-						    obj->gtt_space->start
-							>> PAGE_SHIFT,
-						    obj->agp_type);
+						    obj->gtt_space->start >> PAGE_SHIFT,
+						    agp_type);
 		} else
 			intel_gtt_insert_pages(obj->gtt_space->start
 						   >> PAGE_SHIFT,
 					       obj->base.size >> PAGE_SHIFT,
 					       obj->pages,
-					       obj->agp_type);
+					       agp_type);
 	}
 
 	intel_gtt_chipset_flush();
@@ -64,6 +86,7 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 {
 	struct drm_device *dev = obj->base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
+	unsigned int agp_type = cache_level_to_agp_type(dev, obj->cache_level);
 	int ret;
 
 	if (dev_priv->mm.gtt->needs_dmar) {
@@ -77,12 +100,12 @@ int i915_gem_gtt_bind_object(struct drm_i915_gem_object *obj)
 		intel_gtt_insert_sg_entries(obj->sg_list,
 					    obj->num_sg,
 					    obj->gtt_space->start >> PAGE_SHIFT,
-					    obj->agp_type);
+					    agp_type);
 	} else
 		intel_gtt_insert_pages(obj->gtt_space->start >> PAGE_SHIFT,
 				       obj->base.size >> PAGE_SHIFT,
 				       obj->pages,
-				       obj->agp_type);
+				       agp_type);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c
index 281ad3d6115d..82d70fd9e933 100644
--- a/drivers/gpu/drm/i915/i915_gem_tiling.c
+++ b/drivers/gpu/drm/i915/i915_gem_tiling.c
@@ -92,7 +92,7 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev)
 	uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN;
 	uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN;
 
-	if (IS_GEN5(dev) || IS_GEN6(dev)) {
+	if (INTEL_INFO(dev)->gen >= 5) {
 		/* On Ironlake whatever DRAM config, GPU always do
 		 * same swizzling setup.
 		 */
diff --git a/drivers/gpu/drm/i915/i915_irq.c b/drivers/gpu/drm/i915/i915_irq.c
index 188b497e5076..b79619a7b788 100644
--- a/drivers/gpu/drm/i915/i915_irq.c
+++ b/drivers/gpu/drm/i915/i915_irq.c
@@ -367,22 +367,30 @@ static void notify_ring(struct drm_device *dev,
 		  jiffies + msecs_to_jiffies(DRM_I915_HANGCHECK_PERIOD));
 }
 
-static void gen6_pm_irq_handler(struct drm_device *dev)
+static void gen6_pm_rps_work(struct work_struct *work)
 {
-	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	drm_i915_private_t *dev_priv = container_of(work, drm_i915_private_t,
+						    rps_work);
 	u8 new_delay = dev_priv->cur_delay;
-	u32 pm_iir;
+	u32 pm_iir, pm_imr;
+
+	spin_lock_irq(&dev_priv->rps_lock);
+	pm_iir = dev_priv->pm_iir;
+	dev_priv->pm_iir = 0;
+	pm_imr = I915_READ(GEN6_PMIMR);
+	spin_unlock_irq(&dev_priv->rps_lock);
 
-	pm_iir = I915_READ(GEN6_PMIIR);
 	if (!pm_iir)
 		return;
 
+	mutex_lock(&dev_priv->dev->struct_mutex);
 	if (pm_iir & GEN6_PM_RP_UP_THRESHOLD) {
 		if (dev_priv->cur_delay != dev_priv->max_delay)
 			new_delay = dev_priv->cur_delay + 1;
 		if (new_delay > dev_priv->max_delay)
 			new_delay = dev_priv->max_delay;
 	} else if (pm_iir & (GEN6_PM_RP_DOWN_THRESHOLD | GEN6_PM_RP_DOWN_TIMEOUT)) {
+		gen6_gt_force_wake_get(dev_priv);
 		if (dev_priv->cur_delay != dev_priv->min_delay)
 			new_delay = dev_priv->cur_delay - 1;
 		if (new_delay < dev_priv->min_delay) {
@@ -396,13 +404,19 @@ static void gen6_pm_irq_handler(struct drm_device *dev)
 			I915_WRITE(GEN6_RP_INTERRUPT_LIMITS,
 				   I915_READ(GEN6_RP_INTERRUPT_LIMITS) & ~0x3f0000);
 		}
-
+		gen6_gt_force_wake_put(dev_priv);
 	}
 
-	gen6_set_rps(dev, new_delay);
+	gen6_set_rps(dev_priv->dev, new_delay);
 	dev_priv->cur_delay = new_delay;
 
-	I915_WRITE(GEN6_PMIIR, pm_iir);
+	/*
+	 * rps_lock not held here because clearing is non-destructive. There is
+	 * an *extremely* unlikely race with gen6_rps_enable() that is prevented
+	 * by holding struct_mutex for the duration of the write.
+	 */
+	I915_WRITE(GEN6_PMIMR, pm_imr & ~pm_iir);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
 static void pch_irq_handler(struct drm_device *dev)
@@ -448,8 +462,97 @@ static void pch_irq_handler(struct drm_device *dev)
 		DRM_DEBUG_DRIVER("PCH transcoder A underrun interrupt\n");
 }
 
-static irqreturn_t ironlake_irq_handler(struct drm_device *dev)
+irqreturn_t ivybridge_irq_handler(DRM_IRQ_ARGS)
+{
+	struct drm_device *dev = (struct drm_device *) arg;
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	int ret = IRQ_NONE;
+	u32 de_iir, gt_iir, de_ier, pch_iir, pm_iir;
+	struct drm_i915_master_private *master_priv;
+
+	atomic_inc(&dev_priv->irq_received);
+
+	/* disable master interrupt before clearing iir  */
+	de_ier = I915_READ(DEIER);
+	I915_WRITE(DEIER, de_ier & ~DE_MASTER_IRQ_CONTROL);
+	POSTING_READ(DEIER);
+
+	de_iir = I915_READ(DEIIR);
+	gt_iir = I915_READ(GTIIR);
+	pch_iir = I915_READ(SDEIIR);
+	pm_iir = I915_READ(GEN6_PMIIR);
+
+	if (de_iir == 0 && gt_iir == 0 && pch_iir == 0 && pm_iir == 0)
+		goto done;
+
+	ret = IRQ_HANDLED;
+
+	if (dev->primary->master) {
+		master_priv = dev->primary->master->driver_priv;
+		if (master_priv->sarea_priv)
+			master_priv->sarea_priv->last_dispatch =
+				READ_BREADCRUMB(dev_priv);
+	}
+
+	if (gt_iir & (GT_USER_INTERRUPT | GT_PIPE_NOTIFY))
+		notify_ring(dev, &dev_priv->ring[RCS]);
+	if (gt_iir & GT_GEN6_BSD_USER_INTERRUPT)
+		notify_ring(dev, &dev_priv->ring[VCS]);
+	if (gt_iir & GT_BLT_USER_INTERRUPT)
+		notify_ring(dev, &dev_priv->ring[BCS]);
+
+	if (de_iir & DE_GSE_IVB)
+		intel_opregion_gse_intr(dev);
+
+	if (de_iir & DE_PLANEA_FLIP_DONE_IVB) {
+		intel_prepare_page_flip(dev, 0);
+		intel_finish_page_flip_plane(dev, 0);
+	}
+
+	if (de_iir & DE_PLANEB_FLIP_DONE_IVB) {
+		intel_prepare_page_flip(dev, 1);
+		intel_finish_page_flip_plane(dev, 1);
+	}
+
+	if (de_iir & DE_PIPEA_VBLANK_IVB)
+		drm_handle_vblank(dev, 0);
+
+	if (de_iir & DE_PIPEB_VBLANK_IVB);
+		drm_handle_vblank(dev, 1);
+
+	/* check event from PCH */
+	if (de_iir & DE_PCH_EVENT_IVB) {
+		if (pch_iir & SDE_HOTPLUG_MASK_CPT)
+			queue_work(dev_priv->wq, &dev_priv->hotplug_work);
+		pch_irq_handler(dev);
+	}
+
+	if (pm_iir & GEN6_PM_DEFERRED_EVENTS) {
+		unsigned long flags;
+		spin_lock_irqsave(&dev_priv->rps_lock, flags);
+		WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
+		I915_WRITE(GEN6_PMIMR, pm_iir);
+		dev_priv->pm_iir |= pm_iir;
+		spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
+		queue_work(dev_priv->wq, &dev_priv->rps_work);
+	}
+
+	/* should clear PCH hotplug event before clear CPU irq */
+	I915_WRITE(SDEIIR, pch_iir);
+	I915_WRITE(GTIIR, gt_iir);
+	I915_WRITE(DEIIR, de_iir);
+	I915_WRITE(GEN6_PMIIR, pm_iir);
+
+done:
+	I915_WRITE(DEIER, de_ier);
+	POSTING_READ(DEIER);
+
+	return ret;
+}
+
+irqreturn_t ironlake_irq_handler(DRM_IRQ_ARGS)
 {
+	struct drm_device *dev = (struct drm_device *) arg;
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	int ret = IRQ_NONE;
 	u32 de_iir, gt_iir, de_ier, pch_iir, pm_iir;
@@ -457,6 +560,8 @@ static irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 	struct drm_i915_master_private *master_priv;
 	u32 bsd_usr_interrupt = GT_BSD_USER_INTERRUPT;
 
+	atomic_inc(&dev_priv->irq_received);
+
 	if (IS_GEN6(dev))
 		bsd_usr_interrupt = GT_GEN6_BSD_USER_INTERRUPT;
 
@@ -526,13 +631,30 @@ static irqreturn_t ironlake_irq_handler(struct drm_device *dev)
 		i915_handle_rps_change(dev);
 	}
 
-	if (IS_GEN6(dev))
-		gen6_pm_irq_handler(dev);
+	if (IS_GEN6(dev) && pm_iir & GEN6_PM_DEFERRED_EVENTS) {
+		/*
+		 * IIR bits should never already be set because IMR should
+		 * prevent an interrupt from being shown in IIR. The warning
+		 * displays a case where we've unsafely cleared
+		 * dev_priv->pm_iir. Although missing an interrupt of the same
+		 * type is not a problem, it displays a problem in the logic.
+		 *
+		 * The mask bit in IMR is cleared by rps_work.
+		 */
+		unsigned long flags;
+		spin_lock_irqsave(&dev_priv->rps_lock, flags);
+		WARN(dev_priv->pm_iir & pm_iir, "Missed a PM interrupt\n");
+		I915_WRITE(GEN6_PMIMR, pm_iir);
+		dev_priv->pm_iir |= pm_iir;
+		spin_unlock_irqrestore(&dev_priv->rps_lock, flags);
+		queue_work(dev_priv->wq, &dev_priv->rps_work);
+	}
 
 	/* should clear PCH hotplug event before clear CPU irq */
 	I915_WRITE(SDEIIR, pch_iir);
 	I915_WRITE(GTIIR, gt_iir);
 	I915_WRITE(DEIIR, de_iir);
+	I915_WRITE(GEN6_PMIIR, pm_iir);
 
 done:
 	I915_WRITE(DEIER, de_ier);
@@ -676,7 +798,7 @@ static u32 capture_bo_list(struct drm_i915_error_buffer *err,
 		err->dirty = obj->dirty;
 		err->purgeable = obj->madv != I915_MADV_WILLNEED;
 		err->ring = obj->ring ? obj->ring->id : 0;
-		err->agp_type = obj->agp_type == AGP_USER_CACHED_MEMORY;
+		err->cache_level = obj->cache_level;
 
 		if (++i == count)
 			break;
@@ -1103,9 +1225,6 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)
 
 	atomic_inc(&dev_priv->irq_received);
 
-	if (HAS_PCH_SPLIT(dev))
-		return ironlake_irq_handler(dev);
-
 	iir = I915_READ(IIR);
 
 	if (INTEL_INFO(dev)->gen >= 4)
@@ -1344,10 +1463,7 @@ int i915_enable_vblank(struct drm_device *dev, int pipe)
 		return -EINVAL;
 
 	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
-	if (HAS_PCH_SPLIT(dev))
-		ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
-					    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
-	else if (INTEL_INFO(dev)->gen >= 4)
+	if (INTEL_INFO(dev)->gen >= 4)
 		i915_enable_pipestat(dev_priv, pipe,
 				     PIPE_START_VBLANK_INTERRUPT_ENABLE);
 	else
@@ -1362,6 +1478,38 @@ int i915_enable_vblank(struct drm_device *dev, int pipe)
 	return 0;
 }
 
+int ironlake_enable_vblank(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	unsigned long irqflags;
+
+	if (!i915_pipe_enabled(dev, pipe))
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
+				    DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	return 0;
+}
+
+int ivybridge_enable_vblank(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	unsigned long irqflags;
+
+	if (!i915_pipe_enabled(dev, pipe))
+		return -EINVAL;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	ironlake_enable_display_irq(dev_priv, (pipe == 0) ?
+				    DE_PIPEA_VBLANK_IVB : DE_PIPEB_VBLANK_IVB);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+
+	return 0;
+}
+
 /* Called from drm generic code, passed 'crtc' which
  * we use as a pipe index
  */
@@ -1375,13 +1523,31 @@ void i915_disable_vblank(struct drm_device *dev, int pipe)
 		I915_WRITE(INSTPM,
 			   INSTPM_AGPBUSY_DIS << 16 | INSTPM_AGPBUSY_DIS);
 
-	if (HAS_PCH_SPLIT(dev))
-		ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
-					     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
-	else
-		i915_disable_pipestat(dev_priv, pipe,
-				      PIPE_VBLANK_INTERRUPT_ENABLE |
-				      PIPE_START_VBLANK_INTERRUPT_ENABLE);
+	i915_disable_pipestat(dev_priv, pipe,
+			      PIPE_VBLANK_INTERRUPT_ENABLE |
+			      PIPE_START_VBLANK_INTERRUPT_ENABLE);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+void ironlake_disable_vblank(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
+				     DE_PIPEA_VBLANK: DE_PIPEB_VBLANK);
+	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
+}
+
+void ivybridge_disable_vblank(struct drm_device *dev, int pipe)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	unsigned long irqflags;
+
+	spin_lock_irqsave(&dev_priv->irq_lock, irqflags);
+	ironlake_disable_display_irq(dev_priv, (pipe == 0) ?
+				     DE_PIPEA_VBLANK_IVB : DE_PIPEB_VBLANK_IVB);
 	spin_unlock_irqrestore(&dev_priv->irq_lock, irqflags);
 }
 
@@ -1562,10 +1728,17 @@ repeat:
 
 /* drm_dma.h hooks
 */
-static void ironlake_irq_preinstall(struct drm_device *dev)
+void ironlake_irq_preinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 
+	atomic_set(&dev_priv->irq_received, 0);
+
+	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
+	INIT_WORK(&dev_priv->error_work, i915_error_work_func);
+	if (IS_GEN6(dev) || IS_IVYBRIDGE(dev))
+		INIT_WORK(&dev_priv->rps_work, gen6_pm_rps_work);
+
 	I915_WRITE(HWSTAM, 0xeffe);
 
 	/* XXX hotplug from PCH */
@@ -1585,7 +1758,7 @@ static void ironlake_irq_preinstall(struct drm_device *dev)
 	POSTING_READ(SDEIER);
 }
 
-static int ironlake_irq_postinstall(struct drm_device *dev)
+int ironlake_irq_postinstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
 	/* enable kind of interrupts always enabled */
@@ -1594,6 +1767,13 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 	u32 render_irqs;
 	u32 hotplug_mask;
 
+	DRM_INIT_WAITQUEUE(&dev_priv->ring[RCS].irq_queue);
+	if (HAS_BSD(dev))
+		DRM_INIT_WAITQUEUE(&dev_priv->ring[VCS].irq_queue);
+	if (HAS_BLT(dev))
+		DRM_INIT_WAITQUEUE(&dev_priv->ring[BCS].irq_queue);
+
+	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
 	dev_priv->irq_mask = ~display_mask;
 
 	/* should always can generate irq */
@@ -1650,6 +1830,56 @@ static int ironlake_irq_postinstall(struct drm_device *dev)
 	return 0;
 }
 
+int ivybridge_irq_postinstall(struct drm_device *dev)
+{
+	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+	/* enable kind of interrupts always enabled */
+	u32 display_mask = DE_MASTER_IRQ_CONTROL | DE_GSE_IVB |
+		DE_PCH_EVENT_IVB | DE_PLANEA_FLIP_DONE_IVB |
+		DE_PLANEB_FLIP_DONE_IVB;
+	u32 render_irqs;
+	u32 hotplug_mask;
+
+	DRM_INIT_WAITQUEUE(&dev_priv->ring[RCS].irq_queue);
+	if (HAS_BSD(dev))
+		DRM_INIT_WAITQUEUE(&dev_priv->ring[VCS].irq_queue);
+	if (HAS_BLT(dev))
+		DRM_INIT_WAITQUEUE(&dev_priv->ring[BCS].irq_queue);
+
+	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
+	dev_priv->irq_mask = ~display_mask;
+
+	/* should always can generate irq */
+	I915_WRITE(DEIIR, I915_READ(DEIIR));
+	I915_WRITE(DEIMR, dev_priv->irq_mask);
+	I915_WRITE(DEIER, display_mask | DE_PIPEA_VBLANK_IVB |
+		   DE_PIPEB_VBLANK_IVB);
+	POSTING_READ(DEIER);
+
+	dev_priv->gt_irq_mask = ~0;
+
+	I915_WRITE(GTIIR, I915_READ(GTIIR));
+	I915_WRITE(GTIMR, dev_priv->gt_irq_mask);
+
+	render_irqs = GT_USER_INTERRUPT | GT_GEN6_BSD_USER_INTERRUPT |
+		GT_BLT_USER_INTERRUPT;
+	I915_WRITE(GTIER, render_irqs);
+	POSTING_READ(GTIER);
+
+	hotplug_mask = (SDE_CRT_HOTPLUG_CPT |
+			SDE_PORTB_HOTPLUG_CPT |
+			SDE_PORTC_HOTPLUG_CPT |
+			SDE_PORTD_HOTPLUG_CPT);
+	dev_priv->pch_irq_mask = ~hotplug_mask;
+
+	I915_WRITE(SDEIIR, I915_READ(SDEIIR));
+	I915_WRITE(SDEIMR, dev_priv->pch_irq_mask);
+	I915_WRITE(SDEIER, hotplug_mask);
+	POSTING_READ(SDEIER);
+
+	return 0;
+}
+
 void i915_driver_irq_preinstall(struct drm_device * dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
@@ -1660,11 +1890,6 @@ void i915_driver_irq_preinstall(struct drm_device * dev)
 	INIT_WORK(&dev_priv->hotplug_work, i915_hotplug_work_func);
 	INIT_WORK(&dev_priv->error_work, i915_error_work_func);
 
-	if (HAS_PCH_SPLIT(dev)) {
-		ironlake_irq_preinstall(dev);
-		return;
-	}
-
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
@@ -1688,17 +1913,8 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 	u32 enable_mask = I915_INTERRUPT_ENABLE_FIX | I915_INTERRUPT_ENABLE_VAR;
 	u32 error_mask;
 
-	DRM_INIT_WAITQUEUE(&dev_priv->ring[RCS].irq_queue);
-	if (HAS_BSD(dev))
-		DRM_INIT_WAITQUEUE(&dev_priv->ring[VCS].irq_queue);
-	if (HAS_BLT(dev))
-		DRM_INIT_WAITQUEUE(&dev_priv->ring[BCS].irq_queue);
-
 	dev_priv->vblank_pipe = DRM_I915_VBLANK_PIPE_A | DRM_I915_VBLANK_PIPE_B;
 
-	if (HAS_PCH_SPLIT(dev))
-		return ironlake_irq_postinstall(dev);
-
 	/* Unmask the interrupts that we always want on. */
 	dev_priv->irq_mask = ~I915_INTERRUPT_ENABLE_FIX;
 
@@ -1767,9 +1983,15 @@ int i915_driver_irq_postinstall(struct drm_device *dev)
 	return 0;
 }
 
-static void ironlake_irq_uninstall(struct drm_device *dev)
+void ironlake_irq_uninstall(struct drm_device *dev)
 {
 	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;
+
+	if (!dev_priv)
+		return;
+
+	dev_priv->vblank_pipe = 0;
+
 	I915_WRITE(HWSTAM, 0xffffffff);
 
 	I915_WRITE(DEIMR, 0xffffffff);
@@ -1791,11 +2013,6 @@ void i915_driver_irq_uninstall(struct drm_device * dev)
 
 	dev_priv->vblank_pipe = 0;
 
-	if (HAS_PCH_SPLIT(dev)) {
-		ironlake_irq_uninstall(dev);
-		return;
-	}
-
 	if (I915_HAS_HOTPLUG(dev)) {
 		I915_WRITE(PORT_HOTPLUG_EN, 0);
 		I915_WRITE(PORT_HOTPLUG_STAT, I915_READ(PORT_HOTPLUG_STAT));
diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
index f39ac3a0fa93..2f967af8e62e 100644
--- a/drivers/gpu/drm/i915/i915_reg.h
+++ b/drivers/gpu/drm/i915/i915_reg.h
@@ -291,6 +291,9 @@
 #define RING_MAX_IDLE(base)	((base)+0x54)
 #define RING_HWS_PGA(base)	((base)+0x80)
 #define RING_HWS_PGA_GEN6(base)	((base)+0x2080)
+#define RENDER_HWS_PGA_GEN7	(0x04080)
+#define BSD_HWS_PGA_GEN7	(0x04180)
+#define BLT_HWS_PGA_GEN7	(0x04280)
 #define RING_ACTHD(base)	((base)+0x74)
 #define RING_NOPID(base)	((base)+0x94)
 #define RING_IMR(base)		((base)+0xa8)
@@ -2778,6 +2781,19 @@
 #define DE_PIPEA_VSYNC          (1 << 3)
 #define DE_PIPEA_FIFO_UNDERRUN  (1 << 0)
 
+/* More Ivybridge lolz */
+#define DE_ERR_DEBUG_IVB		(1<<30)
+#define DE_GSE_IVB			(1<<29)
+#define DE_PCH_EVENT_IVB		(1<<28)
+#define DE_DP_A_HOTPLUG_IVB		(1<<27)
+#define DE_AUX_CHANNEL_A_IVB		(1<<26)
+#define DE_SPRITEB_FLIP_DONE_IVB	(1<<9)
+#define DE_SPRITEA_FLIP_DONE_IVB	(1<<4)
+#define DE_PLANEB_FLIP_DONE_IVB		(1<<8)
+#define DE_PLANEA_FLIP_DONE_IVB		(1<<3)
+#define DE_PIPEB_VBLANK_IVB		(1<<5)
+#define DE_PIPEA_VBLANK_IVB		(1<<0)
+
 #define DEISR   0x44000
 #define DEIMR   0x44004
 #define DEIIR   0x44008
@@ -2809,6 +2825,7 @@
 #define  ILK_eDP_A_DISABLE		(1<<24)
 #define  ILK_DESKTOP			(1<<23)
 #define ILK_DSPCLK_GATE		0x42020
+#define  IVB_VRHUNIT_CLK_GATE	(1<<28)
 #define  ILK_DPARB_CLK_GATE	(1<<5)
 #define  ILK_DPFD_CLK_GATE	(1<<7)
 
@@ -3057,6 +3074,9 @@
 #define  TRANS_6BPC             (2<<5)
 #define  TRANS_12BPC            (3<<5)
 
+#define SOUTH_CHICKEN2		0xc2004
+#define  DPLS_EDP_PPS_FIX_DIS	(1<<0)
+
 #define _FDI_RXA_CHICKEN         0xc200c
 #define _FDI_RXB_CHICKEN         0xc2010
 #define  FDI_RX_PHASE_SYNC_POINTER_OVR	(1<<1)
@@ -3104,7 +3124,15 @@
 #define  FDI_TX_ENHANCE_FRAME_ENABLE    (1<<18)
 /* Ironlake: hardwired to 1 */
 #define  FDI_TX_PLL_ENABLE              (1<<14)
+
+/* Ivybridge has different bits for lolz */
+#define  FDI_LINK_TRAIN_PATTERN_1_IVB       (0<<8)
+#define  FDI_LINK_TRAIN_PATTERN_2_IVB       (1<<8)
+#define  FDI_LINK_TRAIN_PATTERN_IDLE_IVB    (2<<8)
+#define  FDI_LINK_TRAIN_NONE_IVB            (3<<8)
+
 /* both Tx and Rx */
+#define  FDI_LINK_TRAIN_AUTO		(1<<10)
 #define  FDI_SCRAMBLING_ENABLE          (0<<7)
 #define  FDI_SCRAMBLING_DISABLE         (1<<7)
 
@@ -3114,6 +3142,8 @@
 #define FDI_RX_CTL(pipe) _PIPE(pipe, _FDI_RXA_CTL, _FDI_RXB_CTL)
 #define  FDI_RX_ENABLE          (1<<31)
 /* train, dp width same as FDI_TX */
+#define  FDI_FS_ERRC_ENABLE		(1<<27)
+#define  FDI_FE_ERRC_ENABLE		(1<<26)
 #define  FDI_DP_PORT_WIDTH_X8           (7<<19)
 #define  FDI_8BPC                       (0<<16)
 #define  FDI_10BPC                      (1<<16)
@@ -3386,7 +3416,7 @@
 #define GEN6_PMINTRMSK				0xA168
 
 #define GEN6_PMISR				0x44020
-#define GEN6_PMIMR				0x44024
+#define GEN6_PMIMR				0x44024 /* rps_lock */
 #define GEN6_PMIIR				0x44028
 #define GEN6_PMIER				0x4402C
 #define  GEN6_PM_MBOX_EVENT			(1<<25)
@@ -3396,6 +3426,9 @@
 #define  GEN6_PM_RP_DOWN_THRESHOLD		(1<<4)
 #define  GEN6_PM_RP_UP_EI_EXPIRED		(1<<2)
 #define  GEN6_PM_RP_DOWN_EI_EXPIRED		(1<<1)
+#define  GEN6_PM_DEFERRED_EVENTS		(GEN6_PM_RP_UP_THRESHOLD | \
+						 GEN6_PM_RP_DOWN_THRESHOLD | \
+						 GEN6_PM_RP_DOWN_TIMEOUT)
 
 #define GEN6_PCODE_MAILBOX			0x138124
 #define   GEN6_PCODE_READY			(1<<31)
diff --git a/drivers/gpu/drm/i915/i915_suspend.c b/drivers/gpu/drm/i915/i915_suspend.c
index da474153a0a2..60a94d2b5264 100644
--- a/drivers/gpu/drm/i915/i915_suspend.c
+++ b/drivers/gpu/drm/i915/i915_suspend.c
@@ -863,8 +863,7 @@ int i915_restore_state(struct drm_device *dev)
 		I915_WRITE(IMR, dev_priv->saveIMR);
 	}
 
-	/* Clock gating state */
-	intel_enable_clock_gating(dev);
+	intel_init_clock_gating(dev);
 
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_enable_drps(dev);
diff --git a/drivers/gpu/drm/i915/intel_bios.c b/drivers/gpu/drm/i915/intel_bios.c
index fb5b4d426ae0..927442a11925 100644
--- a/drivers/gpu/drm/i915/intel_bios.c
+++ b/drivers/gpu/drm/i915/intel_bios.c
@@ -214,9 +214,9 @@ parse_lfp_panel_data(struct drm_i915_private *dev_priv,
 	    i915_lvds_downclock) {
 		dev_priv->lvds_downclock_avail = 1;
 		dev_priv->lvds_downclock = temp_downclock;
-		DRM_DEBUG_KMS("LVDS downclock is found in VBT. ",
-				"Normal Clock %dKHz, downclock %dKHz\n",
-				temp_downclock, panel_fixed_mode->clock);
+		DRM_DEBUG_KMS("LVDS downclock is found in VBT. "
+			      "Normal Clock %dKHz, downclock %dKHz\n",
+			      temp_downclock, panel_fixed_mode->clock);
 	}
 	return;
 }
diff --git a/drivers/gpu/drm/i915/intel_crt.c b/drivers/gpu/drm/i915/intel_crt.c
index d03fc05b39c0..e93f93cc7e78 100644
--- a/drivers/gpu/drm/i915/intel_crt.c
+++ b/drivers/gpu/drm/i915/intel_crt.c
@@ -305,13 +305,11 @@ static bool intel_crt_detect_ddc(struct drm_connector *connector)
 }
 
 static enum drm_connector_status
-intel_crt_load_detect(struct drm_crtc *crtc, struct intel_crt *crt)
+intel_crt_load_detect(struct intel_crt *crt)
 {
-	struct drm_encoder *encoder = &crt->base.base;
-	struct drm_device *dev = encoder->dev;
+	struct drm_device *dev = crt->base.base.dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
-	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
-	uint32_t pipe = intel_crtc->pipe;
+	uint32_t pipe = to_intel_crtc(crt->base.base.crtc)->pipe;
 	uint32_t save_bclrpat;
 	uint32_t save_vtotal;
 	uint32_t vtotal, vactive;
@@ -432,7 +430,6 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	struct drm_device *dev = connector->dev;
 	struct intel_crt *crt = intel_attached_crt(connector);
 	struct drm_crtc *crtc;
-	int dpms_mode;
 	enum drm_connector_status status;
 
 	if (I915_HAS_HOTPLUG(dev)) {
@@ -454,17 +451,18 @@ intel_crt_detect(struct drm_connector *connector, bool force)
 	/* for pre-945g platforms use load detect */
 	crtc = crt->base.base.crtc;
 	if (crtc && crtc->enabled) {
-		status = intel_crt_load_detect(crtc, crt);
+		status = intel_crt_load_detect(crt);
 	} else {
-		crtc = intel_get_load_detect_pipe(&crt->base, connector,
-						  NULL, &dpms_mode);
-		if (crtc) {
+		struct intel_load_detect_pipe tmp;
+
+		if (intel_get_load_detect_pipe(&crt->base, connector, NULL,
+					       &tmp)) {
 			if (intel_crt_detect_ddc(connector))
 				status = connector_status_connected;
 			else
-				status = intel_crt_load_detect(crtc, crt);
-			intel_release_load_detect_pipe(&crt->base,
-						       connector, dpms_mode);
+				status = intel_crt_load_detect(crt);
+			intel_release_load_detect_pipe(&crt->base, connector,
+						       &tmp);
 		} else
 			status = connector_status_unknown;
 	}
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 2166ee071ddb..f553ddfdc168 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -76,255 +76,6 @@ struct intel_limit {
 		      int, int, intel_clock_t *);
 };
 
-#define I8XX_DOT_MIN		  25000
-#define I8XX_DOT_MAX		 350000
-#define I8XX_VCO_MIN		 930000
-#define I8XX_VCO_MAX		1400000
-#define I8XX_N_MIN		      3
-#define I8XX_N_MAX		     16
-#define I8XX_M_MIN		     96
-#define I8XX_M_MAX		    140
-#define I8XX_M1_MIN		     18
-#define I8XX_M1_MAX		     26
-#define I8XX_M2_MIN		      6
-#define I8XX_M2_MAX		     16
-#define I8XX_P_MIN		      4
-#define I8XX_P_MAX		    128
-#define I8XX_P1_MIN		      2
-#define I8XX_P1_MAX		     33
-#define I8XX_P1_LVDS_MIN	      1
-#define I8XX_P1_LVDS_MAX	      6
-#define I8XX_P2_SLOW		      4
-#define I8XX_P2_FAST		      2
-#define I8XX_P2_LVDS_SLOW	      14
-#define I8XX_P2_LVDS_FAST	      7
-#define I8XX_P2_SLOW_LIMIT	 165000
-
-#define I9XX_DOT_MIN		  20000
-#define I9XX_DOT_MAX		 400000
-#define I9XX_VCO_MIN		1400000
-#define I9XX_VCO_MAX		2800000
-#define PINEVIEW_VCO_MIN		1700000
-#define PINEVIEW_VCO_MAX		3500000
-#define I9XX_N_MIN		      1
-#define I9XX_N_MAX		      6
-/* Pineview's Ncounter is a ring counter */
-#define PINEVIEW_N_MIN		      3
-#define PINEVIEW_N_MAX		      6
-#define I9XX_M_MIN		     70
-#define I9XX_M_MAX		    120
-#define PINEVIEW_M_MIN		      2
-#define PINEVIEW_M_MAX		    256
-#define I9XX_M1_MIN		     10
-#define I9XX_M1_MAX		     22
-#define I9XX_M2_MIN		      5
-#define I9XX_M2_MAX		      9
-/* Pineview M1 is reserved, and must be 0 */
-#define PINEVIEW_M1_MIN		      0
-#define PINEVIEW_M1_MAX		      0
-#define PINEVIEW_M2_MIN		      0
-#define PINEVIEW_M2_MAX		      254
-#define I9XX_P_SDVO_DAC_MIN	      5
-#define I9XX_P_SDVO_DAC_MAX	     80
-#define I9XX_P_LVDS_MIN		      7
-#define I9XX_P_LVDS_MAX		     98
-#define PINEVIEW_P_LVDS_MIN		      7
-#define PINEVIEW_P_LVDS_MAX		     112
-#define I9XX_P1_MIN		      1
-#define I9XX_P1_MAX		      8
-#define I9XX_P2_SDVO_DAC_SLOW		     10
-#define I9XX_P2_SDVO_DAC_FAST		      5
-#define I9XX_P2_SDVO_DAC_SLOW_LIMIT	 200000
-#define I9XX_P2_LVDS_SLOW		     14
-#define I9XX_P2_LVDS_FAST		      7
-#define I9XX_P2_LVDS_SLOW_LIMIT		 112000
-
-/*The parameter is for SDVO on G4x platform*/
-#define G4X_DOT_SDVO_MIN           25000
-#define G4X_DOT_SDVO_MAX           270000
-#define G4X_VCO_MIN                1750000
-#define G4X_VCO_MAX                3500000
-#define G4X_N_SDVO_MIN             1
-#define G4X_N_SDVO_MAX             4
-#define G4X_M_SDVO_MIN             104
-#define G4X_M_SDVO_MAX             138
-#define G4X_M1_SDVO_MIN            17
-#define G4X_M1_SDVO_MAX            23
-#define G4X_M2_SDVO_MIN            5
-#define G4X_M2_SDVO_MAX            11
-#define G4X_P_SDVO_MIN             10
-#define G4X_P_SDVO_MAX             30
-#define G4X_P1_SDVO_MIN            1
-#define G4X_P1_SDVO_MAX            3
-#define G4X_P2_SDVO_SLOW           10
-#define G4X_P2_SDVO_FAST           10
-#define G4X_P2_SDVO_LIMIT          270000
-
-/*The parameter is for HDMI_DAC on G4x platform*/
-#define G4X_DOT_HDMI_DAC_MIN           22000
-#define G4X_DOT_HDMI_DAC_MAX           400000
-#define G4X_N_HDMI_DAC_MIN             1
-#define G4X_N_HDMI_DAC_MAX             4
-#define G4X_M_HDMI_DAC_MIN             104
-#define G4X_M_HDMI_DAC_MAX             138
-#define G4X_M1_HDMI_DAC_MIN            16
-#define G4X_M1_HDMI_DAC_MAX            23
-#define G4X_M2_HDMI_DAC_MIN            5
-#define G4X_M2_HDMI_DAC_MAX            11
-#define G4X_P_HDMI_DAC_MIN             5
-#define G4X_P_HDMI_DAC_MAX             80
-#define G4X_P1_HDMI_DAC_MIN            1
-#define G4X_P1_HDMI_DAC_MAX            8
-#define G4X_P2_HDMI_DAC_SLOW           10
-#define G4X_P2_HDMI_DAC_FAST           5
-#define G4X_P2_HDMI_DAC_LIMIT          165000
-
-/*The parameter is for SINGLE_CHANNEL_LVDS on G4x platform*/
-#define G4X_DOT_SINGLE_CHANNEL_LVDS_MIN           20000
-#define G4X_DOT_SINGLE_CHANNEL_LVDS_MAX           115000
-#define G4X_N_SINGLE_CHANNEL_LVDS_MIN             1
-#define G4X_N_SINGLE_CHANNEL_LVDS_MAX             3
-#define G4X_M_SINGLE_CHANNEL_LVDS_MIN             104
-#define G4X_M_SINGLE_CHANNEL_LVDS_MAX             138
-#define G4X_M1_SINGLE_CHANNEL_LVDS_MIN            17
-#define G4X_M1_SINGLE_CHANNEL_LVDS_MAX            23
-#define G4X_M2_SINGLE_CHANNEL_LVDS_MIN            5
-#define G4X_M2_SINGLE_CHANNEL_LVDS_MAX            11
-#define G4X_P_SINGLE_CHANNEL_LVDS_MIN             28
-#define G4X_P_SINGLE_CHANNEL_LVDS_MAX             112
-#define G4X_P1_SINGLE_CHANNEL_LVDS_MIN            2
-#define G4X_P1_SINGLE_CHANNEL_LVDS_MAX            8
-#define G4X_P2_SINGLE_CHANNEL_LVDS_SLOW           14
-#define G4X_P2_SINGLE_CHANNEL_LVDS_FAST           14
-#define G4X_P2_SINGLE_CHANNEL_LVDS_LIMIT          0
-
-/*The parameter is for DUAL_CHANNEL_LVDS on G4x platform*/
-#define G4X_DOT_DUAL_CHANNEL_LVDS_MIN           80000
-#define G4X_DOT_DUAL_CHANNEL_LVDS_MAX           224000
-#define G4X_N_DUAL_CHANNEL_LVDS_MIN             1
-#define G4X_N_DUAL_CHANNEL_LVDS_MAX             3
-#define G4X_M_DUAL_CHANNEL_LVDS_MIN             104
-#define G4X_M_DUAL_CHANNEL_LVDS_MAX             138
-#define G4X_M1_DUAL_CHANNEL_LVDS_MIN            17
-#define G4X_M1_DUAL_CHANNEL_LVDS_MAX            23
-#define G4X_M2_DUAL_CHANNEL_LVDS_MIN            5
-#define G4X_M2_DUAL_CHANNEL_LVDS_MAX            11
-#define G4X_P_DUAL_CHANNEL_LVDS_MIN             14
-#define G4X_P_DUAL_CHANNEL_LVDS_MAX             42
-#define G4X_P1_DUAL_CHANNEL_LVDS_MIN            2
-#define G4X_P1_DUAL_CHANNEL_LVDS_MAX            6
-#define G4X_P2_DUAL_CHANNEL_LVDS_SLOW           7
-#define G4X_P2_DUAL_CHANNEL_LVDS_FAST           7
-#define G4X_P2_DUAL_CHANNEL_LVDS_LIMIT          0
-
-/*The parameter is for DISPLAY PORT on G4x platform*/
-#define G4X_DOT_DISPLAY_PORT_MIN           161670
-#define G4X_DOT_DISPLAY_PORT_MAX           227000
-#define G4X_N_DISPLAY_PORT_MIN             1
-#define G4X_N_DISPLAY_PORT_MAX             2
-#define G4X_M_DISPLAY_PORT_MIN             97
-#define G4X_M_DISPLAY_PORT_MAX             108
-#define G4X_M1_DISPLAY_PORT_MIN            0x10
-#define G4X_M1_DISPLAY_PORT_MAX            0x12
-#define G4X_M2_DISPLAY_PORT_MIN            0x05
-#define G4X_M2_DISPLAY_PORT_MAX            0x06
-#define G4X_P_DISPLAY_PORT_MIN             10
-#define G4X_P_DISPLAY_PORT_MAX             20
-#define G4X_P1_DISPLAY_PORT_MIN            1
-#define G4X_P1_DISPLAY_PORT_MAX            2
-#define G4X_P2_DISPLAY_PORT_SLOW           10
-#define G4X_P2_DISPLAY_PORT_FAST           10
-#define G4X_P2_DISPLAY_PORT_LIMIT          0
-
-/* Ironlake / Sandybridge */
-/* as we calculate clock using (register_value + 2) for
-   N/M1/M2, so here the range value for them is (actual_value-2).
- */
-#define IRONLAKE_DOT_MIN         25000
-#define IRONLAKE_DOT_MAX         350000
-#define IRONLAKE_VCO_MIN         1760000
-#define IRONLAKE_VCO_MAX         3510000
-#define IRONLAKE_M1_MIN          12
-#define IRONLAKE_M1_MAX          22
-#define IRONLAKE_M2_MIN          5
-#define IRONLAKE_M2_MAX          9
-#define IRONLAKE_P2_DOT_LIMIT    225000 /* 225Mhz */
-
-/* We have parameter ranges for different type of outputs. */
-
-/* DAC & HDMI Refclk 120Mhz */
-#define IRONLAKE_DAC_N_MIN	1
-#define IRONLAKE_DAC_N_MAX	5
-#define IRONLAKE_DAC_M_MIN	79
-#define IRONLAKE_DAC_M_MAX	127
-#define IRONLAKE_DAC_P_MIN	5
-#define IRONLAKE_DAC_P_MAX	80
-#define IRONLAKE_DAC_P1_MIN	1
-#define IRONLAKE_DAC_P1_MAX	8
-#define IRONLAKE_DAC_P2_SLOW	10
-#define IRONLAKE_DAC_P2_FAST	5
-
-/* LVDS single-channel 120Mhz refclk */
-#define IRONLAKE_LVDS_S_N_MIN	1
-#define IRONLAKE_LVDS_S_N_MAX	3
-#define IRONLAKE_LVDS_S_M_MIN	79
-#define IRONLAKE_LVDS_S_M_MAX	118
-#define IRONLAKE_LVDS_S_P_MIN	28
-#define IRONLAKE_LVDS_S_P_MAX	112
-#define IRONLAKE_LVDS_S_P1_MIN	2
-#define IRONLAKE_LVDS_S_P1_MAX	8
-#define IRONLAKE_LVDS_S_P2_SLOW	14
-#define IRONLAKE_LVDS_S_P2_FAST	14
-
-/* LVDS dual-channel 120Mhz refclk */
-#define IRONLAKE_LVDS_D_N_MIN	1
-#define IRONLAKE_LVDS_D_N_MAX	3
-#define IRONLAKE_LVDS_D_M_MIN	79
-#define IRONLAKE_LVDS_D_M_MAX	127
-#define IRONLAKE_LVDS_D_P_MIN	14
-#define IRONLAKE_LVDS_D_P_MAX	56
-#define IRONLAKE_LVDS_D_P1_MIN	2
-#define IRONLAKE_LVDS_D_P1_MAX	8
-#define IRONLAKE_LVDS_D_P2_SLOW	7
-#define IRONLAKE_LVDS_D_P2_FAST	7
-
-/* LVDS single-channel 100Mhz refclk */
-#define IRONLAKE_LVDS_S_SSC_N_MIN	1
-#define IRONLAKE_LVDS_S_SSC_N_MAX	2
-#define IRONLAKE_LVDS_S_SSC_M_MIN	79
-#define IRONLAKE_LVDS_S_SSC_M_MAX	126
-#define IRONLAKE_LVDS_S_SSC_P_MIN	28
-#define IRONLAKE_LVDS_S_SSC_P_MAX	112
-#define IRONLAKE_LVDS_S_SSC_P1_MIN	2
-#define IRONLAKE_LVDS_S_SSC_P1_MAX	8
-#define IRONLAKE_LVDS_S_SSC_P2_SLOW	14
-#define IRONLAKE_LVDS_S_SSC_P2_FAST	14
-
-/* LVDS dual-channel 100Mhz refclk */
-#define IRONLAKE_LVDS_D_SSC_N_MIN	1
-#define IRONLAKE_LVDS_D_SSC_N_MAX	3
-#define IRONLAKE_LVDS_D_SSC_M_MIN	79
-#define IRONLAKE_LVDS_D_SSC_M_MAX	126
-#define IRONLAKE_LVDS_D_SSC_P_MIN	14
-#define IRONLAKE_LVDS_D_SSC_P_MAX	42
-#define IRONLAKE_LVDS_D_SSC_P1_MIN	2
-#define IRONLAKE_LVDS_D_SSC_P1_MAX	6
-#define IRONLAKE_LVDS_D_SSC_P2_SLOW	7
-#define IRONLAKE_LVDS_D_SSC_P2_FAST	7
-
-/* DisplayPort */
-#define IRONLAKE_DP_N_MIN		1
-#define IRONLAKE_DP_N_MAX		2
-#define IRONLAKE_DP_M_MIN		81
-#define IRONLAKE_DP_M_MAX		90
-#define IRONLAKE_DP_P_MIN		10
-#define IRONLAKE_DP_P_MAX		20
-#define IRONLAKE_DP_P2_FAST		10
-#define IRONLAKE_DP_P2_SLOW		10
-#define IRONLAKE_DP_P2_LIMIT		0
-#define IRONLAKE_DP_P1_MIN		1
-#define IRONLAKE_DP_P1_MAX		2
-
 /* FDI */
 #define IRONLAKE_FDI_FREQ		2700000 /* in kHz for mode->clock */
 
@@ -353,292 +104,253 @@ intel_fdi_link_freq(struct drm_device *dev)
 }
 
 static const intel_limit_t intel_limits_i8xx_dvo = {
-        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
-        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
-        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
-        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
-        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
-        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
-        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
-        .p1  = { .min = I8XX_P1_MIN,		.max = I8XX_P1_MAX },
-	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
-		 .p2_slow = I8XX_P2_SLOW,	.p2_fast = I8XX_P2_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 930000, .max = 1400000 },
+        .n = { .min = 3, .max = 16 },
+        .m = { .min = 96, .max = 140 },
+        .m1 = { .min = 18, .max = 26 },
+        .m2 = { .min = 6, .max = 16 },
+        .p = { .min = 4, .max = 128 },
+        .p1 = { .min = 2, .max = 33 },
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 4, .p2_fast = 2 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i8xx_lvds = {
-        .dot = { .min = I8XX_DOT_MIN,		.max = I8XX_DOT_MAX },
-        .vco = { .min = I8XX_VCO_MIN,		.max = I8XX_VCO_MAX },
-        .n   = { .min = I8XX_N_MIN,		.max = I8XX_N_MAX },
-        .m   = { .min = I8XX_M_MIN,		.max = I8XX_M_MAX },
-        .m1  = { .min = I8XX_M1_MIN,		.max = I8XX_M1_MAX },
-        .m2  = { .min = I8XX_M2_MIN,		.max = I8XX_M2_MAX },
-        .p   = { .min = I8XX_P_MIN,		.max = I8XX_P_MAX },
-        .p1  = { .min = I8XX_P1_LVDS_MIN,	.max = I8XX_P1_LVDS_MAX },
-	.p2  = { .dot_limit = I8XX_P2_SLOW_LIMIT,
-		 .p2_slow = I8XX_P2_LVDS_SLOW,	.p2_fast = I8XX_P2_LVDS_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 930000, .max = 1400000 },
+        .n = { .min = 3, .max = 16 },
+        .m = { .min = 96, .max = 140 },
+        .m1 = { .min = 18, .max = 26 },
+        .m2 = { .min = 6, .max = 16 },
+        .p = { .min = 4, .max = 128 },
+        .p1 = { .min = 1, .max = 6 },
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
-	
+
 static const intel_limit_t intel_limits_i9xx_sdvo = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
-        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
-        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
-        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
-        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
-        .p   = { .min = I9XX_P_SDVO_DAC_MIN,	.max = I9XX_P_SDVO_DAC_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	.p2  = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_SDVO_DAC_SLOW,	.p2_fast = I9XX_P2_SDVO_DAC_FAST },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1400000, .max = 2800000 },
+        .n = { .min = 1, .max = 6 },
+        .m = { .min = 70, .max = 120 },
+        .m1 = { .min = 10, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 5, .max = 80 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 200000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_i9xx_lvds = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = I9XX_VCO_MIN,		.max = I9XX_VCO_MAX },
-        .n   = { .min = I9XX_N_MIN,		.max = I9XX_N_MAX },
-        .m   = { .min = I9XX_M_MIN,		.max = I9XX_M_MAX },
-        .m1  = { .min = I9XX_M1_MIN,		.max = I9XX_M1_MAX },
-        .m2  = { .min = I9XX_M2_MIN,		.max = I9XX_M2_MAX },
-        .p   = { .min = I9XX_P_LVDS_MIN,	.max = I9XX_P_LVDS_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	/* The single-channel range is 25-112Mhz, and dual-channel
-	 * is 80-224Mhz.  Prefer single channel as much as possible.
-	 */
-	.p2  = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_FAST },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1400000, .max = 2800000 },
+        .n = { .min = 1, .max = 6 },
+        .m = { .min = 70, .max = 120 },
+        .m1 = { .min = 10, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 7, .max = 98 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 112000,
+		.p2_slow = 14, .p2_fast = 7 },
 	.find_pll = intel_find_best_PLL,
 };
 
-    /* below parameter and function is for G4X Chipset Family*/
+
 static const intel_limit_t intel_limits_g4x_sdvo = {
-	.dot = { .min = G4X_DOT_SDVO_MIN,	.max = G4X_DOT_SDVO_MAX },
-	.vco = { .min = G4X_VCO_MIN,	        .max = G4X_VCO_MAX},
-	.n   = { .min = G4X_N_SDVO_MIN,	        .max = G4X_N_SDVO_MAX },
-	.m   = { .min = G4X_M_SDVO_MIN,         .max = G4X_M_SDVO_MAX },
-	.m1  = { .min = G4X_M1_SDVO_MIN,	.max = G4X_M1_SDVO_MAX },
-	.m2  = { .min = G4X_M2_SDVO_MIN,	.max = G4X_M2_SDVO_MAX },
-	.p   = { .min = G4X_P_SDVO_MIN,         .max = G4X_P_SDVO_MAX },
-	.p1  = { .min = G4X_P1_SDVO_MIN,	.max = G4X_P1_SDVO_MAX},
-	.p2  = { .dot_limit = G4X_P2_SDVO_LIMIT,
-		 .p2_slow = G4X_P2_SDVO_SLOW,
-		 .p2_fast = G4X_P2_SDVO_FAST
+	.dot = { .min = 25000, .max = 270000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 4 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 10, .max = 30 },
+	.p1 = { .min = 1, .max = 3},
+	.p2 = { .dot_limit = 270000,
+		.p2_slow = 10,
+		.p2_fast = 10
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_hdmi = {
-	.dot = { .min = G4X_DOT_HDMI_DAC_MIN,	.max = G4X_DOT_HDMI_DAC_MAX },
-	.vco = { .min = G4X_VCO_MIN,	        .max = G4X_VCO_MAX},
-	.n   = { .min = G4X_N_HDMI_DAC_MIN,	.max = G4X_N_HDMI_DAC_MAX },
-	.m   = { .min = G4X_M_HDMI_DAC_MIN,	.max = G4X_M_HDMI_DAC_MAX },
-	.m1  = { .min = G4X_M1_HDMI_DAC_MIN,	.max = G4X_M1_HDMI_DAC_MAX },
-	.m2  = { .min = G4X_M2_HDMI_DAC_MIN,	.max = G4X_M2_HDMI_DAC_MAX },
-	.p   = { .min = G4X_P_HDMI_DAC_MIN,	.max = G4X_P_HDMI_DAC_MAX },
-	.p1  = { .min = G4X_P1_HDMI_DAC_MIN,	.max = G4X_P1_HDMI_DAC_MAX},
-	.p2  = { .dot_limit = G4X_P2_HDMI_DAC_LIMIT,
-		 .p2_slow = G4X_P2_HDMI_DAC_SLOW,
-		 .p2_fast = G4X_P2_HDMI_DAC_FAST
-	},
+	.dot = { .min = 22000, .max = 400000 },
+	.vco = { .min = 1750000, .max = 3500000},
+	.n = { .min = 1, .max = 4 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 16, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8},
+	.p2 = { .dot_limit = 165000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_single_channel_lvds = {
-	.dot = { .min = G4X_DOT_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_DOT_SINGLE_CHANNEL_LVDS_MAX },
-	.vco = { .min = G4X_VCO_MIN,
-		 .max = G4X_VCO_MAX },
-	.n   = { .min = G4X_N_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_N_SINGLE_CHANNEL_LVDS_MAX },
-	.m   = { .min = G4X_M_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M_SINGLE_CHANNEL_LVDS_MAX },
-	.m1  = { .min = G4X_M1_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M1_SINGLE_CHANNEL_LVDS_MAX },
-	.m2  = { .min = G4X_M2_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_M2_SINGLE_CHANNEL_LVDS_MAX },
-	.p   = { .min = G4X_P_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_P_SINGLE_CHANNEL_LVDS_MAX },
-	.p1  = { .min = G4X_P1_SINGLE_CHANNEL_LVDS_MIN,
-		 .max = G4X_P1_SINGLE_CHANNEL_LVDS_MAX },
-	.p2  = { .dot_limit = G4X_P2_SINGLE_CHANNEL_LVDS_LIMIT,
-		 .p2_slow = G4X_P2_SINGLE_CHANNEL_LVDS_SLOW,
-		 .p2_fast = G4X_P2_SINGLE_CHANNEL_LVDS_FAST
+	.dot = { .min = 20000, .max = 115000 },
+	.vco = { .min = 1750000, .max = 3500000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 0,
+		.p2_slow = 14, .p2_fast = 14
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_dual_channel_lvds = {
-	.dot = { .min = G4X_DOT_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_DOT_DUAL_CHANNEL_LVDS_MAX },
-	.vco = { .min = G4X_VCO_MIN,
-		 .max = G4X_VCO_MAX },
-	.n   = { .min = G4X_N_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_N_DUAL_CHANNEL_LVDS_MAX },
-	.m   = { .min = G4X_M_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M_DUAL_CHANNEL_LVDS_MAX },
-	.m1  = { .min = G4X_M1_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M1_DUAL_CHANNEL_LVDS_MAX },
-	.m2  = { .min = G4X_M2_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_M2_DUAL_CHANNEL_LVDS_MAX },
-	.p   = { .min = G4X_P_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_P_DUAL_CHANNEL_LVDS_MAX },
-	.p1  = { .min = G4X_P1_DUAL_CHANNEL_LVDS_MIN,
-		 .max = G4X_P1_DUAL_CHANNEL_LVDS_MAX },
-	.p2  = { .dot_limit = G4X_P2_DUAL_CHANNEL_LVDS_LIMIT,
-		 .p2_slow = G4X_P2_DUAL_CHANNEL_LVDS_SLOW,
-		 .p2_fast = G4X_P2_DUAL_CHANNEL_LVDS_FAST
+	.dot = { .min = 80000, .max = 224000 },
+	.vco = { .min = 1750000, .max = 3500000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 104, .max = 138 },
+	.m1 = { .min = 17, .max = 23 },
+	.m2 = { .min = 5, .max = 11 },
+	.p = { .min = 14, .max = 42 },
+	.p1 = { .min = 2, .max = 6 },
+	.p2 = { .dot_limit = 0,
+		.p2_slow = 7, .p2_fast = 7
 	},
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_g4x_display_port = {
-        .dot = { .min = G4X_DOT_DISPLAY_PORT_MIN,
-                 .max = G4X_DOT_DISPLAY_PORT_MAX },
-        .vco = { .min = G4X_VCO_MIN,
-                 .max = G4X_VCO_MAX},
-        .n   = { .min = G4X_N_DISPLAY_PORT_MIN,
-                 .max = G4X_N_DISPLAY_PORT_MAX },
-        .m   = { .min = G4X_M_DISPLAY_PORT_MIN,
-                 .max = G4X_M_DISPLAY_PORT_MAX },
-        .m1  = { .min = G4X_M1_DISPLAY_PORT_MIN,
-                 .max = G4X_M1_DISPLAY_PORT_MAX },
-        .m2  = { .min = G4X_M2_DISPLAY_PORT_MIN,
-                 .max = G4X_M2_DISPLAY_PORT_MAX },
-        .p   = { .min = G4X_P_DISPLAY_PORT_MIN,
-                 .max = G4X_P_DISPLAY_PORT_MAX },
-        .p1  = { .min = G4X_P1_DISPLAY_PORT_MIN,
-                 .max = G4X_P1_DISPLAY_PORT_MAX},
-        .p2  = { .dot_limit = G4X_P2_DISPLAY_PORT_LIMIT,
-                 .p2_slow = G4X_P2_DISPLAY_PORT_SLOW,
-                 .p2_fast = G4X_P2_DISPLAY_PORT_FAST },
+        .dot = { .min = 161670, .max = 227000 },
+        .vco = { .min = 1750000, .max = 3500000},
+        .n = { .min = 1, .max = 2 },
+        .m = { .min = 97, .max = 108 },
+        .m1 = { .min = 0x10, .max = 0x12 },
+        .m2 = { .min = 0x05, .max = 0x06 },
+        .p = { .min = 10, .max = 20 },
+        .p1 = { .min = 1, .max = 2},
+        .p2 = { .dot_limit = 0,
+		.p2_slow = 10, .p2_fast = 10 },
         .find_pll = intel_find_pll_g4x_dp,
 };
 
 static const intel_limit_t intel_limits_pineview_sdvo = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX},
-        .vco = { .min = PINEVIEW_VCO_MIN,		.max = PINEVIEW_VCO_MAX },
-        .n   = { .min = PINEVIEW_N_MIN,		.max = PINEVIEW_N_MAX },
-        .m   = { .min = PINEVIEW_M_MIN,		.max = PINEVIEW_M_MAX },
-        .m1  = { .min = PINEVIEW_M1_MIN,		.max = PINEVIEW_M1_MAX },
-        .m2  = { .min = PINEVIEW_M2_MIN,		.max = PINEVIEW_M2_MAX },
-        .p   = { .min = I9XX_P_SDVO_DAC_MIN,    .max = I9XX_P_SDVO_DAC_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	.p2  = { .dot_limit = I9XX_P2_SDVO_DAC_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_SDVO_DAC_SLOW,	.p2_fast = I9XX_P2_SDVO_DAC_FAST },
+        .dot = { .min = 20000, .max = 400000},
+        .vco = { .min = 1700000, .max = 3500000 },
+	/* Pineview's Ncounter is a ring counter */
+        .n = { .min = 3, .max = 6 },
+        .m = { .min = 2, .max = 256 },
+	/* Pineview only has one combined m divider, which we treat as m2. */
+        .m1 = { .min = 0, .max = 0 },
+        .m2 = { .min = 0, .max = 254 },
+        .p = { .min = 5, .max = 80 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 200000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_pineview_lvds = {
-        .dot = { .min = I9XX_DOT_MIN,		.max = I9XX_DOT_MAX },
-        .vco = { .min = PINEVIEW_VCO_MIN,		.max = PINEVIEW_VCO_MAX },
-        .n   = { .min = PINEVIEW_N_MIN,		.max = PINEVIEW_N_MAX },
-        .m   = { .min = PINEVIEW_M_MIN,		.max = PINEVIEW_M_MAX },
-        .m1  = { .min = PINEVIEW_M1_MIN,		.max = PINEVIEW_M1_MAX },
-        .m2  = { .min = PINEVIEW_M2_MIN,		.max = PINEVIEW_M2_MAX },
-        .p   = { .min = PINEVIEW_P_LVDS_MIN,	.max = PINEVIEW_P_LVDS_MAX },
-        .p1  = { .min = I9XX_P1_MIN,		.max = I9XX_P1_MAX },
-	/* Pineview only supports single-channel mode. */
-	.p2  = { .dot_limit = I9XX_P2_LVDS_SLOW_LIMIT,
-		 .p2_slow = I9XX_P2_LVDS_SLOW,	.p2_fast = I9XX_P2_LVDS_SLOW },
+        .dot = { .min = 20000, .max = 400000 },
+        .vco = { .min = 1700000, .max = 3500000 },
+        .n = { .min = 3, .max = 6 },
+        .m = { .min = 2, .max = 256 },
+        .m1 = { .min = 0, .max = 0 },
+        .m2 = { .min = 0, .max = 254 },
+        .p = { .min = 7, .max = 112 },
+        .p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 112000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_find_best_PLL,
 };
 
+/* Ironlake / Sandybridge
+ *
+ * We calculate clock using (register_value + 2) for N/M1/M2, so here
+ * the range value for them is (actual_value - 2).
+ */
 static const intel_limit_t intel_limits_ironlake_dac = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_DAC_N_MIN,        .max = IRONLAKE_DAC_N_MAX },
-	.m   = { .min = IRONLAKE_DAC_M_MIN,        .max = IRONLAKE_DAC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_DAC_P_MIN,	   .max = IRONLAKE_DAC_P_MAX },
-	.p1  = { .min = IRONLAKE_DAC_P1_MIN,       .max = IRONLAKE_DAC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_DAC_P2_SLOW,
-		 .p2_fast = IRONLAKE_DAC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 5 },
+	.m = { .min = 79, .max = 127 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 5, .max = 80 },
+	.p1 = { .min = 1, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 10, .p2_fast = 5 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_single_lvds = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_S_N_MIN,     .max = IRONLAKE_LVDS_S_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_S_M_MIN,     .max = IRONLAKE_LVDS_S_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_S_P_MIN,     .max = IRONLAKE_LVDS_S_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_S_P1_MIN,    .max = IRONLAKE_LVDS_S_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_S_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_S_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 118 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_dual_lvds = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_D_N_MIN,     .max = IRONLAKE_LVDS_D_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_D_M_MIN,     .max = IRONLAKE_LVDS_D_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_D_P_MIN,     .max = IRONLAKE_LVDS_D_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_D_P1_MIN,    .max = IRONLAKE_LVDS_D_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_D_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_D_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 127 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 14, .max = 56 },
+	.p1 = { .min = 2, .max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
+/* LVDS 100mhz refclk limits. */
 static const intel_limit_t intel_limits_ironlake_single_lvds_100m = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_S_SSC_N_MIN, .max = IRONLAKE_LVDS_S_SSC_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_S_SSC_M_MIN, .max = IRONLAKE_LVDS_S_SSC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_S_SSC_P_MIN, .max = IRONLAKE_LVDS_S_SSC_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_S_SSC_P1_MIN,.max = IRONLAKE_LVDS_S_SSC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_S_SSC_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_S_SSC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 2 },
+	.m = { .min = 79, .max = 126 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 28, .max = 112 },
+	.p1 = { .min = 2,.max = 8 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 14, .p2_fast = 14 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_dual_lvds_100m = {
-	.dot = { .min = IRONLAKE_DOT_MIN,          .max = IRONLAKE_DOT_MAX },
-	.vco = { .min = IRONLAKE_VCO_MIN,          .max = IRONLAKE_VCO_MAX },
-	.n   = { .min = IRONLAKE_LVDS_D_SSC_N_MIN, .max = IRONLAKE_LVDS_D_SSC_N_MAX },
-	.m   = { .min = IRONLAKE_LVDS_D_SSC_M_MIN, .max = IRONLAKE_LVDS_D_SSC_M_MAX },
-	.m1  = { .min = IRONLAKE_M1_MIN,           .max = IRONLAKE_M1_MAX },
-	.m2  = { .min = IRONLAKE_M2_MIN,           .max = IRONLAKE_M2_MAX },
-	.p   = { .min = IRONLAKE_LVDS_D_SSC_P_MIN, .max = IRONLAKE_LVDS_D_SSC_P_MAX },
-	.p1  = { .min = IRONLAKE_LVDS_D_SSC_P1_MIN,.max = IRONLAKE_LVDS_D_SSC_P1_MAX },
-	.p2  = { .dot_limit = IRONLAKE_P2_DOT_LIMIT,
-		 .p2_slow = IRONLAKE_LVDS_D_SSC_P2_SLOW,
-		 .p2_fast = IRONLAKE_LVDS_D_SSC_P2_FAST },
+	.dot = { .min = 25000, .max = 350000 },
+	.vco = { .min = 1760000, .max = 3510000 },
+	.n = { .min = 1, .max = 3 },
+	.m = { .min = 79, .max = 126 },
+	.m1 = { .min = 12, .max = 22 },
+	.m2 = { .min = 5, .max = 9 },
+	.p = { .min = 14, .max = 42 },
+	.p1 = { .min = 2,.max = 6 },
+	.p2 = { .dot_limit = 225000,
+		.p2_slow = 7, .p2_fast = 7 },
 	.find_pll = intel_g4x_find_best_PLL,
 };
 
 static const intel_limit_t intel_limits_ironlake_display_port = {
-        .dot = { .min = IRONLAKE_DOT_MIN,
-                 .max = IRONLAKE_DOT_MAX },
-        .vco = { .min = IRONLAKE_VCO_MIN,
-                 .max = IRONLAKE_VCO_MAX},
-        .n   = { .min = IRONLAKE_DP_N_MIN,
-                 .max = IRONLAKE_DP_N_MAX },
-        .m   = { .min = IRONLAKE_DP_M_MIN,
-                 .max = IRONLAKE_DP_M_MAX },
-        .m1  = { .min = IRONLAKE_M1_MIN,
-                 .max = IRONLAKE_M1_MAX },
-        .m2  = { .min = IRONLAKE_M2_MIN,
-                 .max = IRONLAKE_M2_MAX },
-        .p   = { .min = IRONLAKE_DP_P_MIN,
-                 .max = IRONLAKE_DP_P_MAX },
-        .p1  = { .min = IRONLAKE_DP_P1_MIN,
-                 .max = IRONLAKE_DP_P1_MAX},
-        .p2  = { .dot_limit = IRONLAKE_DP_P2_LIMIT,
-                 .p2_slow = IRONLAKE_DP_P2_SLOW,
-                 .p2_fast = IRONLAKE_DP_P2_FAST },
+        .dot = { .min = 25000, .max = 350000 },
+        .vco = { .min = 1760000, .max = 3510000},
+        .n = { .min = 1, .max = 2 },
+        .m = { .min = 81, .max = 90 },
+        .m1 = { .min = 12, .max = 22 },
+        .m2 = { .min = 5, .max = 9 },
+        .p = { .min = 10, .max = 20 },
+        .p1 = { .min = 1, .max = 2},
+        .p2 = { .dot_limit = 0,
+		.p2_slow = 10, .p2_fast = 10 },
         .find_pll = intel_find_pll_ironlake_dp,
 };
 
@@ -1828,7 +1540,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 	u32 blt_ecoskpd;
 
 	/* Make sure blitter notifies FBC of writes */
-	__gen6_gt_force_wake_get(dev_priv);
+	gen6_gt_force_wake_get(dev_priv);
 	blt_ecoskpd = I915_READ(GEN6_BLITTER_ECOSKPD);
 	blt_ecoskpd |= GEN6_BLITTER_FBC_NOTIFY <<
 		GEN6_BLITTER_LOCK_SHIFT;
@@ -1839,7 +1551,7 @@ static void sandybridge_blit_fbc_update(struct drm_device *dev)
 			 GEN6_BLITTER_LOCK_SHIFT);
 	I915_WRITE(GEN6_BLITTER_ECOSKPD, blt_ecoskpd);
 	POSTING_READ(GEN6_BLITTER_ECOSKPD);
-	__gen6_gt_force_wake_put(dev_priv);
+	gen6_gt_force_wake_put(dev_priv);
 }
 
 static void ironlake_enable_fbc(struct drm_crtc *crtc, unsigned long interval)
@@ -2019,6 +1731,11 @@ static void intel_update_fbc(struct drm_device *dev)
 	intel_fb = to_intel_framebuffer(fb);
 	obj = intel_fb->obj;
 
+	if (!i915_enable_fbc) {
+		DRM_DEBUG_KMS("fbc disabled per module param (default off)\n");
+		dev_priv->no_fbc_reason = FBC_MODULE_PARAM;
+		goto out_disable;
+	}
 	if (intel_fb->obj->base.size > dev_priv->cfb_size) {
 		DRM_DEBUG_KMS("framebuffer too large, disabling "
 			      "compression\n");
@@ -2339,8 +2056,13 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc)
 	/* enable normal train */
 	reg = FDI_TX_CTL(pipe);
 	temp = I915_READ(reg);
-	temp &= ~FDI_LINK_TRAIN_NONE;
-	temp |= FDI_LINK_TRAIN_NONE | FDI_TX_ENHANCE_FRAME_ENABLE;
+	if (IS_IVYBRIDGE(dev)) {
+		temp &= ~FDI_LINK_TRAIN_NONE_IVB;
+		temp |= FDI_LINK_TRAIN_NONE_IVB | FDI_TX_ENHANCE_FRAME_ENABLE;
+	} else {
+		temp &= ~FDI_LINK_TRAIN_NONE;
+		temp |= FDI_LINK_TRAIN_NONE | FDI_TX_ENHANCE_FRAME_ENABLE;
+	}
 	I915_WRITE(reg, temp);
 
 	reg = FDI_RX_CTL(pipe);
@@ -2357,6 +2079,11 @@ static void intel_fdi_normal_train(struct drm_crtc *crtc)
 	/* wait one idle pattern time */
 	POSTING_READ(reg);
 	udelay(1000);
+
+	/* IVB wants error correction enabled */
+	if (IS_IVYBRIDGE(dev))
+		I915_WRITE(reg, I915_READ(reg) | FDI_FS_ERRC_ENABLE |
+			   FDI_FE_ERRC_ENABLE);
 }
 
 /* The FDI link training functions for ILK/Ibexpeak. */
@@ -2584,7 +2311,116 @@ static void gen6_fdi_link_train(struct drm_crtc *crtc)
 	DRM_DEBUG_KMS("FDI train done.\n");
 }
 
-static void ironlake_fdi_enable(struct drm_crtc *crtc)
+/* Manual link training for Ivy Bridge A0 parts */
+static void ivb_manual_fdi_link_train(struct drm_crtc *crtc)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	u32 reg, temp, i;
+
+	/* Train 1: umask FDI RX Interrupt symbol_lock and bit_lock bit
+	   for train result */
+	reg = FDI_RX_IMR(pipe);
+	temp = I915_READ(reg);
+	temp &= ~FDI_RX_SYMBOL_LOCK;
+	temp &= ~FDI_RX_BIT_LOCK;
+	I915_WRITE(reg, temp);
+
+	POSTING_READ(reg);
+	udelay(150);
+
+	/* enable CPU FDI TX and PCH FDI RX */
+	reg = FDI_TX_CTL(pipe);
+	temp = I915_READ(reg);
+	temp &= ~(7 << 19);
+	temp |= (intel_crtc->fdi_lanes - 1) << 19;
+	temp &= ~(FDI_LINK_TRAIN_AUTO | FDI_LINK_TRAIN_NONE_IVB);
+	temp |= FDI_LINK_TRAIN_PATTERN_1_IVB;
+	temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
+	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
+	I915_WRITE(reg, temp | FDI_TX_ENABLE);
+
+	reg = FDI_RX_CTL(pipe);
+	temp = I915_READ(reg);
+	temp &= ~FDI_LINK_TRAIN_AUTO;
+	temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
+	temp |= FDI_LINK_TRAIN_PATTERN_1_CPT;
+	I915_WRITE(reg, temp | FDI_RX_ENABLE);
+
+	POSTING_READ(reg);
+	udelay(150);
+
+	for (i = 0; i < 4; i++ ) {
+		reg = FDI_TX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
+		temp |= snb_b_fdi_train_param[i];
+		I915_WRITE(reg, temp);
+
+		POSTING_READ(reg);
+		udelay(500);
+
+		reg = FDI_RX_IIR(pipe);
+		temp = I915_READ(reg);
+		DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
+
+		if (temp & FDI_RX_BIT_LOCK ||
+		    (I915_READ(reg) & FDI_RX_BIT_LOCK)) {
+			I915_WRITE(reg, temp | FDI_RX_BIT_LOCK);
+			DRM_DEBUG_KMS("FDI train 1 done.\n");
+			break;
+		}
+	}
+	if (i == 4)
+		DRM_ERROR("FDI train 1 fail!\n");
+
+	/* Train 2 */
+	reg = FDI_TX_CTL(pipe);
+	temp = I915_READ(reg);
+	temp &= ~FDI_LINK_TRAIN_NONE_IVB;
+	temp |= FDI_LINK_TRAIN_PATTERN_2_IVB;
+	temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
+	temp |= FDI_LINK_TRAIN_400MV_0DB_SNB_B;
+	I915_WRITE(reg, temp);
+
+	reg = FDI_RX_CTL(pipe);
+	temp = I915_READ(reg);
+	temp &= ~FDI_LINK_TRAIN_PATTERN_MASK_CPT;
+	temp |= FDI_LINK_TRAIN_PATTERN_2_CPT;
+	I915_WRITE(reg, temp);
+
+	POSTING_READ(reg);
+	udelay(150);
+
+	for (i = 0; i < 4; i++ ) {
+		reg = FDI_TX_CTL(pipe);
+		temp = I915_READ(reg);
+		temp &= ~FDI_LINK_TRAIN_VOL_EMP_MASK;
+		temp |= snb_b_fdi_train_param[i];
+		I915_WRITE(reg, temp);
+
+		POSTING_READ(reg);
+		udelay(500);
+
+		reg = FDI_RX_IIR(pipe);
+		temp = I915_READ(reg);
+		DRM_DEBUG_KMS("FDI_RX_IIR 0x%x\n", temp);
+
+		if (temp & FDI_RX_SYMBOL_LOCK) {
+			I915_WRITE(reg, temp | FDI_RX_SYMBOL_LOCK);
+			DRM_DEBUG_KMS("FDI train 2 done.\n");
+			break;
+		}
+	}
+	if (i == 4)
+		DRM_ERROR("FDI train 2 fail!\n");
+
+	DRM_DEBUG_KMS("FDI train done.\n");
+}
+
+static void ironlake_fdi_pll_enable(struct drm_crtc *crtc)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
@@ -2757,10 +2593,7 @@ static void ironlake_pch_enable(struct drm_crtc *crtc)
 	u32 reg, temp;
 
 	/* For PCH output, training FDI link */
-	if (IS_GEN6(dev))
-		gen6_fdi_link_train(crtc);
-	else
-		ironlake_fdi_link_train(crtc);
+	dev_priv->display.fdi_link_train(crtc);
 
 	intel_enable_pch_pll(dev_priv, pipe);
 
@@ -2850,7 +2683,7 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 	is_pch_port = intel_crtc_driving_pch(crtc);
 
 	if (is_pch_port)
-		ironlake_fdi_enable(crtc);
+		ironlake_fdi_pll_enable(crtc);
 	else
 		ironlake_fdi_disable(crtc);
 
@@ -2873,7 +2706,11 @@ static void ironlake_crtc_enable(struct drm_crtc *crtc)
 		ironlake_pch_enable(crtc);
 
 	intel_crtc_load_lut(crtc);
+
+	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
+	mutex_unlock(&dev->struct_mutex);
+
 	intel_crtc_update_cursor(crtc, true);
 }
 
@@ -2969,8 +2806,11 @@ static void ironlake_crtc_disable(struct drm_crtc *crtc)
 
 	intel_crtc->active = false;
 	intel_update_watermarks(dev);
+
+	mutex_lock(&dev->struct_mutex);
 	intel_update_fbc(dev);
 	intel_clear_scanline_wait(dev);
+	mutex_unlock(&dev->struct_mutex);
 }
 
 static void ironlake_crtc_dpms(struct drm_crtc *crtc, int mode)
@@ -3497,11 +3337,11 @@ static unsigned long intel_calculate_wm(unsigned long clock_in_khz,
 		1000;
 	entries_required = DIV_ROUND_UP(entries_required, wm->cacheline_size);
 
-	DRM_DEBUG_KMS("FIFO entries required for mode: %d\n", entries_required);
+	DRM_DEBUG_KMS("FIFO entries required for mode: %ld\n", entries_required);
 
 	wm_size = fifo_size - (entries_required + wm->guard_size);
 
-	DRM_DEBUG_KMS("FIFO watermark level: %d\n", wm_size);
+	DRM_DEBUG_KMS("FIFO watermark level: %ld\n", wm_size);
 
 	/* Don't promote wm_size to unsigned... */
 	if (wm_size > (long)wm->max_wm)
@@ -3823,13 +3663,13 @@ static bool g4x_check_srwm(struct drm_device *dev,
 		      display_wm, cursor_wm);
 
 	if (display_wm > display->max_wm) {
-		DRM_DEBUG_KMS("display watermark is too large(%d), disabling\n",
+		DRM_DEBUG_KMS("display watermark is too large(%d/%ld), disabling\n",
 			      display_wm, display->max_wm);
 		return false;
 	}
 
 	if (cursor_wm > cursor->max_wm) {
-		DRM_DEBUG_KMS("cursor watermark is too large(%d), disabling\n",
+		DRM_DEBUG_KMS("cursor watermark is too large(%d/%ld), disabling\n",
 			      cursor_wm, cursor->max_wm);
 		return false;
 	}
@@ -4516,34 +4356,28 @@ static inline bool intel_panel_use_ssc(struct drm_i915_private *dev_priv)
 	return dev_priv->lvds_use_ssc && i915_panel_use_ssc;
 }
 
-static int intel_crtc_mode_set(struct drm_crtc *crtc,
-			       struct drm_display_mode *mode,
-			       struct drm_display_mode *adjusted_mode,
-			       int x, int y,
-			       struct drm_framebuffer *old_fb)
+static int i9xx_crtc_mode_set(struct drm_crtc *crtc,
+			      struct drm_display_mode *mode,
+			      struct drm_display_mode *adjusted_mode,
+			      int x, int y,
+			      struct drm_framebuffer *old_fb)
 {
 	struct drm_device *dev = crtc->dev;
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
 	int pipe = intel_crtc->pipe;
 	int plane = intel_crtc->plane;
-	u32 fp_reg, dpll_reg;
 	int refclk, num_connectors = 0;
 	intel_clock_t clock, reduced_clock;
 	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
 	bool ok, has_reduced_clock = false, is_sdvo = false, is_dvo = false;
 	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
-	struct intel_encoder *has_edp_encoder = NULL;
 	struct drm_mode_config *mode_config = &dev->mode_config;
 	struct intel_encoder *encoder;
 	const intel_limit_t *limit;
 	int ret;
-	struct fdi_m_n m_n = {0};
-	u32 reg, temp;
+	u32 temp;
 	u32 lvds_sync = 0;
-	int target_clock;
-
-	drm_vblank_pre_modeset(dev, pipe);
 
 	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
 		if (encoder->base.crtc != crtc)
@@ -4571,9 +4405,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		case INTEL_OUTPUT_DISPLAYPORT:
 			is_dp = true;
 			break;
-		case INTEL_OUTPUT_EDP:
-			has_edp_encoder = encoder;
-			break;
 		}
 
 		num_connectors++;
@@ -4585,9 +4416,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			      refclk / 1000);
 	} else if (!IS_GEN2(dev)) {
 		refclk = 96000;
-		if (HAS_PCH_SPLIT(dev) &&
-		    (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)))
-			refclk = 120000; /* 120Mhz refclk */
 	} else {
 		refclk = 48000;
 	}
@@ -4601,7 +4429,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
 	if (!ok) {
 		DRM_ERROR("Couldn't find PLL settings for mode!\n");
-		drm_vblank_post_modeset(dev, pipe);
 		return -EINVAL;
 	}
 
@@ -4645,143 +4472,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		}
 	}
 
-	/* FDI link */
-	if (HAS_PCH_SPLIT(dev)) {
-		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
-		int lane = 0, link_bw, bpp;
-		/* CPU eDP doesn't require FDI link, so just set DP M/N
-		   according to current link config */
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			target_clock = mode->clock;
-			intel_edp_link_config(has_edp_encoder,
-					      &lane, &link_bw);
-		} else {
-			/* [e]DP over FDI requires target mode clock
-			   instead of link clock */
-			if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
-				target_clock = mode->clock;
-			else
-				target_clock = adjusted_mode->clock;
-
-			/* FDI is a binary signal running at ~2.7GHz, encoding
-			 * each output octet as 10 bits. The actual frequency
-			 * is stored as a divider into a 100MHz clock, and the
-			 * mode pixel clock is stored in units of 1KHz.
-			 * Hence the bw of each lane in terms of the mode signal
-			 * is:
-			 */
-			link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
-		}
-
-		/* determine panel color depth */
-		temp = I915_READ(PIPECONF(pipe));
-		temp &= ~PIPE_BPC_MASK;
-		if (is_lvds) {
-			/* the BPC will be 6 if it is 18-bit LVDS panel */
-			if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
-				temp |= PIPE_8BPC;
-			else
-				temp |= PIPE_6BPC;
-		} else if (has_edp_encoder) {
-			switch (dev_priv->edp.bpp/3) {
-			case 8:
-				temp |= PIPE_8BPC;
-				break;
-			case 10:
-				temp |= PIPE_10BPC;
-				break;
-			case 6:
-				temp |= PIPE_6BPC;
-				break;
-			case 12:
-				temp |= PIPE_12BPC;
-				break;
-			}
-		} else
-			temp |= PIPE_8BPC;
-		I915_WRITE(PIPECONF(pipe), temp);
-
-		switch (temp & PIPE_BPC_MASK) {
-		case PIPE_8BPC:
-			bpp = 24;
-			break;
-		case PIPE_10BPC:
-			bpp = 30;
-			break;
-		case PIPE_6BPC:
-			bpp = 18;
-			break;
-		case PIPE_12BPC:
-			bpp = 36;
-			break;
-		default:
-			DRM_ERROR("unknown pipe bpc value\n");
-			bpp = 24;
-		}
-
-		if (!lane) {
-			/* 
-			 * Account for spread spectrum to avoid
-			 * oversubscribing the link. Max center spread
-			 * is 2.5%; use 5% for safety's sake.
-			 */
-			u32 bps = target_clock * bpp * 21 / 20;
-			lane = bps / (link_bw * 8) + 1;
-		}
-
-		intel_crtc->fdi_lanes = lane;
-
-		if (pixel_multiplier > 1)
-			link_bw *= pixel_multiplier;
-		ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
-	}
-
-	/* Ironlake: try to setup display ref clock before DPLL
-	 * enabling. This is only under driver's control after
-	 * PCH B stepping, previous chipset stepping should be
-	 * ignoring this setting.
-	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		temp = I915_READ(PCH_DREF_CONTROL);
-		/* Always enable nonspread source */
-		temp &= ~DREF_NONSPREAD_SOURCE_MASK;
-		temp |= DREF_NONSPREAD_SOURCE_ENABLE;
-		temp &= ~DREF_SSC_SOURCE_MASK;
-		temp |= DREF_SSC_SOURCE_ENABLE;
-		I915_WRITE(PCH_DREF_CONTROL, temp);
-
-		POSTING_READ(PCH_DREF_CONTROL);
-		udelay(200);
-
-		if (has_edp_encoder) {
-			if (intel_panel_use_ssc(dev_priv)) {
-				temp |= DREF_SSC1_ENABLE;
-				I915_WRITE(PCH_DREF_CONTROL, temp);
-
-				POSTING_READ(PCH_DREF_CONTROL);
-				udelay(200);
-			}
-			temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
-
-			/* Enable CPU source on CPU attached eDP */
-			if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-				if (intel_panel_use_ssc(dev_priv))
-					temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
-				else
-					temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
-			} else {
-				/* Enable SSC on PCH eDP if needed */
-				if (intel_panel_use_ssc(dev_priv)) {
-					DRM_ERROR("enabling SSC on PCH\n");
-					temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
-				}
-			}
-			I915_WRITE(PCH_DREF_CONTROL, temp);
-			POSTING_READ(PCH_DREF_CONTROL);
-			udelay(200);
-		}
-	}
-
 	if (IS_PINEVIEW(dev)) {
 		fp = (1 << clock.n) << 16 | clock.m1 << 8 | clock.m2;
 		if (has_reduced_clock)
@@ -4794,25 +4484,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 				reduced_clock.m2;
 	}
 
-	/* Enable autotuning of the PLL clock (if permissible) */
-	if (HAS_PCH_SPLIT(dev)) {
-		int factor = 21;
-
-		if (is_lvds) {
-			if ((intel_panel_use_ssc(dev_priv) &&
-			     dev_priv->lvds_ssc_freq == 100) ||
-			    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
-				factor = 25;
-		} else if (is_sdvo && is_tv)
-			factor = 20;
-
-		if (clock.m1 < factor * clock.n)
-			fp |= FP_CB_TUNE;
-	}
-
-	dpll = 0;
-	if (!HAS_PCH_SPLIT(dev))
-		dpll = DPLL_VGA_MODE_DIS;
+	dpll = DPLL_VGA_MODE_DIS;
 
 	if (!IS_GEN2(dev)) {
 		if (is_lvds)
@@ -4824,12 +4496,10 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			if (pixel_multiplier > 1) {
 				if (IS_I945G(dev) || IS_I945GM(dev) || IS_G33(dev))
 					dpll |= (pixel_multiplier - 1) << SDVO_MULTIPLIER_SHIFT_HIRES;
-				else if (HAS_PCH_SPLIT(dev))
-					dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
 			}
 			dpll |= DPLL_DVO_HIGH_SPEED;
 		}
-		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+		if (is_dp)
 			dpll |= DPLL_DVO_HIGH_SPEED;
 
 		/* compute bitmask from p1 value */
@@ -4837,9 +4507,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT_PINEVIEW;
 		else {
 			dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
-			/* also FPA1 */
-			if (HAS_PCH_SPLIT(dev))
-				dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 			if (IS_G4X(dev) && has_reduced_clock)
 				dpll |= (1 << (reduced_clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
 		}
@@ -4857,7 +4524,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
 			break;
 		}
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev))
+		if (INTEL_INFO(dev)->gen >= 4)
 			dpll |= (6 << PLL_LOAD_PULSE_PHASE_SHIFT);
 	} else {
 		if (is_lvds) {
@@ -4891,12 +4558,10 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 
 	/* Ironlake's plane is forced to pipe, bit 24 is to
 	   enable color space conversion */
-	if (!HAS_PCH_SPLIT(dev)) {
-		if (pipe == 0)
-			dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
-		else
-			dspcntr |= DISPPLANE_SEL_PIPE_B;
-	}
+	if (pipe == 0)
+		dspcntr &= ~DISPPLANE_SEL_PIPE_MASK;
+	else
+		dspcntr |= DISPPLANE_SEL_PIPE_B;
 
 	if (pipe == 0 && INTEL_INFO(dev)->gen < 4) {
 		/* Enable pixel doubling when the dot clock is > 90% of the (display)
@@ -4912,27 +4577,506 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			pipeconf &= ~PIPECONF_DOUBLE_WIDE;
 	}
 
-	if (!HAS_PCH_SPLIT(dev))
-		dpll |= DPLL_VCO_ENABLE;
+	dpll |= DPLL_VCO_ENABLE;
 
 	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
 	drm_mode_debug_printmodeline(mode);
 
-	/* assign to Ironlake registers */
-	if (HAS_PCH_SPLIT(dev)) {
-		fp_reg = PCH_FP0(pipe);
-		dpll_reg = PCH_DPLL(pipe);
+	I915_WRITE(FP0(pipe), fp);
+	I915_WRITE(DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
+
+	POSTING_READ(DPLL(pipe));
+	udelay(150);
+
+	/* The LVDS pin pair needs to be on before the DPLLs are enabled.
+	 * This is an exception to the general rule that mode_set doesn't turn
+	 * things on.
+	 */
+	if (is_lvds) {
+		temp = I915_READ(LVDS);
+		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
+		if (pipe == 1) {
+			temp |= LVDS_PIPEB_SELECT;
+		} else {
+			temp &= ~LVDS_PIPEB_SELECT;
+		}
+		/* set the corresponsding LVDS_BORDER bit */
+		temp |= dev_priv->lvds_border_bits;
+		/* Set the B0-B3 data pairs corresponding to whether we're going to
+		 * set the DPLLs for dual-channel mode or not.
+		 */
+		if (clock.p2 == 7)
+			temp |= LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP;
+		else
+			temp &= ~(LVDS_B0B3_POWER_UP | LVDS_CLKB_POWER_UP);
+
+		/* It would be nice to set 24 vs 18-bit mode (LVDS_A3_POWER_UP)
+		 * appropriately here, but we need to look more thoroughly into how
+		 * panels behave in the two modes.
+		 */
+		/* set the dithering flag on LVDS as needed */
+		if (INTEL_INFO(dev)->gen >= 4) {
+			if (dev_priv->lvds_dither)
+				temp |= LVDS_ENABLE_DITHER;
+			else
+				temp &= ~LVDS_ENABLE_DITHER;
+		}
+		if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
+			lvds_sync |= LVDS_HSYNC_POLARITY;
+		if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
+			lvds_sync |= LVDS_VSYNC_POLARITY;
+		if ((temp & (LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY))
+		    != lvds_sync) {
+			char flags[2] = "-+";
+			DRM_INFO("Changing LVDS panel from "
+				 "(%chsync, %cvsync) to (%chsync, %cvsync)\n",
+				 flags[!(temp & LVDS_HSYNC_POLARITY)],
+				 flags[!(temp & LVDS_VSYNC_POLARITY)],
+				 flags[!(lvds_sync & LVDS_HSYNC_POLARITY)],
+				 flags[!(lvds_sync & LVDS_VSYNC_POLARITY)]);
+			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
+			temp |= lvds_sync;
+		}
+		I915_WRITE(LVDS, temp);
+	}
+
+	if (is_dp) {
+		intel_dp_set_m_n(crtc, mode, adjusted_mode);
+	}
+
+	I915_WRITE(DPLL(pipe), dpll);
+
+	/* Wait for the clocks to stabilize. */
+	POSTING_READ(DPLL(pipe));
+	udelay(150);
+
+	if (INTEL_INFO(dev)->gen >= 4) {
+		temp = 0;
+		if (is_sdvo) {
+			temp = intel_mode_get_pixel_multiplier(adjusted_mode);
+			if (temp > 1)
+				temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
+			else
+				temp = 0;
+		}
+		I915_WRITE(DPLL_MD(pipe), temp);
+	} else {
+		/* The pixel multiplier can only be updated once the
+		 * DPLL is enabled and the clocks are stable.
+		 *
+		 * So write it again.
+		 */
+		I915_WRITE(DPLL(pipe), dpll);
+	}
+
+	intel_crtc->lowfreq_avail = false;
+	if (is_lvds && has_reduced_clock && i915_powersave) {
+		I915_WRITE(FP1(pipe), fp2);
+		intel_crtc->lowfreq_avail = true;
+		if (HAS_PIPE_CXSR(dev)) {
+			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
+			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
+		}
+	} else {
+		I915_WRITE(FP1(pipe), fp);
+		if (HAS_PIPE_CXSR(dev)) {
+			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
+			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
+		}
+	}
+
+	if (adjusted_mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		pipeconf |= PIPECONF_INTERLACE_W_FIELD_INDICATION;
+		/* the chip adds 2 halflines automatically */
+		adjusted_mode->crtc_vdisplay -= 1;
+		adjusted_mode->crtc_vtotal -= 1;
+		adjusted_mode->crtc_vblank_start -= 1;
+		adjusted_mode->crtc_vblank_end -= 1;
+		adjusted_mode->crtc_vsync_end -= 1;
+		adjusted_mode->crtc_vsync_start -= 1;
+	} else
+		pipeconf &= ~PIPECONF_INTERLACE_W_FIELD_INDICATION; /* progressive */
+
+	I915_WRITE(HTOTAL(pipe),
+		   (adjusted_mode->crtc_hdisplay - 1) |
+		   ((adjusted_mode->crtc_htotal - 1) << 16));
+	I915_WRITE(HBLANK(pipe),
+		   (adjusted_mode->crtc_hblank_start - 1) |
+		   ((adjusted_mode->crtc_hblank_end - 1) << 16));
+	I915_WRITE(HSYNC(pipe),
+		   (adjusted_mode->crtc_hsync_start - 1) |
+		   ((adjusted_mode->crtc_hsync_end - 1) << 16));
+
+	I915_WRITE(VTOTAL(pipe),
+		   (adjusted_mode->crtc_vdisplay - 1) |
+		   ((adjusted_mode->crtc_vtotal - 1) << 16));
+	I915_WRITE(VBLANK(pipe),
+		   (adjusted_mode->crtc_vblank_start - 1) |
+		   ((adjusted_mode->crtc_vblank_end - 1) << 16));
+	I915_WRITE(VSYNC(pipe),
+		   (adjusted_mode->crtc_vsync_start - 1) |
+		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
+
+	/* pipesrc and dspsize control the size that is scaled from,
+	 * which should always be the user's requested size.
+	 */
+	I915_WRITE(DSPSIZE(plane),
+		   ((mode->vdisplay - 1) << 16) |
+		   (mode->hdisplay - 1));
+	I915_WRITE(DSPPOS(plane), 0);
+	I915_WRITE(PIPESRC(pipe),
+		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
+
+	I915_WRITE(PIPECONF(pipe), pipeconf);
+	POSTING_READ(PIPECONF(pipe));
+	intel_enable_pipe(dev_priv, pipe, false);
+
+	intel_wait_for_vblank(dev, pipe);
+
+	I915_WRITE(DSPCNTR(plane), dspcntr);
+	POSTING_READ(DSPCNTR(plane));
+
+	ret = intel_pipe_set_base(crtc, x, y, old_fb);
+
+	intel_update_watermarks(dev);
+
+	return ret;
+}
+
+static int ironlake_crtc_mode_set(struct drm_crtc *crtc,
+				  struct drm_display_mode *mode,
+				  struct drm_display_mode *adjusted_mode,
+				  int x, int y,
+				  struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int plane = intel_crtc->plane;
+	int refclk, num_connectors = 0;
+	intel_clock_t clock, reduced_clock;
+	u32 dpll, fp = 0, fp2 = 0, dspcntr, pipeconf;
+	bool ok, has_reduced_clock = false, is_sdvo = false;
+	bool is_crt = false, is_lvds = false, is_tv = false, is_dp = false;
+	struct intel_encoder *has_edp_encoder = NULL;
+	struct drm_mode_config *mode_config = &dev->mode_config;
+	struct intel_encoder *encoder;
+	const intel_limit_t *limit;
+	int ret;
+	struct fdi_m_n m_n = {0};
+	u32 temp;
+	u32 lvds_sync = 0;
+	int target_clock, pixel_multiplier, lane, link_bw, bpp, factor;
+
+	list_for_each_entry(encoder, &mode_config->encoder_list, base.head) {
+		if (encoder->base.crtc != crtc)
+			continue;
+
+		switch (encoder->type) {
+		case INTEL_OUTPUT_LVDS:
+			is_lvds = true;
+			break;
+		case INTEL_OUTPUT_SDVO:
+		case INTEL_OUTPUT_HDMI:
+			is_sdvo = true;
+			if (encoder->needs_tv_clock)
+				is_tv = true;
+			break;
+		case INTEL_OUTPUT_TVOUT:
+			is_tv = true;
+			break;
+		case INTEL_OUTPUT_ANALOG:
+			is_crt = true;
+			break;
+		case INTEL_OUTPUT_DISPLAYPORT:
+			is_dp = true;
+			break;
+		case INTEL_OUTPUT_EDP:
+			has_edp_encoder = encoder;
+			break;
+		}
+
+		num_connectors++;
+	}
+
+	if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2) {
+		refclk = dev_priv->lvds_ssc_freq * 1000;
+		DRM_DEBUG_KMS("using SSC reference clock of %d MHz\n",
+			      refclk / 1000);
+	} else {
+		refclk = 96000;
+		if (!has_edp_encoder ||
+		    intel_encoder_is_pch_edp(&has_edp_encoder->base))
+			refclk = 120000; /* 120Mhz refclk */
+	}
+
+	/*
+	 * Returns a set of divisors for the desired target clock with the given
+	 * refclk, or FALSE.  The returned values represent the clock equation:
+	 * reflck * (5 * (m1 + 2) + (m2 + 2)) / (n + 2) / p1 / p2.
+	 */
+	limit = intel_limit(crtc, refclk);
+	ok = limit->find_pll(limit, crtc, adjusted_mode->clock, refclk, &clock);
+	if (!ok) {
+		DRM_ERROR("Couldn't find PLL settings for mode!\n");
+		return -EINVAL;
+	}
+
+	/* Ensure that the cursor is valid for the new mode before changing... */
+	intel_crtc_update_cursor(crtc, true);
+
+	if (is_lvds && dev_priv->lvds_downclock_avail) {
+		has_reduced_clock = limit->find_pll(limit, crtc,
+						    dev_priv->lvds_downclock,
+						    refclk,
+						    &reduced_clock);
+		if (has_reduced_clock && (clock.p != reduced_clock.p)) {
+			/*
+			 * If the different P is found, it means that we can't
+			 * switch the display clock by using the FP0/FP1.
+			 * In such case we will disable the LVDS downclock
+			 * feature.
+			 */
+			DRM_DEBUG_KMS("Different P is found for "
+				      "LVDS clock/downclock\n");
+			has_reduced_clock = 0;
+		}
+	}
+	/* SDVO TV has fixed PLL values depend on its clock range,
+	   this mirrors vbios setting. */
+	if (is_sdvo && is_tv) {
+		if (adjusted_mode->clock >= 100000
+		    && adjusted_mode->clock < 140500) {
+			clock.p1 = 2;
+			clock.p2 = 10;
+			clock.n = 3;
+			clock.m1 = 16;
+			clock.m2 = 8;
+		} else if (adjusted_mode->clock >= 140500
+			   && adjusted_mode->clock <= 200000) {
+			clock.p1 = 1;
+			clock.p2 = 10;
+			clock.n = 6;
+			clock.m1 = 12;
+			clock.m2 = 8;
+		}
+	}
+
+	/* FDI link */
+	pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+	lane = 0;
+	/* CPU eDP doesn't require FDI link, so just set DP M/N
+	   according to current link config */
+	if (has_edp_encoder &&
+	    !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		target_clock = mode->clock;
+		intel_edp_link_config(has_edp_encoder,
+				      &lane, &link_bw);
 	} else {
-		fp_reg = FP0(pipe);
-		dpll_reg = DPLL(pipe);
+		/* [e]DP over FDI requires target mode clock
+		   instead of link clock */
+		if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+			target_clock = mode->clock;
+		else
+			target_clock = adjusted_mode->clock;
+
+		/* FDI is a binary signal running at ~2.7GHz, encoding
+		 * each output octet as 10 bits. The actual frequency
+		 * is stored as a divider into a 100MHz clock, and the
+		 * mode pixel clock is stored in units of 1KHz.
+		 * Hence the bw of each lane in terms of the mode signal
+		 * is:
+		 */
+		link_bw = intel_fdi_link_freq(dev) * MHz(100)/KHz(1)/10;
+	}
+
+	/* determine panel color depth */
+	temp = I915_READ(PIPECONF(pipe));
+	temp &= ~PIPE_BPC_MASK;
+	if (is_lvds) {
+		/* the BPC will be 6 if it is 18-bit LVDS panel */
+		if ((I915_READ(PCH_LVDS) & LVDS_A3_POWER_MASK) == LVDS_A3_POWER_UP)
+			temp |= PIPE_8BPC;
+		else
+			temp |= PIPE_6BPC;
+	} else if (has_edp_encoder) {
+		switch (dev_priv->edp.bpp/3) {
+		case 8:
+			temp |= PIPE_8BPC;
+			break;
+		case 10:
+			temp |= PIPE_10BPC;
+			break;
+		case 6:
+			temp |= PIPE_6BPC;
+			break;
+		case 12:
+			temp |= PIPE_12BPC;
+			break;
+		}
+	} else
+		temp |= PIPE_8BPC;
+	I915_WRITE(PIPECONF(pipe), temp);
+
+	switch (temp & PIPE_BPC_MASK) {
+	case PIPE_8BPC:
+		bpp = 24;
+		break;
+	case PIPE_10BPC:
+		bpp = 30;
+		break;
+	case PIPE_6BPC:
+		bpp = 18;
+		break;
+	case PIPE_12BPC:
+		bpp = 36;
+		break;
+	default:
+		DRM_ERROR("unknown pipe bpc value\n");
+		bpp = 24;
+	}
+
+	if (!lane) {
+		/*
+		 * Account for spread spectrum to avoid
+		 * oversubscribing the link. Max center spread
+		 * is 2.5%; use 5% for safety's sake.
+		 */
+		u32 bps = target_clock * bpp * 21 / 20;
+		lane = bps / (link_bw * 8) + 1;
 	}
 
+	intel_crtc->fdi_lanes = lane;
+
+	if (pixel_multiplier > 1)
+		link_bw *= pixel_multiplier;
+	ironlake_compute_m_n(bpp, lane, target_clock, link_bw, &m_n);
+
+	/* Ironlake: try to setup display ref clock before DPLL
+	 * enabling. This is only under driver's control after
+	 * PCH B stepping, previous chipset stepping should be
+	 * ignoring this setting.
+	 */
+	temp = I915_READ(PCH_DREF_CONTROL);
+	/* Always enable nonspread source */
+	temp &= ~DREF_NONSPREAD_SOURCE_MASK;
+	temp |= DREF_NONSPREAD_SOURCE_ENABLE;
+	temp &= ~DREF_SSC_SOURCE_MASK;
+	temp |= DREF_SSC_SOURCE_ENABLE;
+	I915_WRITE(PCH_DREF_CONTROL, temp);
+
+	POSTING_READ(PCH_DREF_CONTROL);
+	udelay(200);
+
+	if (has_edp_encoder) {
+		if (intel_panel_use_ssc(dev_priv)) {
+			temp |= DREF_SSC1_ENABLE;
+			I915_WRITE(PCH_DREF_CONTROL, temp);
+
+			POSTING_READ(PCH_DREF_CONTROL);
+			udelay(200);
+		}
+		temp &= ~DREF_CPU_SOURCE_OUTPUT_MASK;
+
+		/* Enable CPU source on CPU attached eDP */
+		if (!intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+			if (intel_panel_use_ssc(dev_priv))
+				temp |= DREF_CPU_SOURCE_OUTPUT_DOWNSPREAD;
+			else
+				temp |= DREF_CPU_SOURCE_OUTPUT_NONSPREAD;
+		} else {
+			/* Enable SSC on PCH eDP if needed */
+			if (intel_panel_use_ssc(dev_priv)) {
+				DRM_ERROR("enabling SSC on PCH\n");
+				temp |= DREF_SUPERSPREAD_SOURCE_ENABLE;
+			}
+		}
+		I915_WRITE(PCH_DREF_CONTROL, temp);
+		POSTING_READ(PCH_DREF_CONTROL);
+		udelay(200);
+	}
+
+	fp = clock.n << 16 | clock.m1 << 8 | clock.m2;
+	if (has_reduced_clock)
+		fp2 = reduced_clock.n << 16 | reduced_clock.m1 << 8 |
+			reduced_clock.m2;
+
+	/* Enable autotuning of the PLL clock (if permissible) */
+	factor = 21;
+	if (is_lvds) {
+		if ((intel_panel_use_ssc(dev_priv) &&
+		     dev_priv->lvds_ssc_freq == 100) ||
+		    (I915_READ(PCH_LVDS) & LVDS_CLKB_POWER_MASK) == LVDS_CLKB_POWER_UP)
+			factor = 25;
+	} else if (is_sdvo && is_tv)
+		factor = 20;
+
+	if (clock.m1 < factor * clock.n)
+		fp |= FP_CB_TUNE;
+
+	dpll = 0;
+
+	if (is_lvds)
+		dpll |= DPLLB_MODE_LVDS;
+	else
+		dpll |= DPLLB_MODE_DAC_SERIAL;
+	if (is_sdvo) {
+		int pixel_multiplier = intel_mode_get_pixel_multiplier(adjusted_mode);
+		if (pixel_multiplier > 1) {
+			dpll |= (pixel_multiplier - 1) << PLL_REF_SDVO_HDMI_MULTIPLIER_SHIFT;
+		}
+		dpll |= DPLL_DVO_HIGH_SPEED;
+	}
+	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base))
+		dpll |= DPLL_DVO_HIGH_SPEED;
+
+	/* compute bitmask from p1 value */
+	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA01_P1_POST_DIV_SHIFT;
+	/* also FPA1 */
+	dpll |= (1 << (clock.p1 - 1)) << DPLL_FPA1_P1_POST_DIV_SHIFT;
+
+	switch (clock.p2) {
+	case 5:
+		dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_5;
+		break;
+	case 7:
+		dpll |= DPLLB_LVDS_P2_CLOCK_DIV_7;
+		break;
+	case 10:
+		dpll |= DPLL_DAC_SERIAL_P2_CLOCK_DIV_10;
+		break;
+	case 14:
+		dpll |= DPLLB_LVDS_P2_CLOCK_DIV_14;
+		break;
+	}
+
+	if (is_sdvo && is_tv)
+		dpll |= PLL_REF_INPUT_TVCLKINBC;
+	else if (is_tv)
+		/* XXX: just matching BIOS for now */
+		/*	dpll |= PLL_REF_INPUT_TVCLKINBC; */
+		dpll |= 3;
+	else if (is_lvds && intel_panel_use_ssc(dev_priv) && num_connectors < 2)
+		dpll |= PLLB_REF_INPUT_SPREADSPECTRUMIN;
+	else
+		dpll |= PLL_REF_INPUT_DREFCLK;
+
+	/* setup pipeconf */
+	pipeconf = I915_READ(PIPECONF(pipe));
+
+	/* Set up the display plane register */
+	dspcntr = DISPPLANE_GAMMA_ENABLE;
+
+	DRM_DEBUG_KMS("Mode for pipe %c:\n", pipe == 0 ? 'A' : 'B');
+	drm_mode_debug_printmodeline(mode);
+
 	/* PCH eDP needs FDI, but CPU eDP does not */
 	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-		I915_WRITE(fp_reg, fp);
-		I915_WRITE(dpll_reg, dpll & ~DPLL_VCO_ENABLE);
+		I915_WRITE(PCH_FP0(pipe), fp);
+		I915_WRITE(PCH_DPLL(pipe), dpll & ~DPLL_VCO_ENABLE);
 
-		POSTING_READ(dpll_reg);
+		POSTING_READ(PCH_DPLL(pipe));
 		udelay(150);
 	}
 
@@ -4964,11 +5108,7 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 	 * things on.
 	 */
 	if (is_lvds) {
-		reg = LVDS;
-		if (HAS_PCH_SPLIT(dev))
-			reg = PCH_LVDS;
-
-		temp = I915_READ(reg);
+		temp = I915_READ(PCH_LVDS);
 		temp |= LVDS_PORT_EN | LVDS_A0A2_CLKA_POWER_UP;
 		if (pipe == 1) {
 			if (HAS_PCH_CPT(dev))
@@ -4995,13 +5135,6 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		 * appropriately here, but we need to look more thoroughly into how
 		 * panels behave in the two modes.
 		 */
-		/* set the dithering flag on non-PCH LVDS as needed */
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-			if (dev_priv->lvds_dither)
-				temp |= LVDS_ENABLE_DITHER;
-			else
-				temp &= ~LVDS_ENABLE_DITHER;
-		}
 		if (adjusted_mode->flags & DRM_MODE_FLAG_NHSYNC)
 			lvds_sync |= LVDS_HSYNC_POLARITY;
 		if (adjusted_mode->flags & DRM_MODE_FLAG_NVSYNC)
@@ -5018,22 +5151,20 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 			temp &= ~(LVDS_HSYNC_POLARITY | LVDS_VSYNC_POLARITY);
 			temp |= lvds_sync;
 		}
-		I915_WRITE(reg, temp);
+		I915_WRITE(PCH_LVDS, temp);
 	}
 
 	/* set the dithering flag and clear for anything other than a panel. */
-	if (HAS_PCH_SPLIT(dev)) {
-		pipeconf &= ~PIPECONF_DITHER_EN;
-		pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
-		if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
-			pipeconf |= PIPECONF_DITHER_EN;
-			pipeconf |= PIPECONF_DITHER_TYPE_ST1;
-		}
+	pipeconf &= ~PIPECONF_DITHER_EN;
+	pipeconf &= ~PIPECONF_DITHER_TYPE_MASK;
+	if (dev_priv->lvds_dither && (is_lvds || has_edp_encoder)) {
+		pipeconf |= PIPECONF_DITHER_EN;
+		pipeconf |= PIPECONF_DITHER_TYPE_ST1;
 	}
 
 	if (is_dp || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
 		intel_dp_set_m_n(crtc, mode, adjusted_mode);
-	} else if (HAS_PCH_SPLIT(dev)) {
+	} else {
 		/* For non-DP output, clear any trans DP clock recovery setting.*/
 		I915_WRITE(TRANSDATA_M1(pipe), 0);
 		I915_WRITE(TRANSDATA_N1(pipe), 0);
@@ -5041,43 +5172,32 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		I915_WRITE(TRANSDPLINK_N1(pipe), 0);
 	}
 
-	if (!has_edp_encoder || intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-		I915_WRITE(dpll_reg, dpll);
+	if (!has_edp_encoder ||
+	    intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		I915_WRITE(PCH_DPLL(pipe), dpll);
 
 		/* Wait for the clocks to stabilize. */
-		POSTING_READ(dpll_reg);
+		POSTING_READ(PCH_DPLL(pipe));
 		udelay(150);
 
-		if (INTEL_INFO(dev)->gen >= 4 && !HAS_PCH_SPLIT(dev)) {
-			temp = 0;
-			if (is_sdvo) {
-				temp = intel_mode_get_pixel_multiplier(adjusted_mode);
-				if (temp > 1)
-					temp = (temp - 1) << DPLL_MD_UDI_MULTIPLIER_SHIFT;
-				else
-					temp = 0;
-			}
-			I915_WRITE(DPLL_MD(pipe), temp);
-		} else {
-			/* The pixel multiplier can only be updated once the
-			 * DPLL is enabled and the clocks are stable.
-			 *
-			 * So write it again.
-			 */
-			I915_WRITE(dpll_reg, dpll);
-		}
+		/* The pixel multiplier can only be updated once the
+		 * DPLL is enabled and the clocks are stable.
+		 *
+		 * So write it again.
+		 */
+		I915_WRITE(PCH_DPLL(pipe), dpll);
 	}
 
 	intel_crtc->lowfreq_avail = false;
 	if (is_lvds && has_reduced_clock && i915_powersave) {
-		I915_WRITE(fp_reg + 4, fp2);
+		I915_WRITE(PCH_FP1(pipe), fp2);
 		intel_crtc->lowfreq_avail = true;
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("enabling CxSR downclocking\n");
 			pipeconf |= PIPECONF_CXSR_DOWNCLOCK;
 		}
 	} else {
-		I915_WRITE(fp_reg + 4, fp);
+		I915_WRITE(PCH_FP1(pipe), fp);
 		if (HAS_PIPE_CXSR(dev)) {
 			DRM_DEBUG_KMS("disabling CxSR downclocking\n");
 			pipeconf &= ~PIPECONF_CXSR_DOWNCLOCK;
@@ -5116,33 +5236,24 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 		   (adjusted_mode->crtc_vsync_start - 1) |
 		   ((adjusted_mode->crtc_vsync_end - 1) << 16));
 
-	/* pipesrc and dspsize control the size that is scaled from,
-	 * which should always be the user's requested size.
+	/* pipesrc controls the size that is scaled from, which should
+	 * always be the user's requested size.
 	 */
-	if (!HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(DSPSIZE(plane),
-			   ((mode->vdisplay - 1) << 16) |
-			   (mode->hdisplay - 1));
-		I915_WRITE(DSPPOS(plane), 0);
-	}
 	I915_WRITE(PIPESRC(pipe),
 		   ((mode->hdisplay - 1) << 16) | (mode->vdisplay - 1));
 
-	if (HAS_PCH_SPLIT(dev)) {
-		I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
-		I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
-		I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
-		I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
+	I915_WRITE(PIPE_DATA_M1(pipe), TU_SIZE(m_n.tu) | m_n.gmch_m);
+	I915_WRITE(PIPE_DATA_N1(pipe), m_n.gmch_n);
+	I915_WRITE(PIPE_LINK_M1(pipe), m_n.link_m);
+	I915_WRITE(PIPE_LINK_N1(pipe), m_n.link_n);
 
-		if (has_edp_encoder && !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
-			ironlake_set_pll_edp(crtc, adjusted_mode->clock);
-		}
+	if (has_edp_encoder &&
+	    !intel_encoder_is_pch_edp(&has_edp_encoder->base)) {
+		ironlake_set_pll_edp(crtc, adjusted_mode->clock);
 	}
 
 	I915_WRITE(PIPECONF(pipe), pipeconf);
 	POSTING_READ(PIPECONF(pipe));
-	if (!HAS_PCH_SPLIT(dev))
-		intel_enable_pipe(dev_priv, pipe, false);
 
 	intel_wait_for_vblank(dev, pipe);
 
@@ -5161,6 +5272,26 @@ static int intel_crtc_mode_set(struct drm_crtc *crtc,
 
 	intel_update_watermarks(dev);
 
+	return ret;
+}
+
+static int intel_crtc_mode_set(struct drm_crtc *crtc,
+			       struct drm_display_mode *mode,
+			       struct drm_display_mode *adjusted_mode,
+			       int x, int y,
+			       struct drm_framebuffer *old_fb)
+{
+	struct drm_device *dev = crtc->dev;
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct intel_crtc *intel_crtc = to_intel_crtc(crtc);
+	int pipe = intel_crtc->pipe;
+	int ret;
+
+	drm_vblank_pre_modeset(dev, pipe);
+
+	ret = dev_priv->display.crtc_mode_set(crtc, mode, adjusted_mode,
+					      x, y, old_fb);
+
 	drm_vblank_post_modeset(dev, pipe);
 
 	return ret;
@@ -5483,43 +5614,140 @@ static struct drm_display_mode load_detect_mode = {
 		 704, 832, 0, 480, 489, 491, 520, 0, DRM_MODE_FLAG_NHSYNC | DRM_MODE_FLAG_NVSYNC),
 };
 
-struct drm_crtc *intel_get_load_detect_pipe(struct intel_encoder *intel_encoder,
-					    struct drm_connector *connector,
-					    struct drm_display_mode *mode,
-					    int *dpms_mode)
+static struct drm_framebuffer *
+intel_framebuffer_create(struct drm_device *dev,
+			 struct drm_mode_fb_cmd *mode_cmd,
+			 struct drm_i915_gem_object *obj)
+{
+	struct intel_framebuffer *intel_fb;
+	int ret;
+
+	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
+	if (!intel_fb) {
+		drm_gem_object_unreference_unlocked(&obj->base);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
+	if (ret) {
+		drm_gem_object_unreference_unlocked(&obj->base);
+		kfree(intel_fb);
+		return ERR_PTR(ret);
+	}
+
+	return &intel_fb->base;
+}
+
+static u32
+intel_framebuffer_pitch_for_width(int width, int bpp)
+{
+	u32 pitch = DIV_ROUND_UP(width * bpp, 8);
+	return ALIGN(pitch, 64);
+}
+
+static u32
+intel_framebuffer_size_for_mode(struct drm_display_mode *mode, int bpp)
+{
+	u32 pitch = intel_framebuffer_pitch_for_width(mode->hdisplay, bpp);
+	return ALIGN(pitch * mode->vdisplay, PAGE_SIZE);
+}
+
+static struct drm_framebuffer *
+intel_framebuffer_create_for_mode(struct drm_device *dev,
+				  struct drm_display_mode *mode,
+				  int depth, int bpp)
+{
+	struct drm_i915_gem_object *obj;
+	struct drm_mode_fb_cmd mode_cmd;
+
+	obj = i915_gem_alloc_object(dev,
+				    intel_framebuffer_size_for_mode(mode, bpp));
+	if (obj == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	mode_cmd.width = mode->hdisplay;
+	mode_cmd.height = mode->vdisplay;
+	mode_cmd.depth = depth;
+	mode_cmd.bpp = bpp;
+	mode_cmd.pitch = intel_framebuffer_pitch_for_width(mode_cmd.width, bpp);
+
+	return intel_framebuffer_create(dev, &mode_cmd, obj);
+}
+
+static struct drm_framebuffer *
+mode_fits_in_fbdev(struct drm_device *dev,
+		   struct drm_display_mode *mode)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	struct drm_i915_gem_object *obj;
+	struct drm_framebuffer *fb;
+
+	if (dev_priv->fbdev == NULL)
+		return NULL;
+
+	obj = dev_priv->fbdev->ifb.obj;
+	if (obj == NULL)
+		return NULL;
+
+	fb = &dev_priv->fbdev->ifb.base;
+	if (fb->pitch < intel_framebuffer_pitch_for_width(mode->hdisplay,
+							  fb->bits_per_pixel))
+		return NULL;
+
+	if (obj->base.size < mode->vdisplay * fb->pitch)
+		return NULL;
+
+	return fb;
+}
+
+bool intel_get_load_detect_pipe(struct intel_encoder *intel_encoder,
+				struct drm_connector *connector,
+				struct drm_display_mode *mode,
+				struct intel_load_detect_pipe *old)
 {
 	struct intel_crtc *intel_crtc;
 	struct drm_crtc *possible_crtc;
-	struct drm_crtc *supported_crtc =NULL;
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_crtc *crtc = NULL;
 	struct drm_device *dev = encoder->dev;
-	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
-	struct drm_crtc_helper_funcs *crtc_funcs;
+	struct drm_framebuffer *old_fb;
 	int i = -1;
 
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      encoder->base.id, drm_get_encoder_name(encoder));
+
 	/*
 	 * Algorithm gets a little messy:
+	 *
 	 *   - if the connector already has an assigned crtc, use it (but make
 	 *     sure it's on first)
+	 *
 	 *   - try to find the first unused crtc that can drive this connector,
 	 *     and use that if we find one
-	 *   - if there are no unused crtcs available, try to use the first
-	 *     one we found that supports the connector
 	 */
 
 	/* See if we already have a CRTC for this connector */
 	if (encoder->crtc) {
 		crtc = encoder->crtc;
-		/* Make sure the crtc and connector are running */
+
 		intel_crtc = to_intel_crtc(crtc);
-		*dpms_mode = intel_crtc->dpms_mode;
+		old->dpms_mode = intel_crtc->dpms_mode;
+		old->load_detect_temp = false;
+
+		/* Make sure the crtc and connector are running */
 		if (intel_crtc->dpms_mode != DRM_MODE_DPMS_ON) {
+			struct drm_encoder_helper_funcs *encoder_funcs;
+			struct drm_crtc_helper_funcs *crtc_funcs;
+
 			crtc_funcs = crtc->helper_private;
 			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
+
+			encoder_funcs = encoder->helper_private;
 			encoder_funcs->dpms(encoder, DRM_MODE_DPMS_ON);
 		}
-		return crtc;
+
+		return true;
 	}
 
 	/* Find an unused one (if possible) */
@@ -5531,46 +5759,66 @@ struct drm_crtc *intel_get_load_detect_pipe(struct intel_encoder *intel_encoder,
 			crtc = possible_crtc;
 			break;
 		}
-		if (!supported_crtc)
-			supported_crtc = possible_crtc;
 	}
 
 	/*
 	 * If we didn't find an unused CRTC, don't use any.
 	 */
 	if (!crtc) {
-		return NULL;
+		DRM_DEBUG_KMS("no pipe available for load-detect\n");
+		return false;
 	}
 
 	encoder->crtc = crtc;
 	connector->encoder = encoder;
-	intel_encoder->load_detect_temp = true;
 
 	intel_crtc = to_intel_crtc(crtc);
-	*dpms_mode = intel_crtc->dpms_mode;
+	old->dpms_mode = intel_crtc->dpms_mode;
+	old->load_detect_temp = true;
+	old->release_fb = NULL;
 
-	if (!crtc->enabled) {
-		if (!mode)
-			mode = &load_detect_mode;
-		drm_crtc_helper_set_mode(crtc, mode, 0, 0, crtc->fb);
-	} else {
-		if (intel_crtc->dpms_mode != DRM_MODE_DPMS_ON) {
-			crtc_funcs = crtc->helper_private;
-			crtc_funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-		}
+	if (!mode)
+		mode = &load_detect_mode;
 
-		/* Add this connector to the crtc */
-		encoder_funcs->mode_set(encoder, &crtc->mode, &crtc->mode);
-		encoder_funcs->commit(encoder);
+	old_fb = crtc->fb;
+
+	/* We need a framebuffer large enough to accommodate all accesses
+	 * that the plane may generate whilst we perform load detection.
+	 * We can not rely on the fbcon either being present (we get called
+	 * during its initialisation to detect all boot displays, or it may
+	 * not even exist) or that it is large enough to satisfy the
+	 * requested mode.
+	 */
+	crtc->fb = mode_fits_in_fbdev(dev, mode);
+	if (crtc->fb == NULL) {
+		DRM_DEBUG_KMS("creating tmp fb for load-detection\n");
+		crtc->fb = intel_framebuffer_create_for_mode(dev, mode, 24, 32);
+		old->release_fb = crtc->fb;
+	} else
+		DRM_DEBUG_KMS("reusing fbdev for load-detection framebuffer\n");
+	if (IS_ERR(crtc->fb)) {
+		DRM_DEBUG_KMS("failed to allocate framebuffer for load-detection\n");
+		crtc->fb = old_fb;
+		return false;
+	}
+
+	if (!drm_crtc_helper_set_mode(crtc, mode, 0, 0, old_fb)) {
+		DRM_DEBUG_KMS("failed to set mode on load-detect pipe\n");
+		if (old->release_fb)
+			old->release_fb->funcs->destroy(old->release_fb);
+		crtc->fb = old_fb;
+		return false;
 	}
+
 	/* let the connector get through one full cycle before testing */
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
 
-	return crtc;
+	return true;
 }
 
 void intel_release_load_detect_pipe(struct intel_encoder *intel_encoder,
-				    struct drm_connector *connector, int dpms_mode)
+				    struct drm_connector *connector,
+				    struct intel_load_detect_pipe *old)
 {
 	struct drm_encoder *encoder = &intel_encoder->base;
 	struct drm_device *dev = encoder->dev;
@@ -5578,19 +5826,24 @@ void intel_release_load_detect_pipe(struct intel_encoder *intel_encoder,
 	struct drm_encoder_helper_funcs *encoder_funcs = encoder->helper_private;
 	struct drm_crtc_helper_funcs *crtc_funcs = crtc->helper_private;
 
-	if (intel_encoder->load_detect_temp) {
-		encoder->crtc = NULL;
+	DRM_DEBUG_KMS("[CONNECTOR:%d:%s], [ENCODER:%d:%s]\n",
+		      connector->base.id, drm_get_connector_name(connector),
+		      encoder->base.id, drm_get_encoder_name(encoder));
+
+	if (old->load_detect_temp) {
 		connector->encoder = NULL;
-		intel_encoder->load_detect_temp = false;
-		crtc->enabled = drm_helper_crtc_in_use(crtc);
 		drm_helper_disable_unused_functions(dev);
+
+		if (old->release_fb)
+			old->release_fb->funcs->destroy(old->release_fb);
+
+		return;
 	}
 
 	/* Switch crtc and encoder back off if necessary */
-	if (crtc->enabled && dpms_mode != DRM_MODE_DPMS_ON) {
-		if (encoder->crtc == crtc)
-			encoder_funcs->dpms(encoder, dpms_mode);
-		crtc_funcs->dpms(crtc, dpms_mode);
+	if (old->dpms_mode != DRM_MODE_DPMS_ON) {
+		encoder_funcs->dpms(encoder, old->dpms_mode);
+		crtc_funcs->dpms(crtc, old->dpms_mode);
 	}
 }
 
@@ -6185,6 +6438,7 @@ static int intel_crtc_page_flip(struct drm_crtc *crtc,
 		break;
 
 	case 6:
+	case 7:
 		OUT_RING(MI_DISPLAY_FLIP |
 			 MI_DISPLAY_FLIP_PLANE(intel_crtc->plane));
 		OUT_RING(fb->pitch | obj->tiling_mode);
@@ -6504,6 +6758,9 @@ static void intel_setup_outputs(struct drm_device *dev)
 	}
 
 	intel_panel_setup_backlight(dev);
+
+	/* disable all the possible outputs/crtcs before entering KMS mode */
+	drm_helper_disable_unused_functions(dev);
 }
 
 static void intel_user_framebuffer_destroy(struct drm_framebuffer *fb)
@@ -6571,27 +6828,12 @@ intel_user_framebuffer_create(struct drm_device *dev,
 			      struct drm_mode_fb_cmd *mode_cmd)
 {
 	struct drm_i915_gem_object *obj;
-	struct intel_framebuffer *intel_fb;
-	int ret;
 
 	obj = to_intel_bo(drm_gem_object_lookup(dev, filp, mode_cmd->handle));
 	if (&obj->base == NULL)
 		return ERR_PTR(-ENOENT);
 
-	intel_fb = kzalloc(sizeof(*intel_fb), GFP_KERNEL);
-	if (!intel_fb) {
-		drm_gem_object_unreference_unlocked(&obj->base);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	ret = intel_framebuffer_init(dev, intel_fb, mode_cmd, obj);
-	if (ret) {
-		drm_gem_object_unreference_unlocked(&obj->base);
-		kfree(intel_fb);
-		return ERR_PTR(ret);
-	}
-
-	return &intel_fb->base;
+	return intel_framebuffer_create(dev, mode_cmd, obj);
 }
 
 static const struct drm_mode_config_funcs intel_mode_funcs = {
@@ -6605,13 +6847,14 @@ intel_alloc_context_page(struct drm_device *dev)
 	struct drm_i915_gem_object *ctx;
 	int ret;
 
+	WARN_ON(!mutex_is_locked(&dev->struct_mutex));
+
 	ctx = i915_gem_alloc_object(dev, 4096);
 	if (!ctx) {
 		DRM_DEBUG("failed to alloc power context, RC6 disabled\n");
 		return NULL;
 	}
 
-	mutex_lock(&dev->struct_mutex);
 	ret = i915_gem_object_pin(ctx, 4096, true);
 	if (ret) {
 		DRM_ERROR("failed to pin power context: %d\n", ret);
@@ -6623,7 +6866,6 @@ intel_alloc_context_page(struct drm_device *dev)
 		DRM_ERROR("failed to set-domain on power context: %d\n", ret);
 		goto err_unpin;
 	}
-	mutex_unlock(&dev->struct_mutex);
 
 	return ctx;
 
@@ -6758,6 +7000,11 @@ void gen6_disable_rps(struct drm_device *dev)
 	I915_WRITE(GEN6_RPNSWREQ, 1 << 31);
 	I915_WRITE(GEN6_PMINTRMSK, 0xffffffff);
 	I915_WRITE(GEN6_PMIER, 0);
+
+	spin_lock_irq(&dev_priv->rps_lock);
+	dev_priv->pm_iir = 0;
+	spin_unlock_irq(&dev_priv->rps_lock);
+
 	I915_WRITE(GEN6_PMIIR, I915_READ(GEN6_PMIIR));
 }
 
@@ -6851,7 +7098,7 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 {
 	u32 rp_state_cap = I915_READ(GEN6_RP_STATE_CAP);
 	u32 gt_perf_status = I915_READ(GEN6_GT_PERF_STATUS);
-	u32 pcu_mbox;
+	u32 pcu_mbox, rc6_mask = 0;
 	int cur_freq, min_freq, max_freq;
 	int i;
 
@@ -6862,7 +7109,8 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	 * userspace...
 	 */
 	I915_WRITE(GEN6_RC_STATE, 0);
-	__gen6_gt_force_wake_get(dev_priv);
+	mutex_lock(&dev_priv->dev->struct_mutex);
+	gen6_gt_force_wake_get(dev_priv);
 
 	/* disable the counters and set deterministic thresholds */
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -6882,9 +7130,12 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 	I915_WRITE(GEN6_RC6p_THRESHOLD, 100000);
 	I915_WRITE(GEN6_RC6pp_THRESHOLD, 64000); /* unused */
 
+	if (i915_enable_rc6)
+		rc6_mask = GEN6_RC_CTL_RC6p_ENABLE |
+			GEN6_RC_CTL_RC6_ENABLE;
+
 	I915_WRITE(GEN6_RC_CONTROL,
-		   GEN6_RC_CTL_RC6p_ENABLE |
-		   GEN6_RC_CTL_RC6_ENABLE |
+		   rc6_mask |
 		   GEN6_RC_CTL_EI_MODE(1) |
 		   GEN6_RC_CTL_HW_ENABLE);
 
@@ -6956,168 +7207,237 @@ void gen6_enable_rps(struct drm_i915_private *dev_priv)
 		   GEN6_PM_RP_DOWN_THRESHOLD |
 		   GEN6_PM_RP_UP_EI_EXPIRED |
 		   GEN6_PM_RP_DOWN_EI_EXPIRED);
+	spin_lock_irq(&dev_priv->rps_lock);
+	WARN_ON(dev_priv->pm_iir != 0);
 	I915_WRITE(GEN6_PMIMR, 0);
+	spin_unlock_irq(&dev_priv->rps_lock);
 	/* enable all PM interrupts */
 	I915_WRITE(GEN6_PMINTRMSK, 0);
 
-	__gen6_gt_force_wake_put(dev_priv);
+	gen6_gt_force_wake_put(dev_priv);
+	mutex_unlock(&dev_priv->dev->struct_mutex);
 }
 
-void intel_enable_clock_gating(struct drm_device *dev)
+static void ironlake_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+
+	/* Required for FBC */
+	dspclk_gate |= DPFCUNIT_CLOCK_GATE_DISABLE |
+		DPFCRUNIT_CLOCK_GATE_DISABLE |
+		DPFDUNIT_CLOCK_GATE_DISABLE;
+	/* Required for CxSR */
+	dspclk_gate |= DPARBUNIT_CLOCK_GATE_DISABLE;
+
+	I915_WRITE(PCH_3DCGDIS0,
+		   MARIUNIT_CLOCK_GATE_DISABLE |
+		   SVSMUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(PCH_3DCGDIS1,
+		   VFMUNIT_CLOCK_GATE_DISABLE);
+
+	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
+
+	/*
+	 * According to the spec the following bits should be set in
+	 * order to enable memory self-refresh
+	 * The bit 22/21 of 0x42004
+	 * The bit 5 of 0x42020
+	 * The bit 15 of 0x45000
+	 */
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   (I915_READ(ILK_DISPLAY_CHICKEN2) |
+		    ILK_DPARB_GATE | ILK_VSDPFD_FULL));
+	I915_WRITE(ILK_DSPCLK_GATE,
+		   (I915_READ(ILK_DSPCLK_GATE) |
+		    ILK_DPARB_CLK_GATE));
+	I915_WRITE(DISP_ARB_CTL,
+		   (I915_READ(DISP_ARB_CTL) |
+		    DISP_FBC_WM_DIS));
+	I915_WRITE(WM3_LP_ILK, 0);
+	I915_WRITE(WM2_LP_ILK, 0);
+	I915_WRITE(WM1_LP_ILK, 0);
+
+	/*
+	 * Based on the document from hardware guys the following bits
+	 * should be set unconditionally in order to enable FBC.
+	 * The bit 22 of 0x42000
+	 * The bit 22 of 0x42004
+	 * The bit 7,8,9 of 0x42020.
+	 */
+	if (IS_IRONLAKE_M(dev)) {
+		I915_WRITE(ILK_DISPLAY_CHICKEN1,
+			   I915_READ(ILK_DISPLAY_CHICKEN1) |
+			   ILK_FBCQ_DIS);
+		I915_WRITE(ILK_DISPLAY_CHICKEN2,
+			   I915_READ(ILK_DISPLAY_CHICKEN2) |
+			   ILK_DPARB_GATE);
+		I915_WRITE(ILK_DSPCLK_GATE,
+			   I915_READ(ILK_DSPCLK_GATE) |
+			   ILK_DPFC_DIS1 |
+			   ILK_DPFC_DIS2 |
+			   ILK_CLK_FBC);
+	}
+
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
+	I915_WRITE(_3D_CHICKEN2,
+		   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
+		   _3D_CHICKEN2_WM_READ_PIPELINED);
+}
+
+static void gen6_init_clock_gating(struct drm_device *dev)
 {
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	int pipe;
+	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+
+	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
+
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_ELPIN_409_SELECT);
+
+	I915_WRITE(WM3_LP_ILK, 0);
+	I915_WRITE(WM2_LP_ILK, 0);
+	I915_WRITE(WM1_LP_ILK, 0);
 
 	/*
-	 * Disable clock gating reported to work incorrectly according to the
-	 * specs, but enable as much else as we can.
+	 * According to the spec the following bits should be
+	 * set in order to enable memory self-refresh and fbc:
+	 * The bit21 and bit22 of 0x42000
+	 * The bit21 and bit22 of 0x42004
+	 * The bit5 and bit7 of 0x42020
+	 * The bit14 of 0x70180
+	 * The bit14 of 0x71180
 	 */
-	if (HAS_PCH_SPLIT(dev)) {
-		uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
+	I915_WRITE(ILK_DISPLAY_CHICKEN1,
+		   I915_READ(ILK_DISPLAY_CHICKEN1) |
+		   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
+	I915_WRITE(ILK_DISPLAY_CHICKEN2,
+		   I915_READ(ILK_DISPLAY_CHICKEN2) |
+		   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
+	I915_WRITE(ILK_DSPCLK_GATE,
+		   I915_READ(ILK_DSPCLK_GATE) |
+		   ILK_DPARB_CLK_GATE  |
+		   ILK_DPFD_CLK_GATE);
 
-		if (IS_GEN5(dev)) {
-			/* Required for FBC */
-			dspclk_gate |= DPFCUNIT_CLOCK_GATE_DISABLE |
-				DPFCRUNIT_CLOCK_GATE_DISABLE |
-				DPFDUNIT_CLOCK_GATE_DISABLE;
-			/* Required for CxSR */
-			dspclk_gate |= DPARBUNIT_CLOCK_GATE_DISABLE;
-
-			I915_WRITE(PCH_3DCGDIS0,
-				   MARIUNIT_CLOCK_GATE_DISABLE |
-				   SVSMUNIT_CLOCK_GATE_DISABLE);
-			I915_WRITE(PCH_3DCGDIS1,
-				   VFMUNIT_CLOCK_GATE_DISABLE);
-		}
+	for_each_pipe(pipe)
+		I915_WRITE(DSPCNTR(pipe),
+			   I915_READ(DSPCNTR(pipe)) |
+			   DISPPLANE_TRICKLE_FEED_DISABLE);
+}
 
-		I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
+static void ivybridge_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	int pipe;
+	uint32_t dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE;
 
-		/*
-		 * On Ibex Peak and Cougar Point, we need to disable clock
-		 * gating for the panel power sequencer or it will fail to
-		 * start up when no ports are active.
-		 */
-		I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(PCH_DSPCLK_GATE_D, dspclk_gate);
 
-		/*
-		 * According to the spec the following bits should be set in
-		 * order to enable memory self-refresh
-		 * The bit 22/21 of 0x42004
-		 * The bit 5 of 0x42020
-		 * The bit 15 of 0x45000
-		 */
-		if (IS_GEN5(dev)) {
-			I915_WRITE(ILK_DISPLAY_CHICKEN2,
-					(I915_READ(ILK_DISPLAY_CHICKEN2) |
-					ILK_DPARB_GATE | ILK_VSDPFD_FULL));
-			I915_WRITE(ILK_DSPCLK_GATE,
-					(I915_READ(ILK_DSPCLK_GATE) |
-						ILK_DPARB_CLK_GATE));
-			I915_WRITE(DISP_ARB_CTL,
-					(I915_READ(DISP_ARB_CTL) |
-						DISP_FBC_WM_DIS));
-			I915_WRITE(WM3_LP_ILK, 0);
-			I915_WRITE(WM2_LP_ILK, 0);
-			I915_WRITE(WM1_LP_ILK, 0);
-		}
-		/*
-		 * Based on the document from hardware guys the following bits
-		 * should be set unconditionally in order to enable FBC.
-		 * The bit 22 of 0x42000
-		 * The bit 22 of 0x42004
-		 * The bit 7,8,9 of 0x42020.
-		 */
-		if (IS_IRONLAKE_M(dev)) {
-			I915_WRITE(ILK_DISPLAY_CHICKEN1,
-				   I915_READ(ILK_DISPLAY_CHICKEN1) |
-				   ILK_FBCQ_DIS);
-			I915_WRITE(ILK_DISPLAY_CHICKEN2,
-				   I915_READ(ILK_DISPLAY_CHICKEN2) |
-				   ILK_DPARB_GATE);
-			I915_WRITE(ILK_DSPCLK_GATE,
-				   I915_READ(ILK_DSPCLK_GATE) |
-				   ILK_DPFC_DIS1 |
-				   ILK_DPFC_DIS2 |
-				   ILK_CLK_FBC);
-		}
+	I915_WRITE(WM3_LP_ILK, 0);
+	I915_WRITE(WM2_LP_ILK, 0);
+	I915_WRITE(WM1_LP_ILK, 0);
 
-		I915_WRITE(ILK_DISPLAY_CHICKEN2,
-			   I915_READ(ILK_DISPLAY_CHICKEN2) |
-			   ILK_ELPIN_409_SELECT);
+	I915_WRITE(ILK_DSPCLK_GATE, IVB_VRHUNIT_CLK_GATE);
 
-		if (IS_GEN5(dev)) {
-			I915_WRITE(_3D_CHICKEN2,
-				   _3D_CHICKEN2_WM_READ_PIPELINED << 16 |
-				   _3D_CHICKEN2_WM_READ_PIPELINED);
-		}
+	for_each_pipe(pipe)
+		I915_WRITE(DSPCNTR(pipe),
+			   I915_READ(DSPCNTR(pipe)) |
+			   DISPPLANE_TRICKLE_FEED_DISABLE);
+}
 
-		if (IS_GEN6(dev)) {
-			I915_WRITE(WM3_LP_ILK, 0);
-			I915_WRITE(WM2_LP_ILK, 0);
-			I915_WRITE(WM1_LP_ILK, 0);
+static void g4x_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	uint32_t dspclk_gate;
 
-			/*
-			 * According to the spec the following bits should be
-			 * set in order to enable memory self-refresh and fbc:
-			 * The bit21 and bit22 of 0x42000
-			 * The bit21 and bit22 of 0x42004
-			 * The bit5 and bit7 of 0x42020
-			 * The bit14 of 0x70180
-			 * The bit14 of 0x71180
-			 */
-			I915_WRITE(ILK_DISPLAY_CHICKEN1,
-				   I915_READ(ILK_DISPLAY_CHICKEN1) |
-				   ILK_FBCQ_DIS | ILK_PABSTRETCH_DIS);
-			I915_WRITE(ILK_DISPLAY_CHICKEN2,
-				   I915_READ(ILK_DISPLAY_CHICKEN2) |
-				   ILK_DPARB_GATE | ILK_VSDPFD_FULL);
-			I915_WRITE(ILK_DSPCLK_GATE,
-				   I915_READ(ILK_DSPCLK_GATE) |
-				   ILK_DPARB_CLK_GATE  |
-				   ILK_DPFD_CLK_GATE);
-
-			for_each_pipe(pipe)
-				I915_WRITE(DSPCNTR(pipe),
-					   I915_READ(DSPCNTR(pipe)) |
-					   DISPPLANE_TRICKLE_FEED_DISABLE);
-		}
-	} else if (IS_G4X(dev)) {
-		uint32_t dspclk_gate;
-		I915_WRITE(RENCLK_GATE_D1, 0);
-		I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
-		       GS_UNIT_CLOCK_GATE_DISABLE |
-		       CL_UNIT_CLOCK_GATE_DISABLE);
-		I915_WRITE(RAMCLK_GATE_D, 0);
-		dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
-			OVRUNIT_CLOCK_GATE_DISABLE |
-			OVCUNIT_CLOCK_GATE_DISABLE;
-		if (IS_GM45(dev))
-			dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
-		I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
-	} else if (IS_CRESTLINE(dev)) {
-		I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
-		I915_WRITE(RENCLK_GATE_D2, 0);
-		I915_WRITE(DSPCLK_GATE_D, 0);
-		I915_WRITE(RAMCLK_GATE_D, 0);
-		I915_WRITE16(DEUC, 0);
-	} else if (IS_BROADWATER(dev)) {
-		I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
-		       I965_RCC_CLOCK_GATE_DISABLE |
-		       I965_RCPB_CLOCK_GATE_DISABLE |
-		       I965_ISC_CLOCK_GATE_DISABLE |
-		       I965_FBC_CLOCK_GATE_DISABLE);
-		I915_WRITE(RENCLK_GATE_D2, 0);
-	} else if (IS_GEN3(dev)) {
-		u32 dstate = I915_READ(D_STATE);
+	I915_WRITE(RENCLK_GATE_D1, 0);
+	I915_WRITE(RENCLK_GATE_D2, VF_UNIT_CLOCK_GATE_DISABLE |
+		   GS_UNIT_CLOCK_GATE_DISABLE |
+		   CL_UNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(RAMCLK_GATE_D, 0);
+	dspclk_gate = VRHUNIT_CLOCK_GATE_DISABLE |
+		OVRUNIT_CLOCK_GATE_DISABLE |
+		OVCUNIT_CLOCK_GATE_DISABLE;
+	if (IS_GM45(dev))
+		dspclk_gate |= DSSUNIT_CLOCK_GATE_DISABLE;
+	I915_WRITE(DSPCLK_GATE_D, dspclk_gate);
+}
 
-		dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
-			DSTATE_DOT_CLOCK_GATING;
-		I915_WRITE(D_STATE, dstate);
-	} else if (IS_I85X(dev) || IS_I865G(dev)) {
-		I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
-	} else if (IS_I830(dev)) {
-		I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
-	}
+static void crestline_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(RENCLK_GATE_D1, I965_RCC_CLOCK_GATE_DISABLE);
+	I915_WRITE(RENCLK_GATE_D2, 0);
+	I915_WRITE(DSPCLK_GATE_D, 0);
+	I915_WRITE(RAMCLK_GATE_D, 0);
+	I915_WRITE16(DEUC, 0);
+}
+
+static void broadwater_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(RENCLK_GATE_D1, I965_RCZ_CLOCK_GATE_DISABLE |
+		   I965_RCC_CLOCK_GATE_DISABLE |
+		   I965_RCPB_CLOCK_GATE_DISABLE |
+		   I965_ISC_CLOCK_GATE_DISABLE |
+		   I965_FBC_CLOCK_GATE_DISABLE);
+	I915_WRITE(RENCLK_GATE_D2, 0);
+}
+
+static void gen3_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+	u32 dstate = I915_READ(D_STATE);
+
+	dstate |= DSTATE_PLL_D3_OFF | DSTATE_GFX_CLOCK_GATING |
+		DSTATE_DOT_CLOCK_GATING;
+	I915_WRITE(D_STATE, dstate);
+}
+
+static void i85x_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(RENCLK_GATE_D1, SV_CLOCK_GATE_DISABLE);
+}
+
+static void i830_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	I915_WRITE(DSPCLK_GATE_D, OVRUNIT_CLOCK_GATE_DISABLE);
+}
+
+static void ibx_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+}
+
+static void cpt_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	/*
+	 * On Ibex Peak and Cougar Point, we need to disable clock
+	 * gating for the panel power sequencer or it will fail to
+	 * start up when no ports are active.
+	 */
+	I915_WRITE(SOUTH_DSPCLK_GATE_D, PCH_DPLSUNIT_CLOCK_GATE_DISABLE);
+	I915_WRITE(SOUTH_CHICKEN2, I915_READ(SOUTH_CHICKEN2) |
+		   DPLS_EDP_PPS_FIX_DIS);
 }
 
 static void ironlake_teardown_rc6(struct drm_device *dev)
@@ -7187,9 +7507,12 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	if (!i915_enable_rc6)
 		return;
 
+	mutex_lock(&dev->struct_mutex);
 	ret = ironlake_setup_rc6(dev);
-	if (ret)
+	if (ret) {
+		mutex_unlock(&dev->struct_mutex);
 		return;
+	}
 
 	/*
 	 * GPU can automatically power down the render unit if given a page
@@ -7198,6 +7521,7 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	ret = BEGIN_LP_RING(6);
 	if (ret) {
 		ironlake_teardown_rc6(dev);
+		mutex_unlock(&dev->struct_mutex);
 		return;
 	}
 
@@ -7213,10 +7537,33 @@ void ironlake_enable_rc6(struct drm_device *dev)
 	OUT_RING(MI_FLUSH);
 	ADVANCE_LP_RING();
 
+	/*
+	 * Wait for the command parser to advance past MI_SET_CONTEXT. The HW
+	 * does an implicit flush, combined with MI_FLUSH above, it should be
+	 * safe to assume that renderctx is valid
+	 */
+	ret = intel_wait_ring_idle(LP_RING(dev_priv));
+	if (ret) {
+		DRM_ERROR("failed to enable ironlake power power savings\n");
+		ironlake_teardown_rc6(dev);
+		mutex_unlock(&dev->struct_mutex);
+		return;
+	}
+
 	I915_WRITE(PWRCTXA, dev_priv->pwrctx->gtt_offset | PWRCTX_EN);
 	I915_WRITE(RSTDBYCTL, I915_READ(RSTDBYCTL) & ~RCX_SW_EXIT);
+	mutex_unlock(&dev->struct_mutex);
 }
 
+void intel_init_clock_gating(struct drm_device *dev)
+{
+	struct drm_i915_private *dev_priv = dev->dev_private;
+
+	dev_priv->display.init_clock_gating(dev);
+
+	if (dev_priv->display.init_pch_clock_gating)
+		dev_priv->display.init_pch_clock_gating(dev);
+}
 
 /* Set up chip specific display functions */
 static void intel_init_display(struct drm_device *dev)
@@ -7224,10 +7571,13 @@ static void intel_init_display(struct drm_device *dev)
 	struct drm_i915_private *dev_priv = dev->dev_private;
 
 	/* We always want a DPMS function */
-	if (HAS_PCH_SPLIT(dev))
+	if (HAS_PCH_SPLIT(dev)) {
 		dev_priv->display.dpms = ironlake_crtc_dpms;
-	else
+		dev_priv->display.crtc_mode_set = ironlake_crtc_mode_set;
+	} else {
 		dev_priv->display.dpms = i9xx_crtc_dpms;
+		dev_priv->display.crtc_mode_set = i9xx_crtc_mode_set;
+	}
 
 	if (I915_HAS_FBC(dev)) {
 		if (HAS_PCH_SPLIT(dev)) {
@@ -7271,6 +7621,11 @@ static void intel_init_display(struct drm_device *dev)
 
 	/* For FIFO watermark updates */
 	if (HAS_PCH_SPLIT(dev)) {
+		if (HAS_PCH_IBX(dev))
+			dev_priv->display.init_pch_clock_gating = ibx_init_clock_gating;
+		else if (HAS_PCH_CPT(dev))
+			dev_priv->display.init_pch_clock_gating = cpt_init_clock_gating;
+
 		if (IS_GEN5(dev)) {
 			if (I915_READ(MLTR_ILK) & ILK_SRLT_MASK)
 				dev_priv->display.update_wm = ironlake_update_wm;
@@ -7279,6 +7634,8 @@ static void intel_init_display(struct drm_device *dev)
 					      "Disable CxSR\n");
 				dev_priv->display.update_wm = NULL;
 			}
+			dev_priv->display.fdi_link_train = ironlake_fdi_link_train;
+			dev_priv->display.init_clock_gating = ironlake_init_clock_gating;
 		} else if (IS_GEN6(dev)) {
 			if (SNB_READ_WM0_LATENCY()) {
 				dev_priv->display.update_wm = sandybridge_update_wm;
@@ -7287,6 +7644,20 @@ static void intel_init_display(struct drm_device *dev)
 					      "Disable CxSR\n");
 				dev_priv->display.update_wm = NULL;
 			}
+			dev_priv->display.fdi_link_train = gen6_fdi_link_train;
+			dev_priv->display.init_clock_gating = gen6_init_clock_gating;
+		} else if (IS_IVYBRIDGE(dev)) {
+			/* FIXME: detect B0+ stepping and use auto training */
+			dev_priv->display.fdi_link_train = ivb_manual_fdi_link_train;
+			if (SNB_READ_WM0_LATENCY()) {
+				dev_priv->display.update_wm = sandybridge_update_wm;
+			} else {
+				DRM_DEBUG_KMS("Failed to read display plane latency. "
+					      "Disable CxSR\n");
+				dev_priv->display.update_wm = NULL;
+			}
+			dev_priv->display.init_clock_gating = ivybridge_init_clock_gating;
+
 		} else
 			dev_priv->display.update_wm = NULL;
 	} else if (IS_PINEVIEW(dev)) {
@@ -7304,18 +7675,30 @@ static void intel_init_display(struct drm_device *dev)
 			dev_priv->display.update_wm = NULL;
 		} else
 			dev_priv->display.update_wm = pineview_update_wm;
-	} else if (IS_G4X(dev))
+	} else if (IS_G4X(dev)) {
 		dev_priv->display.update_wm = g4x_update_wm;
-	else if (IS_GEN4(dev))
+		dev_priv->display.init_clock_gating = g4x_init_clock_gating;
+	} else if (IS_GEN4(dev)) {
 		dev_priv->display.update_wm = i965_update_wm;
-	else if (IS_GEN3(dev)) {
+		if (IS_CRESTLINE(dev))
+			dev_priv->display.init_clock_gating = crestline_init_clock_gating;
+		else if (IS_BROADWATER(dev))
+			dev_priv->display.init_clock_gating = broadwater_init_clock_gating;
+	} else if (IS_GEN3(dev)) {
 		dev_priv->display.update_wm = i9xx_update_wm;
 		dev_priv->display.get_fifo_size = i9xx_get_fifo_size;
+		dev_priv->display.init_clock_gating = gen3_init_clock_gating;
+	} else if (IS_I865G(dev)) {
+		dev_priv->display.update_wm = i830_update_wm;
+		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
+		dev_priv->display.get_fifo_size = i830_get_fifo_size;
 	} else if (IS_I85X(dev)) {
 		dev_priv->display.update_wm = i9xx_update_wm;
 		dev_priv->display.get_fifo_size = i85x_get_fifo_size;
+		dev_priv->display.init_clock_gating = i85x_init_clock_gating;
 	} else {
 		dev_priv->display.update_wm = i830_update_wm;
+		dev_priv->display.init_clock_gating = i830_init_clock_gating;
 		if (IS_845G(dev))
 			dev_priv->display.get_fifo_size = i845_get_fifo_size;
 		else
@@ -7441,12 +7824,11 @@ void intel_modeset_init(struct drm_device *dev)
 		intel_crtc_init(dev, i);
 	}
 
-	intel_setup_outputs(dev);
-
-	intel_enable_clock_gating(dev);
-
 	/* Just disable it once at startup */
 	i915_disable_vga(dev);
+	intel_setup_outputs(dev);
+
+	intel_init_clock_gating(dev);
 
 	if (IS_IRONLAKE_M(dev)) {
 		ironlake_enable_drps(dev);
@@ -7456,12 +7838,15 @@ void intel_modeset_init(struct drm_device *dev)
 	if (IS_GEN6(dev))
 		gen6_enable_rps(dev_priv);
 
-	if (IS_IRONLAKE_M(dev))
-		ironlake_enable_rc6(dev);
-
 	INIT_WORK(&dev_priv->idle_work, intel_idle_update);
 	setup_timer(&dev_priv->idle_timer, intel_gpu_idle_timer,
 		    (unsigned long)dev);
+}
+
+void intel_modeset_gem_init(struct drm_device *dev)
+{
+	if (IS_IRONLAKE_M(dev))
+		ironlake_enable_rc6(dev);
 
 	intel_setup_overlay(dev);
 }
diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h
index 1d20712d527f..831d7a4a0d18 100644
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@@ -140,7 +140,6 @@ struct intel_fbdev {
 struct intel_encoder {
 	struct drm_encoder base;
 	int type;
-	bool load_detect_temp;
 	bool needs_tv_clock;
 	void (*hot_plug)(struct intel_encoder *);
 	int crtc_mask;
@@ -291,13 +290,19 @@ int intel_get_pipe_from_crtc_id(struct drm_device *dev, void *data,
 				struct drm_file *file_priv);
 extern void intel_wait_for_vblank(struct drm_device *dev, int pipe);
 extern void intel_wait_for_pipe_off(struct drm_device *dev, int pipe);
-extern struct drm_crtc *intel_get_load_detect_pipe(struct intel_encoder *intel_encoder,
-						   struct drm_connector *connector,
-						   struct drm_display_mode *mode,
-						   int *dpms_mode);
+
+struct intel_load_detect_pipe {
+	struct drm_framebuffer *release_fb;
+	bool load_detect_temp;
+	int dpms_mode;
+};
+extern bool intel_get_load_detect_pipe(struct intel_encoder *intel_encoder,
+				       struct drm_connector *connector,
+				       struct drm_display_mode *mode,
+				       struct intel_load_detect_pipe *old);
 extern void intel_release_load_detect_pipe(struct intel_encoder *intel_encoder,
 					   struct drm_connector *connector,
-					   int dpms_mode);
+					   struct intel_load_detect_pipe *old);
 
 extern struct drm_connector* intel_sdvo_find(struct drm_device *dev, int sdvoB);
 extern int intel_sdvo_supports_hotplug(struct drm_connector *connector);
@@ -339,4 +344,6 @@ extern int intel_overlay_attrs(struct drm_device *dev, void *data,
 
 extern void intel_fb_output_poll_changed(struct drm_device *dev);
 extern void intel_fb_restore_mode(struct drm_device *dev);
+
+extern void intel_init_clock_gating(struct drm_device *dev);
 #endif /* __INTEL_DRV_H__ */
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.c b/drivers/gpu/drm/i915/intel_ringbuffer.c
index e9e6f71418a4..95c4b1429935 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.c
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.c
@@ -236,7 +236,7 @@ init_pipe_control(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->agp_type = AGP_USER_CACHED_MEMORY;
+	obj->cache_level = I915_CACHE_LLC;
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret)
@@ -286,7 +286,7 @@ static int init_render_ring(struct intel_ring_buffer *ring)
 
 	if (INTEL_INFO(dev)->gen > 3) {
 		int mode = VS_TIMER_DISPATCH << 16 | VS_TIMER_DISPATCH;
-		if (IS_GEN6(dev))
+		if (IS_GEN6(dev) || IS_GEN7(dev))
 			mode |= MI_FLUSH_ENABLE << 16 | MI_FLUSH_ENABLE;
 		I915_WRITE(MI_MODE, mode);
 	}
@@ -551,10 +551,31 @@ render_ring_put_irq(struct intel_ring_buffer *ring)
 
 void intel_ring_setup_status_page(struct intel_ring_buffer *ring)
 {
+	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = ring->dev->dev_private;
-	u32 mmio = IS_GEN6(ring->dev) ?
-		RING_HWS_PGA_GEN6(ring->mmio_base) :
-		RING_HWS_PGA(ring->mmio_base);
+	u32 mmio = 0;
+
+	/* The ring status page addresses are no longer next to the rest of
+	 * the ring registers as of gen7.
+	 */
+	if (IS_GEN7(dev)) {
+		switch (ring->id) {
+		case RING_RENDER:
+			mmio = RENDER_HWS_PGA_GEN7;
+			break;
+		case RING_BLT:
+			mmio = BLT_HWS_PGA_GEN7;
+			break;
+		case RING_BSD:
+			mmio = BSD_HWS_PGA_GEN7;
+			break;
+		}
+	} else if (IS_GEN6(ring->dev)) {
+		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
+	} else {
+		mmio = RING_HWS_PGA(ring->mmio_base);
+	}
+
 	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
 	POSTING_READ(mmio);
 }
@@ -600,7 +621,7 @@ ring_add_request(struct intel_ring_buffer *ring,
 }
 
 static bool
-ring_get_irq(struct intel_ring_buffer *ring, u32 flag)
+gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 {
 	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
@@ -609,71 +630,67 @@ ring_get_irq(struct intel_ring_buffer *ring, u32 flag)
 	       return false;
 
 	spin_lock(&ring->irq_lock);
-	if (ring->irq_refcount++ == 0)
-		ironlake_enable_irq(dev_priv, flag);
+	if (ring->irq_refcount++ == 0) {
+		ring->irq_mask &= ~rflag;
+		I915_WRITE_IMR(ring, ring->irq_mask);
+		ironlake_enable_irq(dev_priv, gflag);
+	}
 	spin_unlock(&ring->irq_lock);
 
 	return true;
 }
 
 static void
-ring_put_irq(struct intel_ring_buffer *ring, u32 flag)
+gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
 {
 	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	spin_lock(&ring->irq_lock);
-	if (--ring->irq_refcount == 0)
-		ironlake_disable_irq(dev_priv, flag);
+	if (--ring->irq_refcount == 0) {
+		ring->irq_mask |= rflag;
+		I915_WRITE_IMR(ring, ring->irq_mask);
+		ironlake_disable_irq(dev_priv, gflag);
+	}
 	spin_unlock(&ring->irq_lock);
 }
 
 static bool
-gen6_ring_get_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
+bsd_ring_get_irq(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	if (!dev->irq_enabled)
-	       return false;
+		return false;
 
 	spin_lock(&ring->irq_lock);
 	if (ring->irq_refcount++ == 0) {
-		ring->irq_mask &= ~rflag;
-		I915_WRITE_IMR(ring, ring->irq_mask);
-		ironlake_enable_irq(dev_priv, gflag);
+		if (IS_G4X(dev))
+			i915_enable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
+		else
+			ironlake_enable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
 	}
 	spin_unlock(&ring->irq_lock);
 
 	return true;
 }
-
 static void
-gen6_ring_put_irq(struct intel_ring_buffer *ring, u32 gflag, u32 rflag)
+bsd_ring_put_irq(struct intel_ring_buffer *ring)
 {
 	struct drm_device *dev = ring->dev;
 	drm_i915_private_t *dev_priv = dev->dev_private;
 
 	spin_lock(&ring->irq_lock);
 	if (--ring->irq_refcount == 0) {
-		ring->irq_mask |= rflag;
-		I915_WRITE_IMR(ring, ring->irq_mask);
-		ironlake_disable_irq(dev_priv, gflag);
+		if (IS_G4X(dev))
+			i915_disable_irq(dev_priv, I915_BSD_USER_INTERRUPT);
+		else
+			ironlake_disable_irq(dev_priv, GT_BSD_USER_INTERRUPT);
 	}
 	spin_unlock(&ring->irq_lock);
 }
 
-static bool
-bsd_ring_get_irq(struct intel_ring_buffer *ring)
-{
-	return ring_get_irq(ring, GT_BSD_USER_INTERRUPT);
-}
-static void
-bsd_ring_put_irq(struct intel_ring_buffer *ring)
-{
-	ring_put_irq(ring, GT_BSD_USER_INTERRUPT);
-}
-
 static int
 ring_dispatch_execbuffer(struct intel_ring_buffer *ring, u32 offset, u32 length)
 {
@@ -759,7 +776,7 @@ static int init_status_page(struct intel_ring_buffer *ring)
 		ret = -ENOMEM;
 		goto err;
 	}
-	obj->agp_type = AGP_USER_CACHED_MEMORY;
+	obj->cache_level = I915_CACHE_LLC;
 
 	ret = i915_gem_object_pin(obj, 4096, true);
 	if (ret != 0) {
@@ -800,6 +817,7 @@ int intel_init_ring_buffer(struct drm_device *dev,
 	INIT_LIST_HEAD(&ring->request_list);
 	INIT_LIST_HEAD(&ring->gpu_write_list);
 
+	init_waitqueue_head(&ring->irq_queue);
 	spin_lock_init(&ring->irq_lock);
 	ring->irq_mask = ~0;
 
@@ -872,7 +890,7 @@ void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring)
 
 	/* Disable the ring buffer. The ring must be idle at this point */
 	dev_priv = ring->dev->dev_private;
-	ret = intel_wait_ring_buffer(ring, ring->size - 8);
+	ret = intel_wait_ring_idle(ring);
 	if (ret)
 		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
 			  ring->name, ret);
@@ -1333,7 +1351,7 @@ int intel_init_bsd_ring_buffer(struct drm_device *dev)
 	drm_i915_private_t *dev_priv = dev->dev_private;
 	struct intel_ring_buffer *ring = &dev_priv->ring[VCS];
 
-	if (IS_GEN6(dev))
+	if (IS_GEN6(dev) || IS_GEN7(dev))
 		*ring = gen6_bsd_ring;
 	else
 		*ring = bsd_ring;
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index f23cc5f037a6..c0e0ee63fbf4 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -14,27 +14,24 @@ struct  intel_hw_status_page {
 	struct		drm_i915_gem_object *obj;
 };
 
-#define I915_RING_READ(reg) i915_gt_read(dev_priv, reg)
-#define I915_RING_WRITE(reg, val) i915_gt_write(dev_priv, reg, val)
+#define I915_READ_TAIL(ring) I915_READ(RING_TAIL((ring)->mmio_base))
+#define I915_WRITE_TAIL(ring, val) I915_WRITE(RING_TAIL((ring)->mmio_base), val)
 
-#define I915_READ_TAIL(ring) I915_RING_READ(RING_TAIL((ring)->mmio_base))
-#define I915_WRITE_TAIL(ring, val) I915_RING_WRITE(RING_TAIL((ring)->mmio_base), val)
+#define I915_READ_START(ring) I915_READ(RING_START((ring)->mmio_base))
+#define I915_WRITE_START(ring, val) I915_WRITE(RING_START((ring)->mmio_base), val)
 
-#define I915_READ_START(ring) I915_RING_READ(RING_START((ring)->mmio_base))
-#define I915_WRITE_START(ring, val) I915_RING_WRITE(RING_START((ring)->mmio_base), val)
+#define I915_READ_HEAD(ring)  I915_READ(RING_HEAD((ring)->mmio_base))
+#define I915_WRITE_HEAD(ring, val) I915_WRITE(RING_HEAD((ring)->mmio_base), val)
 
-#define I915_READ_HEAD(ring)  I915_RING_READ(RING_HEAD((ring)->mmio_base))
-#define I915_WRITE_HEAD(ring, val) I915_RING_WRITE(RING_HEAD((ring)->mmio_base), val)
+#define I915_READ_CTL(ring) I915_READ(RING_CTL((ring)->mmio_base))
+#define I915_WRITE_CTL(ring, val) I915_WRITE(RING_CTL((ring)->mmio_base), val)
 
-#define I915_READ_CTL(ring) I915_RING_READ(RING_CTL((ring)->mmio_base))
-#define I915_WRITE_CTL(ring, val) I915_RING_WRITE(RING_CTL((ring)->mmio_base), val)
+#define I915_READ_IMR(ring) I915_READ(RING_IMR((ring)->mmio_base))
+#define I915_WRITE_IMR(ring, val) I915_WRITE(RING_IMR((ring)->mmio_base), val)
 
-#define I915_READ_IMR(ring) I915_RING_READ(RING_IMR((ring)->mmio_base))
-#define I915_WRITE_IMR(ring, val) I915_RING_WRITE(RING_IMR((ring)->mmio_base), val)
-
-#define I915_READ_NOPID(ring) I915_RING_READ(RING_NOPID((ring)->mmio_base))
-#define I915_READ_SYNC_0(ring) I915_RING_READ(RING_SYNC_0((ring)->mmio_base))
-#define I915_READ_SYNC_1(ring) I915_RING_READ(RING_SYNC_1((ring)->mmio_base))
+#define I915_READ_NOPID(ring) I915_READ(RING_NOPID((ring)->mmio_base))
+#define I915_READ_SYNC_0(ring) I915_READ(RING_SYNC_0((ring)->mmio_base))
+#define I915_READ_SYNC_1(ring) I915_READ(RING_SYNC_1((ring)->mmio_base))
 
 struct  intel_ring_buffer {
 	const char	*name;
@@ -164,7 +161,13 @@ intel_read_status_page(struct intel_ring_buffer *ring,
 #define I915_BREADCRUMB_INDEX		0x21
 
 void intel_cleanup_ring_buffer(struct intel_ring_buffer *ring);
+
 int __must_check intel_wait_ring_buffer(struct intel_ring_buffer *ring, int n);
+static inline int intel_wait_ring_idle(struct intel_ring_buffer *ring)
+{
+	return intel_wait_ring_buffer(ring, ring->space - 8);
+}
+
 int __must_check intel_ring_begin(struct intel_ring_buffer *ring, int n);
 
 static inline void intel_ring_emit(struct intel_ring_buffer *ring,
diff --git a/drivers/gpu/drm/i915/intel_sdvo.c b/drivers/gpu/drm/i915/intel_sdvo.c
index 4324f33212d6..754086f83941 100644
--- a/drivers/gpu/drm/i915/intel_sdvo.c
+++ b/drivers/gpu/drm/i915/intel_sdvo.c
@@ -2544,21 +2544,19 @@ bool intel_sdvo_init(struct drm_device *dev, int sdvo_reg)
 	if (!intel_sdvo)
 		return false;
 
+	intel_sdvo->sdvo_reg = sdvo_reg;
+	intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(dev, sdvo_reg) >> 1;
+	intel_sdvo_select_i2c_bus(dev_priv, intel_sdvo, sdvo_reg);
 	if (!intel_sdvo_init_ddc_proxy(intel_sdvo, dev)) {
 		kfree(intel_sdvo);
 		return false;
 	}
 
-	intel_sdvo->sdvo_reg = sdvo_reg;
-
+	/* encoder type will be decided later */
 	intel_encoder = &intel_sdvo->base;
 	intel_encoder->type = INTEL_OUTPUT_SDVO;
-	/* encoder type will be decided later */
 	drm_encoder_init(dev, &intel_encoder->base, &intel_sdvo_enc_funcs, 0);
 
-	intel_sdvo->slave_addr = intel_sdvo_get_slave_addr(dev, sdvo_reg) >> 1;
-	intel_sdvo_select_i2c_bus(dev_priv, intel_sdvo, sdvo_reg);
-
 	/* Read the regs to test if we can talk to the device */
 	for (i = 0; i < 0x40; i++) {
 		u8 byte;
diff --git a/drivers/gpu/drm/i915/intel_tv.c b/drivers/gpu/drm/i915/intel_tv.c
index 6b22c1dcc015..113e4e7264cd 100644
--- a/drivers/gpu/drm/i915/intel_tv.c
+++ b/drivers/gpu/drm/i915/intel_tv.c
@@ -1361,15 +1361,14 @@ intel_tv_detect(struct drm_connector *connector, bool force)
 	if (intel_tv->base.base.crtc && intel_tv->base.base.crtc->enabled) {
 		type = intel_tv_detect_type(intel_tv, connector);
 	} else if (force) {
-		struct drm_crtc *crtc;
-		int dpms_mode;
+		struct intel_load_detect_pipe tmp;
 
-		crtc = intel_get_load_detect_pipe(&intel_tv->base, connector,
-						  &mode, &dpms_mode);
-		if (crtc) {
+		if (intel_get_load_detect_pipe(&intel_tv->base, connector,
+					       &mode, &tmp)) {
 			type = intel_tv_detect_type(intel_tv, connector);
-			intel_release_load_detect_pipe(&intel_tv->base, connector,
-						       dpms_mode);
+			intel_release_load_detect_pipe(&intel_tv->base,
+						       connector,
+						       &tmp);
 		} else
 			return connector_status_unknown;
 	} else
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index de70959b9ed5..ca1639918f57 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -11,6 +11,8 @@ config DRM_NOUVEAU
 	select FRAMEBUFFER_CONSOLE if !EXPERT
 	select FB_BACKLIGHT if DRM_NOUVEAU_BACKLIGHT
 	select ACPI_VIDEO if ACPI && X86 && BACKLIGHT_CLASS_DEVICE && VIDEO_OUTPUT_CONTROL && INPUT
+	select ACPI_WMI if ACPI
+	select MXM_WMI if ACPI
 	help
 	  Choose this option for open-source nVidia support.
 
diff --git a/drivers/gpu/drm/nouveau/Makefile b/drivers/gpu/drm/nouveau/Makefile
index e12c97fd8db8..0583677e4581 100644
--- a/drivers/gpu/drm/nouveau/Makefile
+++ b/drivers/gpu/drm/nouveau/Makefile
@@ -20,6 +20,8 @@ nouveau-y := nouveau_drv.o nouveau_state.o nouveau_channel.o nouveau_mem.o \
              nv40_graph.o nv50_graph.o nvc0_graph.o \
              nv40_grctx.o nv50_grctx.o nvc0_grctx.o \
              nv84_crypt.o \
+             nva3_copy.o nvc0_copy.o \
+             nv40_mpeg.o nv50_mpeg.o \
              nv04_instmem.o nv50_instmem.o nvc0_instmem.o \
              nv50_evo.o nv50_crtc.o nv50_dac.o nv50_sor.o \
              nv50_cursor.o nv50_display.o \
diff --git a/drivers/gpu/drm/nouveau/nouveau_acpi.c b/drivers/gpu/drm/nouveau/nouveau_acpi.c
index a54238058dc5..f0d459bb46e4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_acpi.c
+++ b/drivers/gpu/drm/nouveau/nouveau_acpi.c
@@ -4,6 +4,8 @@
 #include <acpi/acpi_drivers.h>
 #include <acpi/acpi_bus.h>
 #include <acpi/video.h>
+#include <acpi/acpi.h>
+#include <linux/mxm-wmi.h>
 
 #include "drmP.h"
 #include "drm.h"
@@ -35,15 +37,71 @@
 
 static struct nouveau_dsm_priv {
 	bool dsm_detected;
+	bool optimus_detected;
 	acpi_handle dhandle;
 	acpi_handle rom_handle;
 } nouveau_dsm_priv;
 
+#define NOUVEAU_DSM_HAS_MUX 0x1
+#define NOUVEAU_DSM_HAS_OPT 0x2
+
 static const char nouveau_dsm_muid[] = {
 	0xA0, 0xA0, 0x95, 0x9D, 0x60, 0x00, 0x48, 0x4D,
 	0xB3, 0x4D, 0x7E, 0x5F, 0xEA, 0x12, 0x9F, 0xD4,
 };
 
+static const char nouveau_op_dsm_muid[] = {
+	0xF8, 0xD8, 0x86, 0xA4, 0xDA, 0x0B, 0x1B, 0x47,
+	0xA7, 0x2B, 0x60, 0x42, 0xA6, 0xB5, 0xBE, 0xE0,
+};
+
+static int nouveau_optimus_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
+{
+	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
+	struct acpi_object_list input;
+	union acpi_object params[4];
+	union acpi_object *obj;
+	int err;
+
+	input.count = 4;
+	input.pointer = params;
+	params[0].type = ACPI_TYPE_BUFFER;
+	params[0].buffer.length = sizeof(nouveau_op_dsm_muid);
+	params[0].buffer.pointer = (char *)nouveau_op_dsm_muid;
+	params[1].type = ACPI_TYPE_INTEGER;
+	params[1].integer.value = 0x00000100;
+	params[2].type = ACPI_TYPE_INTEGER;
+	params[2].integer.value = func;
+	params[3].type = ACPI_TYPE_BUFFER;
+	params[3].buffer.length = 0;
+
+	err = acpi_evaluate_object(handle, "_DSM", &input, &output);
+	if (err) {
+		printk(KERN_INFO "failed to evaluate _DSM: %d\n", err);
+		return err;
+	}
+
+	obj = (union acpi_object *)output.pointer;
+
+	if (obj->type == ACPI_TYPE_INTEGER)
+		if (obj->integer.value == 0x80000002) {
+			return -ENODEV;
+		}
+
+	if (obj->type == ACPI_TYPE_BUFFER) {
+		if (obj->buffer.length == 4 && result) {
+			*result = 0;
+			*result |= obj->buffer.pointer[0];
+			*result |= (obj->buffer.pointer[1] << 8);
+			*result |= (obj->buffer.pointer[2] << 16);
+			*result |= (obj->buffer.pointer[3] << 24);
+		}
+	}
+
+	kfree(output.pointer);
+	return 0;
+}
+
 static int nouveau_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 {
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
@@ -92,6 +150,8 @@ static int nouveau_dsm(acpi_handle handle, int func, int arg, uint32_t *result)
 
 static int nouveau_dsm_switch_mux(acpi_handle handle, int mux_id)
 {
+	mxm_wmi_call_mxmx(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0);
+	mxm_wmi_call_mxds(mux_id == NOUVEAU_DSM_LED_STAMINA ? MXM_MXDS_ADAPTER_IGD : MXM_MXDS_ADAPTER_0);
 	return nouveau_dsm(handle, NOUVEAU_DSM_LED, mux_id, NULL);
 }
 
@@ -148,11 +208,11 @@ static struct vga_switcheroo_handler nouveau_dsm_handler = {
 	.get_client_id = nouveau_dsm_get_client_id,
 };
 
-static bool nouveau_dsm_pci_probe(struct pci_dev *pdev)
+static int nouveau_dsm_pci_probe(struct pci_dev *pdev)
 {
 	acpi_handle dhandle, nvidia_handle;
 	acpi_status status;
-	int ret;
+	int ret, retval = 0;
 	uint32_t result;
 
 	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
@@ -166,11 +226,17 @@ static bool nouveau_dsm_pci_probe(struct pci_dev *pdev)
 
 	ret = nouveau_dsm(dhandle, NOUVEAU_DSM_SUPPORTED,
 			  NOUVEAU_DSM_SUPPORTED_FUNCTIONS, &result);
-	if (ret < 0)
-		return false;
+	if (ret == 0)
+		retval |= NOUVEAU_DSM_HAS_MUX;
 
-	nouveau_dsm_priv.dhandle = dhandle;
-	return true;
+	ret = nouveau_optimus_dsm(dhandle, 0, 0, &result);
+	if (ret == 0)
+		retval |= NOUVEAU_DSM_HAS_OPT;
+
+	if (retval)
+		nouveau_dsm_priv.dhandle = dhandle;
+
+	return retval;
 }
 
 static bool nouveau_dsm_detect(void)
@@ -179,22 +245,42 @@ static bool nouveau_dsm_detect(void)
 	struct acpi_buffer buffer = {sizeof(acpi_method_name), acpi_method_name};
 	struct pci_dev *pdev = NULL;
 	int has_dsm = 0;
+	int has_optimus;
 	int vga_count = 0;
+	bool guid_valid;
+	int retval;
+	bool ret = false;
+
+	/* lookup the MXM GUID */
+	guid_valid = mxm_wmi_supported();
 
+	if (guid_valid)
+		printk("MXM: GUID detected in BIOS\n");
+
+	/* now do DSM detection */
 	while ((pdev = pci_get_class(PCI_CLASS_DISPLAY_VGA << 8, pdev)) != NULL) {
 		vga_count++;
 
-		has_dsm |= (nouveau_dsm_pci_probe(pdev) == true);
+		retval = nouveau_dsm_pci_probe(pdev);
+		printk("ret val is %d\n", retval);
+		if (retval & NOUVEAU_DSM_HAS_MUX)
+			has_dsm |= 1;
+		if (retval & NOUVEAU_DSM_HAS_OPT)
+			has_optimus = 1;
 	}
 
-	if (vga_count == 2 && has_dsm) {
+	if (vga_count == 2 && has_dsm && guid_valid) {
 		acpi_get_name(nouveau_dsm_priv.dhandle, ACPI_FULL_PATHNAME, &buffer);
 		printk(KERN_INFO "VGA switcheroo: detected DSM switching method %s handle\n",
 		       acpi_method_name);
 		nouveau_dsm_priv.dsm_detected = true;
-		return true;
+		ret = true;
 	}
-	return false;
+
+	if (has_optimus == 1)
+		nouveau_dsm_priv.optimus_detected = true;
+
+	return ret;
 }
 
 void nouveau_register_dsm_handler(void)
@@ -247,7 +333,7 @@ bool nouveau_acpi_rom_supported(struct pci_dev *pdev)
 	acpi_status status;
 	acpi_handle dhandle, rom_handle;
 
-	if (!nouveau_dsm_priv.dsm_detected)
+	if (!nouveau_dsm_priv.dsm_detected && !nouveau_dsm_priv.optimus_detected)
 		return false;
 
 	dhandle = DEVICE_ACPI_HANDLE(&pdev->dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index 90aef64b76f2..729d5fd7c88d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -5049,11 +5049,7 @@ int get_pll_limits(struct drm_device *dev, uint32_t limit_match, struct pll_lims
 		pll_lim->vco1.max_n = record[11];
 		pll_lim->min_p = record[12];
 		pll_lim->max_p = record[13];
-		/* where did this go to?? */
-		if ((entry[0] & 0xf0) == 0x80)
-			pll_lim->refclk = 27000;
-		else
-			pll_lim->refclk = 100000;
+		pll_lim->refclk = ROM16(entry[9]) * 1000;
 	}
 
 	/*
@@ -6035,6 +6031,7 @@ parse_dcb_connector_table(struct nvbios *bios)
 		case DCB_CONNECTOR_DVI_I:
 		case DCB_CONNECTOR_DVI_D:
 		case DCB_CONNECTOR_LVDS:
+		case DCB_CONNECTOR_LVDS_SPWG:
 		case DCB_CONNECTOR_DP:
 		case DCB_CONNECTOR_eDP:
 		case DCB_CONNECTOR_HDMI_0:
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 8a54fa7edf5c..050c314119df 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -82,6 +82,7 @@ enum dcb_connector_type {
 	DCB_CONNECTOR_DVI_I = 0x30,
 	DCB_CONNECTOR_DVI_D = 0x31,
 	DCB_CONNECTOR_LVDS = 0x40,
+	DCB_CONNECTOR_LVDS_SPWG = 0x41,
 	DCB_CONNECTOR_DP = 0x46,
 	DCB_CONNECTOR_eDP = 0x47,
 	DCB_CONNECTOR_HDMI_0 = 0x60,
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 4cea35c57d15..a7583a8ddb01 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -268,9 +268,8 @@ nouveau_channel_put_unlocked(struct nouveau_channel **pchan)
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_crypt_engine *pcrypt = &dev_priv->engine.crypt;
 	unsigned long flags;
+	int i;
 
 	/* decrement the refcount, and we're done if there's still refs */
 	if (likely(!atomic_dec_and_test(&chan->users))) {
@@ -294,19 +293,12 @@ nouveau_channel_put_unlocked(struct nouveau_channel **pchan)
 	/* boot it off the hardware */
 	pfifo->reassign(dev, false);
 
-	/* We want to give pgraph a chance to idle and get rid of all
-	 * potential errors. We need to do this without the context
-	 * switch lock held, otherwise the irq handler is unable to
-	 * process them.
-	 */
-	if (pgraph->channel(dev) == chan)
-		nouveau_wait_for_idle(dev);
-
 	/* destroy the engine specific contexts */
 	pfifo->destroy_context(chan);
-	pgraph->destroy_context(chan);
-	if (pcrypt->destroy_context)
-		pcrypt->destroy_context(chan);
+	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
+		if (chan->engctx[i])
+			dev_priv->eng[i]->context_del(chan, i);
+	}
 
 	pfifo->reassign(dev, true);
 
@@ -414,7 +406,7 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
 	struct nouveau_channel *chan;
 	int ret;
 
-	if (dev_priv->engine.graph.accel_blocked)
+	if (!dev_priv->eng[NVOBJ_ENGINE_GR])
 		return -ENODEV;
 
 	if (init->fb_ctxdma_handle == ~0 || init->tt_ctxdma_handle == ~0)
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 7ae151109a66..1595d0b6e815 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -442,7 +442,7 @@ nouveau_connector_set_property(struct drm_connector *connector,
 		}
 
 		/* LVDS always needs gpu scaling */
-		if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS &&
+		if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS &&
 		    value == DRM_MODE_SCALE_NONE)
 			return -EINVAL;
 
@@ -650,6 +650,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 		ret = get_slave_funcs(encoder)->get_modes(encoder, connector);
 
 	if (nv_connector->dcb->type == DCB_CONNECTOR_LVDS ||
+	    nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG ||
 	    nv_connector->dcb->type == DCB_CONNECTOR_eDP)
 		ret += nouveau_connector_scaler_modes_add(connector);
 
@@ -810,6 +811,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 		type = DRM_MODE_CONNECTOR_HDMIA;
 		break;
 	case DCB_CONNECTOR_LVDS:
+	case DCB_CONNECTOR_LVDS_SPWG:
 		type = DRM_MODE_CONNECTOR_LVDS;
 		funcs = &nouveau_connector_funcs_lvds;
 		break;
@@ -838,7 +840,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
 
 	/* Check if we need dithering enabled */
-	if (dcb->type == DCB_CONNECTOR_LVDS) {
+	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS) {
 		bool dummy, is_24bit = false;
 
 		ret = nouveau_bios_parse_lvds_table(dev, 0, &dummy, &is_24bit);
@@ -883,7 +885,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 				nv_connector->use_dithering ?
 				DRM_MODE_DITHERING_ON : DRM_MODE_DITHERING_OFF);
 
-		if (dcb->type != DCB_CONNECTOR_LVDS) {
+		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS) {
 			if (dev_priv->card_type >= NV_50)
 				connector->polled = DRM_CONNECTOR_POLL_HPD;
 			else
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index 764c15d537ba..eb514ea29377 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -276,7 +276,7 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	struct nouveau_fence *fence;
 	int ret;
 
-	if (dev_priv->engine.graph.accel_blocked)
+	if (!dev_priv->channel)
 		return -ENODEV;
 
 	s = kzalloc(sizeof(*s), GFP_KERNEL);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 155ebdcbf06f..02c6f37d8bd7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -162,11 +162,10 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 	struct drm_device *dev = pci_get_drvdata(pdev);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct drm_crtc *crtc;
-	int ret, i;
+	int ret, i, e;
 
 	if (pm_state.event == PM_EVENT_PRETHAW)
 		return 0;
@@ -206,12 +205,17 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 			nouveau_channel_idle(chan);
 	}
 
-	pgraph->fifo_access(dev, false);
-	nouveau_wait_for_idle(dev);
 	pfifo->reassign(dev, false);
 	pfifo->disable(dev);
 	pfifo->unload_context(dev);
-	pgraph->unload_context(dev);
+
+	for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
+		if (dev_priv->eng[e]) {
+			ret = dev_priv->eng[e]->fini(dev, e);
+			if (ret)
+				goto out_abort;
+		}
+	}
 
 	ret = pinstmem->suspend(dev);
 	if (ret) {
@@ -242,9 +246,12 @@ nouveau_pci_suspend(struct pci_dev *pdev, pm_message_t pm_state)
 
 out_abort:
 	NV_INFO(dev, "Re-enabling acceleration..\n");
+	for (e = e + 1; e < NVOBJ_ENGINE_NR; e++) {
+		if (dev_priv->eng[e])
+			dev_priv->eng[e]->init(dev, e);
+	}
 	pfifo->enable(dev);
 	pfifo->reassign(dev, true);
-	pgraph->fifo_access(dev, true);
 	return ret;
 }
 
@@ -299,8 +306,10 @@ nouveau_pci_resume(struct pci_dev *pdev)
 	engine->mc.init(dev);
 	engine->timer.init(dev);
 	engine->fb.init(dev);
-	engine->graph.init(dev);
-	engine->crypt.init(dev);
+	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
+		if (dev_priv->eng[i])
+			dev_priv->eng[i]->init(dev, i);
+	}
 	engine->fifo.init(dev);
 
 	nouveau_irq_postinstall(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index a76514a209b3..9c56331941e2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -150,13 +150,12 @@ enum nouveau_flags {
 
 #define NVOBJ_ENGINE_SW		0
 #define NVOBJ_ENGINE_GR		1
-#define NVOBJ_ENGINE_PPP	2
-#define NVOBJ_ENGINE_COPY	3
-#define NVOBJ_ENGINE_VP		4
-#define NVOBJ_ENGINE_CRYPT      5
-#define NVOBJ_ENGINE_BSP	6
-#define NVOBJ_ENGINE_DISPLAY	0xcafe0001
-#define NVOBJ_ENGINE_INT	0xdeadbeef
+#define NVOBJ_ENGINE_CRYPT	2
+#define NVOBJ_ENGINE_COPY0	3
+#define NVOBJ_ENGINE_COPY1	4
+#define NVOBJ_ENGINE_MPEG	5
+#define NVOBJ_ENGINE_DISPLAY	15
+#define NVOBJ_ENGINE_NR		16
 
 #define NVOBJ_FLAG_DONT_MAP             (1 << 0)
 #define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
@@ -245,11 +244,8 @@ struct nouveau_channel {
 	struct nouveau_gpuobj *cache;
 	void *fifo_priv;
 
-	/* PGRAPH context */
-	/* XXX may be merge 2 pointers as private data ??? */
-	struct nouveau_gpuobj *ramin_grctx;
-	struct nouveau_gpuobj *crypt_ctx;
-	void *pgraph_ctx;
+	/* Execution engine contexts */
+	void *engctx[NVOBJ_ENGINE_NR];
 
 	/* NV50 VM */
 	struct nouveau_vm     *vm;
@@ -298,6 +294,18 @@ struct nouveau_channel {
 	} debugfs;
 };
 
+struct nouveau_exec_engine {
+	void (*destroy)(struct drm_device *, int engine);
+	int  (*init)(struct drm_device *, int engine);
+	int  (*fini)(struct drm_device *, int engine);
+	int  (*context_new)(struct nouveau_channel *, int engine);
+	void (*context_del)(struct nouveau_channel *, int engine);
+	int  (*object_new)(struct nouveau_channel *, int engine,
+			   u32 handle, u16 class);
+	void (*set_tile_region)(struct drm_device *dev, int i);
+	void (*tlb_flush)(struct drm_device *, int engine);
+};
+
 struct nouveau_instmem_engine {
 	void	*priv;
 
@@ -364,30 +372,6 @@ struct nouveau_fifo_engine {
 	void (*tlb_flush)(struct drm_device *dev);
 };
 
-struct nouveau_pgraph_engine {
-	bool accel_blocked;
-	bool registered;
-	int grctx_size;
-	void *priv;
-
-	/* NV2x/NV3x context table (0x400780) */
-	struct nouveau_gpuobj *ctx_table;
-
-	int  (*init)(struct drm_device *);
-	void (*takedown)(struct drm_device *);
-
-	void (*fifo_access)(struct drm_device *, bool);
-
-	struct nouveau_channel *(*channel)(struct drm_device *);
-	int  (*create_context)(struct nouveau_channel *);
-	void (*destroy_context)(struct nouveau_channel *);
-	int  (*load_context)(struct nouveau_channel *);
-	int  (*unload_context)(struct drm_device *);
-	void (*tlb_flush)(struct drm_device *dev);
-
-	void (*set_tile_region)(struct drm_device *dev, int i);
-};
-
 struct nouveau_display_engine {
 	void *priv;
 	int (*early_init)(struct drm_device *);
@@ -426,6 +410,19 @@ struct nouveau_pm_voltage {
 	int nr_level;
 };
 
+struct nouveau_pm_memtiming {
+	int id;
+	u32 reg_100220;
+	u32 reg_100224;
+	u32 reg_100228;
+	u32 reg_10022c;
+	u32 reg_100230;
+	u32 reg_100234;
+	u32 reg_100238;
+	u32 reg_10023c;
+	u32 reg_100240;
+};
+
 #define NOUVEAU_PM_MAX_LEVEL 8
 struct nouveau_pm_level {
 	struct device_attribute dev_attr;
@@ -436,11 +433,13 @@ struct nouveau_pm_level {
 	u32 memory;
 	u32 shader;
 	u32 unk05;
+	u32 unk0a;
 
 	u8 voltage;
 	u8 fanspeed;
 
 	u16 memscript;
+	struct nouveau_pm_memtiming *timing;
 };
 
 struct nouveau_pm_temp_sensor_constants {
@@ -457,17 +456,6 @@ struct nouveau_pm_threshold_temp {
 	s16 fan_boost;
 };
 
-struct nouveau_pm_memtiming {
-	u32 reg_100220;
-	u32 reg_100224;
-	u32 reg_100228;
-	u32 reg_10022c;
-	u32 reg_100230;
-	u32 reg_100234;
-	u32 reg_100238;
-	u32 reg_10023c;
-};
-
 struct nouveau_pm_memtimings {
 	bool supported;
 	struct nouveau_pm_memtiming *timing;
@@ -499,16 +487,6 @@ struct nouveau_pm_engine {
 	int (*temp_get)(struct drm_device *);
 };
 
-struct nouveau_crypt_engine {
-	bool registered;
-
-	int  (*init)(struct drm_device *);
-	void (*takedown)(struct drm_device *);
-	int  (*create_context)(struct nouveau_channel *);
-	void (*destroy_context)(struct nouveau_channel *);
-	void (*tlb_flush)(struct drm_device *dev);
-};
-
 struct nouveau_vram_engine {
 	int  (*init)(struct drm_device *);
 	int  (*get)(struct drm_device *, u64, u32 align, u32 size_nc,
@@ -523,12 +501,10 @@ struct nouveau_engine {
 	struct nouveau_mc_engine      mc;
 	struct nouveau_timer_engine   timer;
 	struct nouveau_fb_engine      fb;
-	struct nouveau_pgraph_engine  graph;
 	struct nouveau_fifo_engine    fifo;
 	struct nouveau_display_engine display;
 	struct nouveau_gpio_engine    gpio;
 	struct nouveau_pm_engine      pm;
-	struct nouveau_crypt_engine   crypt;
 	struct nouveau_vram_engine    vram;
 };
 
@@ -637,6 +613,7 @@ struct drm_nouveau_private {
 	enum nouveau_card_type card_type;
 	/* exact chipset, derived from NV_PMC_BOOT_0 */
 	int chipset;
+	int stepping;
 	int flags;
 
 	void __iomem *mmio;
@@ -647,6 +624,7 @@ struct drm_nouveau_private {
 	u32 ramin_base;
 	bool ramin_available;
 	struct drm_mm ramin_heap;
+	struct nouveau_exec_engine *eng[NVOBJ_ENGINE_NR];
 	struct list_head gpuobj_list;
 	struct list_head classes;
 
@@ -745,10 +723,6 @@ struct drm_nouveau_private {
 	uint32_t crtc_owner;
 	uint32_t dac_users[4];
 
-	struct nouveau_suspend_resume {
-		uint32_t *ramin_copy;
-	} susres;
-
 	struct backlight_device *backlight;
 
 	struct {
@@ -757,8 +731,6 @@ struct drm_nouveau_private {
 
 	struct nouveau_fbdev *nfbdev;
 	struct apertures_struct *apertures;
-
-	bool powered_down;
 };
 
 static inline struct drm_nouveau_private *
@@ -883,17 +855,27 @@ extern void nouveau_channel_ref(struct nouveau_channel *chan,
 extern void nouveau_channel_idle(struct nouveau_channel *chan);
 
 /* nouveau_object.c */
-#define NVOBJ_CLASS(d,c,e) do {                                                \
+#define NVOBJ_ENGINE_ADD(d, e, p) do {                                         \
+	struct drm_nouveau_private *dev_priv = (d)->dev_private;               \
+	dev_priv->eng[NVOBJ_ENGINE_##e] = (p);                                 \
+} while (0)
+
+#define NVOBJ_ENGINE_DEL(d, e) do {                                            \
+	struct drm_nouveau_private *dev_priv = (d)->dev_private;               \
+	dev_priv->eng[NVOBJ_ENGINE_##e] = NULL;                                \
+} while (0)
+
+#define NVOBJ_CLASS(d, c, e) do {                                              \
 	int ret = nouveau_gpuobj_class_new((d), (c), NVOBJ_ENGINE_##e);        \
 	if (ret)                                                               \
 		return ret;                                                    \
-} while(0)
+} while (0)
 
-#define NVOBJ_MTHD(d,c,m,e) do {                                               \
+#define NVOBJ_MTHD(d, c, m, e) do {                                            \
 	int ret = nouveau_gpuobj_mthd_new((d), (c), (m), (e));                 \
 	if (ret)                                                               \
 		return ret;                                                    \
-} while(0)
+} while (0)
 
 extern int  nouveau_gpuobj_early_init(struct drm_device *);
 extern int  nouveau_gpuobj_init(struct drm_device *);
@@ -903,7 +885,7 @@ extern void nouveau_gpuobj_resume(struct drm_device *dev);
 extern int  nouveau_gpuobj_class_new(struct drm_device *, u32 class, u32 eng);
 extern int  nouveau_gpuobj_mthd_new(struct drm_device *, u32 class, u32 mthd,
 				    int (*exec)(struct nouveau_channel *,
-					        u32 class, u32 mthd, u32 data));
+						u32 class, u32 mthd, u32 data));
 extern int  nouveau_gpuobj_mthd_call(struct nouveau_channel *, u32, u32, u32);
 extern int  nouveau_gpuobj_mthd_call2(struct drm_device *, int, u32, u32, u32);
 extern int nouveau_gpuobj_channel_init(struct nouveau_channel *,
@@ -1137,81 +1119,50 @@ extern int  nvc0_fifo_load_context(struct nouveau_channel *);
 extern int  nvc0_fifo_unload_context(struct drm_device *);
 
 /* nv04_graph.c */
-extern int  nv04_graph_init(struct drm_device *);
-extern void nv04_graph_takedown(struct drm_device *);
+extern int  nv04_graph_create(struct drm_device *);
 extern void nv04_graph_fifo_access(struct drm_device *, bool);
-extern struct nouveau_channel *nv04_graph_channel(struct drm_device *);
-extern int  nv04_graph_create_context(struct nouveau_channel *);
-extern void nv04_graph_destroy_context(struct nouveau_channel *);
-extern int  nv04_graph_load_context(struct nouveau_channel *);
-extern int  nv04_graph_unload_context(struct drm_device *);
+extern int  nv04_graph_object_new(struct nouveau_channel *, int, u32, u16);
 extern int  nv04_graph_mthd_page_flip(struct nouveau_channel *chan,
 				      u32 class, u32 mthd, u32 data);
 extern struct nouveau_bitfield nv04_graph_nsource[];
 
 /* nv10_graph.c */
-extern int  nv10_graph_init(struct drm_device *);
-extern void nv10_graph_takedown(struct drm_device *);
+extern int  nv10_graph_create(struct drm_device *);
 extern struct nouveau_channel *nv10_graph_channel(struct drm_device *);
-extern int  nv10_graph_create_context(struct nouveau_channel *);
-extern void nv10_graph_destroy_context(struct nouveau_channel *);
-extern int  nv10_graph_load_context(struct nouveau_channel *);
-extern int  nv10_graph_unload_context(struct drm_device *);
-extern void nv10_graph_set_tile_region(struct drm_device *dev, int i);
 extern struct nouveau_bitfield nv10_graph_intr[];
 extern struct nouveau_bitfield nv10_graph_nstatus[];
 
 /* nv20_graph.c */
-extern int  nv20_graph_create_context(struct nouveau_channel *);
-extern void nv20_graph_destroy_context(struct nouveau_channel *);
-extern int  nv20_graph_load_context(struct nouveau_channel *);
-extern int  nv20_graph_unload_context(struct drm_device *);
-extern int  nv20_graph_init(struct drm_device *);
-extern void nv20_graph_takedown(struct drm_device *);
-extern int  nv30_graph_init(struct drm_device *);
-extern void nv20_graph_set_tile_region(struct drm_device *dev, int i);
+extern int  nv20_graph_create(struct drm_device *);
 
 /* nv40_graph.c */
-extern int  nv40_graph_init(struct drm_device *);
-extern void nv40_graph_takedown(struct drm_device *);
-extern struct nouveau_channel *nv40_graph_channel(struct drm_device *);
-extern int  nv40_graph_create_context(struct nouveau_channel *);
-extern void nv40_graph_destroy_context(struct nouveau_channel *);
-extern int  nv40_graph_load_context(struct nouveau_channel *);
-extern int  nv40_graph_unload_context(struct drm_device *);
+extern int  nv40_graph_create(struct drm_device *);
 extern void nv40_grctx_init(struct nouveau_grctx *);
-extern void nv40_graph_set_tile_region(struct drm_device *dev, int i);
 
 /* nv50_graph.c */
-extern int  nv50_graph_init(struct drm_device *);
-extern void nv50_graph_takedown(struct drm_device *);
-extern void nv50_graph_fifo_access(struct drm_device *, bool);
-extern struct nouveau_channel *nv50_graph_channel(struct drm_device *);
-extern int  nv50_graph_create_context(struct nouveau_channel *);
-extern void nv50_graph_destroy_context(struct nouveau_channel *);
-extern int  nv50_graph_load_context(struct nouveau_channel *);
-extern int  nv50_graph_unload_context(struct drm_device *);
+extern int  nv50_graph_create(struct drm_device *);
 extern int  nv50_grctx_init(struct nouveau_grctx *);
-extern void nv50_graph_tlb_flush(struct drm_device *dev);
-extern void nv84_graph_tlb_flush(struct drm_device *dev);
 extern struct nouveau_enum nv50_data_error_names[];
+extern int  nv50_graph_isr_chid(struct drm_device *dev, u64 inst);
 
 /* nvc0_graph.c */
-extern int  nvc0_graph_init(struct drm_device *);
-extern void nvc0_graph_takedown(struct drm_device *);
-extern void nvc0_graph_fifo_access(struct drm_device *, bool);
-extern struct nouveau_channel *nvc0_graph_channel(struct drm_device *);
-extern int  nvc0_graph_create_context(struct nouveau_channel *);
-extern void nvc0_graph_destroy_context(struct nouveau_channel *);
-extern int  nvc0_graph_load_context(struct nouveau_channel *);
-extern int  nvc0_graph_unload_context(struct drm_device *);
+extern int  nvc0_graph_create(struct drm_device *);
+extern int  nvc0_graph_isr_chid(struct drm_device *dev, u64 inst);
 
 /* nv84_crypt.c */
-extern int  nv84_crypt_init(struct drm_device *dev);
-extern void nv84_crypt_fini(struct drm_device *dev);
-extern int  nv84_crypt_create_context(struct nouveau_channel *);
-extern void nv84_crypt_destroy_context(struct nouveau_channel *);
-extern void nv84_crypt_tlb_flush(struct drm_device *dev);
+extern int  nv84_crypt_create(struct drm_device *);
+
+/* nva3_copy.c */
+extern int  nva3_copy_create(struct drm_device *dev);
+
+/* nvc0_copy.c */
+extern int  nvc0_copy_create(struct drm_device *dev, int engine);
+
+/* nv40_mpeg.c */
+extern int  nv40_mpeg_create(struct drm_device *dev);
+
+/* nv50_mpeg.c */
+extern int  nv50_mpeg_create(struct drm_device *dev);
 
 /* nv04_instmem.c */
 extern int  nv04_instmem_init(struct drm_device *);
@@ -1402,8 +1353,8 @@ bool nv50_gpio_irq_enable(struct drm_device *, enum dcb_gpio_tag, bool on);
 /* nv50_calc. */
 int nv50_calc_pll(struct drm_device *, struct pll_lims *, int clk,
 		  int *N1, int *M1, int *N2, int *M2, int *P);
-int nv50_calc_pll2(struct drm_device *, struct pll_lims *,
-		   int clk, int *N, int *fN, int *M, int *P);
+int nva3_calc_pll(struct drm_device *, struct pll_lims *,
+		  int clk, int *N, int *fN, int *M, int *P);
 
 #ifndef ioread32_native
 #ifdef __BIG_ENDIAN
@@ -1579,6 +1530,13 @@ nv_match_device(struct drm_device *dev, unsigned device,
 		dev->pdev->subsystem_device == sub_device;
 }
 
+static inline void *
+nv_engine(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	return (void *)dev_priv->eng[engine];
+}
+
 /* returns 1 if device is one of the nv4x using the 0x4497 object class,
  * helpful to determine a number of other hardware features
  */
diff --git a/drivers/gpu/drm/nouveau/nouveau_grctx.h b/drivers/gpu/drm/nouveau/nouveau_grctx.h
index 4a8ad1307fa4..86c2e374e938 100644
--- a/drivers/gpu/drm/nouveau/nouveau_grctx.h
+++ b/drivers/gpu/drm/nouveau/nouveau_grctx.h
@@ -87,10 +87,10 @@ _cp_bra(struct nouveau_grctx *ctx, u32 mod, int flag, int state, int name)
 	cp_out(ctx, CP_BRA | (mod << 18) | ip | flag |
 		    (state ? 0 : CP_BRA_IF_CLEAR));
 }
-#define cp_bra(c,f,s,n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#define cp_bra(c, f, s, n) _cp_bra((c), 0, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
 #ifdef CP_BRA_MOD
-#define cp_cal(c,f,s,n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
-#define cp_ret(c,f,s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
+#define cp_cal(c, f, s, n) _cp_bra((c), 1, CP_FLAG_##f, CP_FLAG_##f##_##s, n)
+#define cp_ret(c, f, s) _cp_bra((c), 2, CP_FLAG_##f, CP_FLAG_##f##_##s, 0)
 #endif
 
 static inline void
@@ -98,14 +98,14 @@ _cp_wait(struct nouveau_grctx *ctx, int flag, int state)
 {
 	cp_out(ctx, CP_WAIT | flag | (state ? CP_WAIT_SET : 0));
 }
-#define cp_wait(c,f,s) _cp_wait((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+#define cp_wait(c, f, s) _cp_wait((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
 
 static inline void
 _cp_set(struct nouveau_grctx *ctx, int flag, int state)
 {
 	cp_out(ctx, CP_SET | flag | (state ? CP_SET_1 : 0));
 }
-#define cp_set(c,f,s) _cp_set((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
+#define cp_set(c, f, s) _cp_set((c), CP_FLAG_##f, CP_FLAG_##f##_##s)
 
 static inline void
 cp_pos(struct nouveau_grctx *ctx, int offset)
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index c3e953b08992..2960f583dc38 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -51,8 +51,7 @@ nv10_mem_update_tile_region(struct drm_device *dev,
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	int i = tile - dev_priv->tile.reg;
+	int i = tile - dev_priv->tile.reg, j;
 	unsigned long save;
 
 	nouveau_fence_unref(&tile->fence);
@@ -70,7 +69,10 @@ nv10_mem_update_tile_region(struct drm_device *dev,
 	nouveau_wait_for_idle(dev);
 
 	pfb->set_tile_region(dev, i);
-	pgraph->set_tile_region(dev, i);
+	for (j = 0; j < NVOBJ_ENGINE_NR; j++) {
+		if (dev_priv->eng[j] && dev_priv->eng[j]->set_tile_region)
+			dev_priv->eng[j]->set_tile_region(dev, i);
+	}
 
 	pfifo->cache_pull(dev, true);
 	pfifo->reassign(dev, true);
@@ -595,10 +597,10 @@ nouveau_mem_timing_init(struct drm_device *dev)
 	if (!memtimings->timing)
 		return;
 
-	/* Get "some number" from the timing reg for NV_40
+	/* Get "some number" from the timing reg for NV_40 and NV_50
 	 * Used in calculations later */
-	if(dev_priv->card_type == NV_40) {
-		magic_number = (nv_rd32(dev,0x100228) & 0x0f000000) >> 24;
+	if (dev_priv->card_type >= NV_40 && dev_priv->chipset < 0x98) {
+		magic_number = (nv_rd32(dev, 0x100228) & 0x0f000000) >> 24;
 	}
 
 	entry = mem + mem[1];
@@ -641,51 +643,68 @@ nouveau_mem_timing_init(struct drm_device *dev)
 		/* XXX: I don't trust the -1's and +1's... they must come
 		 *      from somewhere! */
 		timing->reg_100224 = (tUNK_0 + tUNK_19 + 1 + magic_number) << 24 |
-				      tUNK_18 << 16 |
+				      max(tUNK_18, (u8) 1) << 16 |
 				      (tUNK_1 + tUNK_19 + 1 + magic_number) << 8;
-		if(dev_priv->chipset == 0xa8) {
+		if (dev_priv->chipset == 0xa8) {
 			timing->reg_100224 |= (tUNK_2 - 1);
 		} else {
 			timing->reg_100224 |= (tUNK_2 + 2 - magic_number);
 		}
 
 		timing->reg_100228 = (tUNK_12 << 16 | tUNK_11 << 8 | tUNK_10);
-		if(dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa) {
+		if (dev_priv->chipset >= 0xa3 && dev_priv->chipset < 0xaa)
 			timing->reg_100228 |= (tUNK_19 - 1) << 24;
-		}
+		else
+			timing->reg_100228 |= magic_number << 24;
 
-		if(dev_priv->card_type == NV_40) {
+		if (dev_priv->card_type == NV_40) {
 			/* NV40: don't know what the rest of the regs are..
 			 * And don't need to know either */
-			timing->reg_100228 |= 0x20200000 | magic_number << 24;
-		} else if(dev_priv->card_type >= NV_50) {
-			/* XXX: reg_10022c */
-			timing->reg_10022c = tUNK_2 - 1;
+			timing->reg_100228 |= 0x20200000;
+		} else if (dev_priv->card_type >= NV_50) {
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
+				timing->reg_10022c = (0x14 + tUNK_2) << 24 |
+						     0x16 << 16 |
+						     (tUNK_2 - 1) << 8 |
+						     (tUNK_2 - 1);
+			} else {
+				/* XXX: reg_10022c for recentish cards */
+				timing->reg_10022c = tUNK_2 - 1;
+			}
 
 			timing->reg_100230 = (tUNK_20 << 24 | tUNK_21 << 16 |
 						  tUNK_13 << 8  | tUNK_13);
 
 			timing->reg_100234 = (tRAS << 24 | tRC);
-			timing->reg_100234 += max(tUNK_10,tUNK_11) << 16;
+			timing->reg_100234 += max(tUNK_10, tUNK_11) << 16;
 
-			if(dev_priv->chipset < 0xa3) {
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
 				timing->reg_100234 |= (tUNK_2 + 2) << 8;
 			} else {
 				/* XXX: +6? */
 				timing->reg_100234 |= (tUNK_19 + 6) << 8;
 			}
 
-			/* XXX; reg_100238, reg_10023c
-			 * reg_100238: 0x00??????
-			 * reg_10023c: 0x!!??0202 for NV50+ cards (empirical evidence) */
+			/* XXX; reg_100238
+			 * reg_100238: 0x00?????? */
 			timing->reg_10023c = 0x202;
-			if(dev_priv->chipset < 0xa3) {
+			if (dev_priv->chipset < 0x98 ||
+			    (dev_priv->chipset == 0x98 &&
+			     dev_priv->stepping <= 0xa1)) {
 				timing->reg_10023c |= 0x4000000 | (tUNK_2 - 1) << 16;
 			} else {
-				/* currently unknown
+				/* XXX: reg_10023c
+				 * currently unknown
 				 * 10023c seen as 06xxxxxx, 0bxxxxxx or 0fxxxxxx */
 			}
+
+			/* XXX: reg_100240? */
 		}
+		timing->id = i;
 
 		NV_DEBUG(dev, "Entry %d: 220: %08x %08x %08x %08x\n", i,
 			 timing->reg_100220, timing->reg_100224,
@@ -693,10 +712,11 @@ nouveau_mem_timing_init(struct drm_device *dev)
 		NV_DEBUG(dev, "         230: %08x %08x %08x %08x\n",
 			 timing->reg_100230, timing->reg_100234,
 			 timing->reg_100238, timing->reg_10023c);
+		NV_DEBUG(dev, "         240: %08x\n", timing->reg_100240);
 	}
 
 	memtimings->nr_timing = entries;
-	memtimings->supported = true;
+	memtimings->supported = (dev_priv->chipset <= 0x98);
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 67a16e01ffa6..8f97016f5b26 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -361,20 +361,6 @@ nouveau_gpuobj_new_fake(struct drm_device *dev, u32 pinst, u64 vinst,
 	return 0;
 }
 
-
-static uint32_t
-nouveau_gpuobj_class_instmem_size(struct drm_device *dev, int class)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	/*XXX: dodgy hack for now */
-	if (dev_priv->card_type >= NV_50)
-		return 24;
-	if (dev_priv->card_type >= NV_40)
-		return 32;
-	return 16;
-}
-
 /*
    DMA objects are used to reference a piece of memory in the
    framebuffer, PCI or AGP address space. Each object is 16 bytes big
@@ -606,11 +592,11 @@ nouveau_gpuobj_dma_new(struct nouveau_channel *chan, int class, u64 base,
    set to 0?
 */
 static int
-nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
-		      struct nouveau_gpuobj **gpuobj_ret)
+nouveau_gpuobj_sw_new(struct nouveau_channel *chan, u32 handle, u16 class)
 {
 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct nouveau_gpuobj *gpuobj;
+	int ret;
 
 	gpuobj = kzalloc(sizeof(*gpuobj), GFP_KERNEL);
 	if (!gpuobj)
@@ -624,8 +610,10 @@ nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
 	spin_lock(&dev_priv->ramin_lock);
 	list_add_tail(&gpuobj->list, &dev_priv->gpuobj_list);
 	spin_unlock(&dev_priv->ramin_lock);
-	*gpuobj_ret = gpuobj;
-	return 0;
+
+	ret = nouveau_ramht_insert(chan, handle, gpuobj);
+	nouveau_gpuobj_ref(NULL, &gpuobj);
+	return ret;
 }
 
 int
@@ -634,101 +622,30 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, u32 handle, int class)
 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
 	struct drm_device *dev = chan->dev;
 	struct nouveau_gpuobj_class *oc;
-	struct nouveau_gpuobj *gpuobj;
 	int ret;
 
 	NV_DEBUG(dev, "ch%d class=0x%04x\n", chan->id, class);
 
 	list_for_each_entry(oc, &dev_priv->classes, head) {
-		if (oc->id == class)
-			goto found;
-	}
-
-	NV_ERROR(dev, "illegal object class: 0x%x\n", class);
-	return -EINVAL;
+		struct nouveau_exec_engine *eng = dev_priv->eng[oc->engine];
 
-found:
-	switch (oc->engine) {
-	case NVOBJ_ENGINE_SW:
-		if (dev_priv->card_type < NV_C0) {
-			ret = nouveau_gpuobj_sw_new(chan, class, &gpuobj);
-			if (ret)
-				return ret;
-			goto insert;
-		}
-		break;
-	case NVOBJ_ENGINE_GR:
-		if ((dev_priv->card_type >= NV_20 && !chan->ramin_grctx) ||
-		    (dev_priv->card_type  < NV_20 && !chan->pgraph_ctx)) {
-			struct nouveau_pgraph_engine *pgraph =
-				&dev_priv->engine.graph;
+		if (oc->id != class)
+			continue;
 
-			ret = pgraph->create_context(chan);
-			if (ret)
-				return ret;
-		}
-		break;
-	case NVOBJ_ENGINE_CRYPT:
-		if (!chan->crypt_ctx) {
-			struct nouveau_crypt_engine *pcrypt =
-				&dev_priv->engine.crypt;
+		if (oc->engine == NVOBJ_ENGINE_SW)
+			return nouveau_gpuobj_sw_new(chan, handle, class);
 
-			ret = pcrypt->create_context(chan);
+		if (!chan->engctx[oc->engine]) {
+			ret = eng->context_new(chan, oc->engine);
 			if (ret)
 				return ret;
 		}
-		break;
-	}
-
-	/* we're done if this is fermi */
-	if (dev_priv->card_type >= NV_C0)
-		return 0;
-
-	ret = nouveau_gpuobj_new(dev, chan,
-				 nouveau_gpuobj_class_instmem_size(dev, class),
-				 16,
-				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
-				 &gpuobj);
-	if (ret) {
-		NV_ERROR(dev, "error creating gpuobj: %d\n", ret);
-		return ret;
-	}
 
-	if (dev_priv->card_type >= NV_50) {
-		nv_wo32(gpuobj,  0, class);
-		nv_wo32(gpuobj, 20, 0x00010000);
-	} else {
-		switch (class) {
-		case NV_CLASS_NULL:
-			nv_wo32(gpuobj, 0, 0x00001030);
-			nv_wo32(gpuobj, 4, 0xFFFFFFFF);
-			break;
-		default:
-			if (dev_priv->card_type >= NV_40) {
-				nv_wo32(gpuobj, 0, class);
-#ifdef __BIG_ENDIAN
-				nv_wo32(gpuobj, 8, 0x01000000);
-#endif
-			} else {
-#ifdef __BIG_ENDIAN
-				nv_wo32(gpuobj, 0, class | 0x00080000);
-#else
-				nv_wo32(gpuobj, 0, class);
-#endif
-			}
-		}
+		return eng->object_new(chan, oc->engine, handle, class);
 	}
-	dev_priv->engine.instmem.flush(dev);
-
-	gpuobj->engine = oc->engine;
-	gpuobj->class  = oc->id;
 
-insert:
-	ret = nouveau_ramht_insert(chan, handle, gpuobj);
-	if (ret)
-		NV_ERROR(dev, "error adding gpuobj to RAMHT: %d\n", ret);
-	nouveau_gpuobj_ref(NULL, &gpuobj);
-	return ret;
+	NV_ERROR(dev, "illegal object class: 0x%x\n", class);
+	return -EINVAL;
 }
 
 static int
@@ -746,9 +663,6 @@ nouveau_gpuobj_channel_init_pramin(struct nouveau_channel *chan)
 	size = 0x2000;
 	base = 0;
 
-	/* PGRAPH context */
-	size += dev_priv->engine.graph.grctx_size;
-
 	if (dev_priv->card_type == NV_50) {
 		/* Various fixed table thingos */
 		size += 0x1400; /* mostly unknown stuff */
diff --git a/drivers/gpu/drm/nouveau/nouveau_perf.c b/drivers/gpu/drm/nouveau/nouveau_perf.c
index 670e3cb697ec..922fb6b664ed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_perf.c
+++ b/drivers/gpu/drm/nouveau/nouveau_perf.c
@@ -72,6 +72,68 @@ legacy_perf_init(struct drm_device *dev)
 	pm->nr_perflvl = 1;
 }
 
+static struct nouveau_pm_memtiming *
+nouveau_perf_timing(struct drm_device *dev, struct bit_entry *P,
+		    u16 memclk, u8 *entry, u8 recordlen, u8 entries)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_pm_engine *pm = &dev_priv->engine.pm;
+	struct nvbios *bios = &dev_priv->vbios;
+	u8 ramcfg;
+	int i;
+
+	/* perf v2 has a separate "timing map" table, we have to match
+	 * the target memory clock to a specific entry, *then* use
+	 * ramcfg to select the correct subentry
+	 */
+	if (P->version == 2) {
+		u8 *tmap = ROMPTR(bios, P->data[4]);
+		if (!tmap) {
+			NV_DEBUG(dev, "no timing map pointer\n");
+			return NULL;
+		}
+
+		if (tmap[0] != 0x10) {
+			NV_WARN(dev, "timing map 0x%02x unknown\n", tmap[0]);
+			return NULL;
+		}
+
+		entry = tmap + tmap[1];
+		recordlen = tmap[2] + (tmap[4] * tmap[3]);
+		for (i = 0; i < tmap[5]; i++, entry += recordlen) {
+			if (memclk >= ROM16(entry[0]) &&
+			    memclk <= ROM16(entry[2]))
+				break;
+		}
+
+		if (i == tmap[5]) {
+			NV_WARN(dev, "no match in timing map table\n");
+			return NULL;
+		}
+
+		entry += tmap[2];
+		recordlen = tmap[3];
+		entries   = tmap[4];
+	}
+
+	ramcfg = (nv_rd32(dev, NV_PEXTDEV_BOOT_0) & 0x0000003c) >> 2;
+	if (bios->ram_restrict_tbl_ptr)
+		ramcfg = bios->data[bios->ram_restrict_tbl_ptr + ramcfg];
+
+	if (ramcfg >= entries) {
+		NV_WARN(dev, "ramcfg strap out of bounds!\n");
+		return NULL;
+	}
+
+	entry += ramcfg * recordlen;
+	if (entry[1] >= pm->memtimings.nr_timing) {
+		NV_WARN(dev, "timingset %d does not exist\n", entry[1]);
+		return NULL;
+	}
+
+	return &pm->memtimings.timing[entry[1]];
+}
+
 void
 nouveau_perf_init(struct drm_device *dev)
 {
@@ -124,6 +186,8 @@ nouveau_perf_init(struct drm_device *dev)
 	for (i = 0; i < entries; i++) {
 		struct nouveau_pm_level *perflvl = &pm->perflvl[pm->nr_perflvl];
 
+		perflvl->timing = NULL;
+
 		if (entry[0] == 0xff) {
 			entry += recordlen;
 			continue;
@@ -174,9 +238,21 @@ nouveau_perf_init(struct drm_device *dev)
 #define subent(n) entry[perf[2] + ((n) * perf[3])]
 			perflvl->fanspeed = 0; /*XXX*/
 			perflvl->voltage = entry[2];
-			perflvl->core = (ROM16(subent(0)) & 0xfff) * 1000;
-			perflvl->shader = (ROM16(subent(1)) & 0xfff) * 1000;
-			perflvl->memory = (ROM16(subent(2)) & 0xfff) * 1000;
+			if (dev_priv->card_type == NV_50) {
+				perflvl->core = ROM16(subent(0)) & 0xfff;
+				perflvl->shader = ROM16(subent(1)) & 0xfff;
+				perflvl->memory = ROM16(subent(2)) & 0xfff;
+			} else {
+				perflvl->shader = ROM16(subent(3)) & 0xfff;
+				perflvl->core   = perflvl->shader / 2;
+				perflvl->unk0a  = ROM16(subent(4)) & 0xfff;
+				perflvl->memory = ROM16(subent(5)) & 0xfff;
+			}
+
+			perflvl->core *= 1000;
+			perflvl->shader *= 1000;
+			perflvl->memory *= 1000;
+			perflvl->unk0a *= 1000;
 			break;
 		}
 
@@ -190,6 +266,16 @@ nouveau_perf_init(struct drm_device *dev)
 			}
 		}
 
+		/* get the corresponding memory timings */
+		if (version > 0x15) {
+			/* last 3 args are for < 0x40, ignored for >= 0x40 */
+			perflvl->timing =
+				nouveau_perf_timing(dev, &P,
+						    perflvl->memory / 1000,
+						    entry + perf[3],
+						    perf[5], perf[4]);
+		}
+
 		snprintf(perflvl->name, sizeof(perflvl->name),
 			 "performance_level_%d", i);
 		perflvl->id = i;
diff --git a/drivers/gpu/drm/nouveau/nouveau_pm.c b/drivers/gpu/drm/nouveau/nouveau_pm.c
index 4399e2f34db4..da8d994d5e8a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_pm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_pm.c
@@ -156,7 +156,7 @@ nouveau_pm_perflvl_get(struct drm_device *dev, struct nouveau_pm_level *perflvl)
 static void
 nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 {
-	char c[16], s[16], v[16], f[16];
+	char c[16], s[16], v[16], f[16], t[16];
 
 	c[0] = '\0';
 	if (perflvl->core)
@@ -174,8 +174,12 @@ nouveau_pm_perflvl_info(struct nouveau_pm_level *perflvl, char *ptr, int len)
 	if (perflvl->fanspeed)
 		snprintf(f, sizeof(f), " fanspeed %d%%", perflvl->fanspeed);
 
-	snprintf(ptr, len, "memory %dMHz%s%s%s%s\n", perflvl->memory / 1000,
-		 c, s, v, f);
+	t[0] = '\0';
+	if (perflvl->timing)
+		snprintf(t, sizeof(t), " timing %d", perflvl->timing->id);
+
+	snprintf(ptr, len, "memory %dMHz%s%s%s%s%s\n", perflvl->memory / 1000,
+		 c, s, v, f, t);
 }
 
 static ssize_t
@@ -449,7 +453,7 @@ nouveau_hwmon_fini(struct drm_device *dev)
 #endif
 }
 
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && defined(CONFIG_POWER_SUPPLY)
 static int
 nouveau_pm_acpi_event(struct notifier_block *nb, unsigned long val, void *data)
 {
@@ -476,10 +480,10 @@ nouveau_pm_init(struct drm_device *dev)
 	char info[256];
 	int ret, i;
 
+	nouveau_mem_timing_init(dev);
 	nouveau_volt_init(dev);
 	nouveau_perf_init(dev);
 	nouveau_temp_init(dev);
-	nouveau_mem_timing_init(dev);
 
 	NV_INFO(dev, "%d available performance level(s)\n", pm->nr_perflvl);
 	for (i = 0; i < pm->nr_perflvl; i++) {
@@ -490,6 +494,7 @@ nouveau_pm_init(struct drm_device *dev)
 	/* determine current ("boot") performance level */
 	ret = nouveau_pm_perflvl_get(dev, &pm->boot);
 	if (ret == 0) {
+		strncpy(pm->boot.name, "boot", 4);
 		pm->cur = &pm->boot;
 
 		nouveau_pm_perflvl_info(&pm->boot, info, sizeof(info));
@@ -507,7 +512,7 @@ nouveau_pm_init(struct drm_device *dev)
 
 	nouveau_sysfs_init(dev);
 	nouveau_hwmon_init(dev);
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && defined(CONFIG_POWER_SUPPLY)
 	pm->acpi_nb.notifier_call = nouveau_pm_acpi_event;
 	register_acpi_notifier(&pm->acpi_nb);
 #endif
@@ -524,12 +529,12 @@ nouveau_pm_fini(struct drm_device *dev)
 	if (pm->cur != &pm->boot)
 		nouveau_pm_perflvl_set(dev, &pm->boot);
 
-	nouveau_mem_timing_fini(dev);
 	nouveau_temp_fini(dev);
 	nouveau_perf_fini(dev);
 	nouveau_volt_fini(dev);
+	nouveau_mem_timing_fini(dev);
 
-#ifdef CONFIG_ACPI
+#if defined(CONFIG_ACPI) && defined(CONFIG_POWER_SUPPLY)
 	unregister_acpi_notifier(&pm->acpi_nb);
 #endif
 	nouveau_hwmon_fini(dev);
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index 04e8fb795269..f18cdfc3400f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -639,9 +639,9 @@
 #    define NV50_PCONNECTOR_I2C_PORT_4                      0x0000e240
 #    define NV50_PCONNECTOR_I2C_PORT_5                      0x0000e258
 
-#define NV50_AUXCH_DATA_OUT(i,n)             ((n) * 4 + (i) * 0x50 + 0x0000e4c0)
+#define NV50_AUXCH_DATA_OUT(i, n)            ((n) * 4 + (i) * 0x50 + 0x0000e4c0)
 #define NV50_AUXCH_DATA_OUT__SIZE                                             4
-#define NV50_AUXCH_DATA_IN(i,n)              ((n) * 4 + (i) * 0x50 + 0x0000e4d0)
+#define NV50_AUXCH_DATA_IN(i, n)             ((n) * 4 + (i) * 0x50 + 0x0000e4d0)
 #define NV50_AUXCH_DATA_IN__SIZE                                              4
 #define NV50_AUXCH_ADDR(i)                             ((i) * 0x50 + 0x0000e4e0)
 #define NV50_AUXCH_CTRL(i)                             ((i) * 0x50 + 0x0000e4e4)
@@ -829,7 +829,7 @@
 #define NV50_PDISPLAY_SOR_BACKLIGHT                                  0x0061c084
 #define NV50_PDISPLAY_SOR_BACKLIGHT_ENABLE                           0x80000000
 #define NV50_PDISPLAY_SOR_BACKLIGHT_LEVEL                            0x00000fff
-#define NV50_SOR_DP_CTRL(i,l)            (0x0061c10c + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_CTRL(i, l)           (0x0061c10c + (i) * 0x800 + (l) * 0x80)
 #define NV50_SOR_DP_CTRL_ENABLED                                     0x00000001
 #define NV50_SOR_DP_CTRL_ENHANCED_FRAME_ENABLED                      0x00004000
 #define NV50_SOR_DP_CTRL_LANE_MASK                                   0x001f0000
@@ -841,10 +841,10 @@
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_DISABLED                   0x00000000
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_1                          0x01000000
 #define NV50_SOR_DP_CTRL_TRAINING_PATTERN_2                          0x02000000
-#define NV50_SOR_DP_UNK118(i,l)          (0x0061c118 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK120(i,l)          (0x0061c120 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK128(i,l)          (0x0061c128 + (i) * 0x800 + (l) * 0x80)
-#define NV50_SOR_DP_UNK130(i,l)          (0x0061c130 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK118(i, l)         (0x0061c118 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK120(i, l)         (0x0061c120 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK128(i, l)         (0x0061c128 + (i) * 0x800 + (l) * 0x80)
+#define NV50_SOR_DP_UNK130(i, l)         (0x0061c130 + (i) * 0x800 + (l) * 0x80)
 
 #define NV50_PDISPLAY_USER(i)                        ((i) * 0x1000 + 0x00640000)
 #define NV50_PDISPLAY_USER_PUT(i)                    ((i) * 0x1000 + 0x00640000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index 915fbce89595..38ea662568c1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -65,14 +65,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv04_fb_init;
 		engine->fb.takedown		= nv04_fb_takedown;
-		engine->graph.init		= nv04_graph_init;
-		engine->graph.takedown		= nv04_graph_takedown;
-		engine->graph.fifo_access	= nv04_graph_fifo_access;
-		engine->graph.channel		= nv04_graph_channel;
-		engine->graph.create_context	= nv04_graph_create_context;
-		engine->graph.destroy_context	= nv04_graph_destroy_context;
-		engine->graph.load_context	= nv04_graph_load_context;
-		engine->graph.unload_context	= nv04_graph_unload_context;
 		engine->fifo.channels		= 16;
 		engine->fifo.init		= nv04_fifo_init;
 		engine->fifo.takedown		= nv04_fifo_fini;
@@ -98,8 +90,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -123,15 +113,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv10_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv10_fb_free_tile_region;
-		engine->graph.init		= nv10_graph_init;
-		engine->graph.takedown		= nv10_graph_takedown;
-		engine->graph.channel		= nv10_graph_channel;
-		engine->graph.create_context	= nv10_graph_create_context;
-		engine->graph.destroy_context	= nv10_graph_destroy_context;
-		engine->graph.fifo_access	= nv04_graph_fifo_access;
-		engine->graph.load_context	= nv10_graph_load_context;
-		engine->graph.unload_context	= nv10_graph_unload_context;
-		engine->graph.set_tile_region	= nv10_graph_set_tile_region;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nv04_fifo_fini;
@@ -157,8 +138,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -182,15 +161,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv10_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv10_fb_free_tile_region;
-		engine->graph.init		= nv20_graph_init;
-		engine->graph.takedown		= nv20_graph_takedown;
-		engine->graph.channel		= nv10_graph_channel;
-		engine->graph.create_context	= nv20_graph_create_context;
-		engine->graph.destroy_context	= nv20_graph_destroy_context;
-		engine->graph.fifo_access	= nv04_graph_fifo_access;
-		engine->graph.load_context	= nv20_graph_load_context;
-		engine->graph.unload_context	= nv20_graph_unload_context;
-		engine->graph.set_tile_region	= nv20_graph_set_tile_region;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nv04_fifo_fini;
@@ -216,8 +186,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_get		= nv04_pm_clock_get;
 		engine->pm.clock_pre		= nv04_pm_clock_pre;
 		engine->pm.clock_set		= nv04_pm_clock_set;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -241,15 +209,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv10_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->graph.init		= nv30_graph_init;
-		engine->graph.takedown		= nv20_graph_takedown;
-		engine->graph.fifo_access	= nv04_graph_fifo_access;
-		engine->graph.channel		= nv10_graph_channel;
-		engine->graph.create_context	= nv20_graph_create_context;
-		engine->graph.destroy_context	= nv20_graph_destroy_context;
-		engine->graph.load_context	= nv20_graph_load_context;
-		engine->graph.unload_context	= nv20_graph_unload_context;
-		engine->graph.set_tile_region	= nv20_graph_set_tile_region;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nv04_fifo_fini;
@@ -277,8 +236,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.clock_set		= nv04_pm_clock_set;
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -303,15 +260,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fb.init_tile_region	= nv30_fb_init_tile_region;
 		engine->fb.set_tile_region	= nv40_fb_set_tile_region;
 		engine->fb.free_tile_region	= nv30_fb_free_tile_region;
-		engine->graph.init		= nv40_graph_init;
-		engine->graph.takedown		= nv40_graph_takedown;
-		engine->graph.fifo_access	= nv04_graph_fifo_access;
-		engine->graph.channel		= nv40_graph_channel;
-		engine->graph.create_context	= nv40_graph_create_context;
-		engine->graph.destroy_context	= nv40_graph_destroy_context;
-		engine->graph.load_context	= nv40_graph_load_context;
-		engine->graph.unload_context	= nv40_graph_unload_context;
-		engine->graph.set_tile_region	= nv40_graph_set_tile_region;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv40_fifo_init;
 		engine->fifo.takedown		= nv04_fifo_fini;
@@ -340,8 +288,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->pm.voltage_get		= nouveau_voltage_gpio_get;
 		engine->pm.voltage_set		= nouveau_voltage_gpio_set;
 		engine->pm.temp_get		= nv40_temp_get;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nouveau_mem_detect;
 		engine->vram.flags_valid	= nouveau_mem_flags_valid;
 		break;
@@ -368,19 +314,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv50_fb_init;
 		engine->fb.takedown		= nv50_fb_takedown;
-		engine->graph.init		= nv50_graph_init;
-		engine->graph.takedown		= nv50_graph_takedown;
-		engine->graph.fifo_access	= nv50_graph_fifo_access;
-		engine->graph.channel		= nv50_graph_channel;
-		engine->graph.create_context	= nv50_graph_create_context;
-		engine->graph.destroy_context	= nv50_graph_destroy_context;
-		engine->graph.load_context	= nv50_graph_load_context;
-		engine->graph.unload_context	= nv50_graph_unload_context;
-		if (dev_priv->chipset == 0x50 ||
-		    dev_priv->chipset == 0xac)
-			engine->graph.tlb_flush	= nv50_graph_tlb_flush;
-		else
-			engine->graph.tlb_flush	= nv84_graph_tlb_flush;
 		engine->fifo.channels		= 128;
 		engine->fifo.init		= nv50_fifo_init;
 		engine->fifo.takedown		= nv50_fifo_takedown;
@@ -432,24 +365,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 			engine->pm.temp_get	= nv84_temp_get;
 		else
 			engine->pm.temp_get	= nv40_temp_get;
-		switch (dev_priv->chipset) {
-		case 0x84:
-		case 0x86:
-		case 0x92:
-		case 0x94:
-		case 0x96:
-		case 0xa0:
-			engine->crypt.init	= nv84_crypt_init;
-			engine->crypt.takedown	= nv84_crypt_fini;
-			engine->crypt.create_context = nv84_crypt_create_context;
-			engine->crypt.destroy_context = nv84_crypt_destroy_context;
-			engine->crypt.tlb_flush	= nv84_crypt_tlb_flush;
-			break;
-		default:
-			engine->crypt.init	= nouveau_stub_init;
-			engine->crypt.takedown	= nouveau_stub_takedown;
-			break;
-		}
 		engine->vram.init		= nv50_vram_init;
 		engine->vram.get		= nv50_vram_new;
 		engine->vram.put		= nv50_vram_del;
@@ -472,14 +387,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nvc0_fb_init;
 		engine->fb.takedown		= nvc0_fb_takedown;
-		engine->graph.init		= nvc0_graph_init;
-		engine->graph.takedown		= nvc0_graph_takedown;
-		engine->graph.fifo_access	= nvc0_graph_fifo_access;
-		engine->graph.channel		= nvc0_graph_channel;
-		engine->graph.create_context	= nvc0_graph_create_context;
-		engine->graph.destroy_context	= nvc0_graph_destroy_context;
-		engine->graph.load_context	= nvc0_graph_load_context;
-		engine->graph.unload_context	= nvc0_graph_unload_context;
 		engine->fifo.channels		= 128;
 		engine->fifo.init		= nvc0_fifo_init;
 		engine->fifo.takedown		= nvc0_fifo_takedown;
@@ -503,8 +410,6 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->gpio.irq_register	= nv50_gpio_irq_register;
 		engine->gpio.irq_unregister	= nv50_gpio_irq_unregister;
 		engine->gpio.irq_enable		= nv50_gpio_irq_enable;
-		engine->crypt.init		= nouveau_stub_init;
-		engine->crypt.takedown		= nouveau_stub_takedown;
 		engine->vram.init		= nvc0_vram_init;
 		engine->vram.get		= nvc0_vram_new;
 		engine->vram.put		= nv50_vram_del;
@@ -593,7 +498,7 @@ nouveau_card_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_engine *engine;
-	int ret;
+	int ret, e = 0;
 
 	vga_client_register(dev->pdev, dev, NULL, nouveau_vga_set_decode);
 	vga_switcheroo_register_client(dev->pdev, nouveau_switcheroo_set_state,
@@ -658,23 +563,80 @@ nouveau_card_init(struct drm_device *dev)
 	if (ret)
 		goto out_timer;
 
-	if (nouveau_noaccel)
-		engine->graph.accel_blocked = true;
-	else {
-		/* PGRAPH */
-		ret = engine->graph.init(dev);
-		if (ret)
-			goto out_fb;
+	switch (dev_priv->card_type) {
+	case NV_04:
+		nv04_graph_create(dev);
+		break;
+	case NV_10:
+		nv10_graph_create(dev);
+		break;
+	case NV_20:
+	case NV_30:
+		nv20_graph_create(dev);
+		break;
+	case NV_40:
+		nv40_graph_create(dev);
+		break;
+	case NV_50:
+		nv50_graph_create(dev);
+		break;
+	case NV_C0:
+		nvc0_graph_create(dev);
+		break;
+	default:
+		break;
+	}
 
-		/* PCRYPT */
-		ret = engine->crypt.init(dev);
-		if (ret)
-			goto out_graph;
+	switch (dev_priv->chipset) {
+	case 0x84:
+	case 0x86:
+	case 0x92:
+	case 0x94:
+	case 0x96:
+	case 0xa0:
+		nv84_crypt_create(dev);
+		break;
+	}
+
+	switch (dev_priv->card_type) {
+	case NV_50:
+		switch (dev_priv->chipset) {
+		case 0xa3:
+		case 0xa5:
+		case 0xa8:
+		case 0xaf:
+			nva3_copy_create(dev);
+			break;
+		}
+		break;
+	case NV_C0:
+		nvc0_copy_create(dev, 0);
+		nvc0_copy_create(dev, 1);
+		break;
+	default:
+		break;
+	}
+
+	if (dev_priv->card_type == NV_40)
+		nv40_mpeg_create(dev);
+	else
+	if (dev_priv->card_type == NV_50 &&
+	    (dev_priv->chipset < 0x98 || dev_priv->chipset == 0xa0))
+		nv50_mpeg_create(dev);
+
+	if (!nouveau_noaccel) {
+		for (e = 0; e < NVOBJ_ENGINE_NR; e++) {
+			if (dev_priv->eng[e]) {
+				ret = dev_priv->eng[e]->init(dev, e);
+				if (ret)
+					goto out_engine;
+			}
+		}
 
 		/* PFIFO */
 		ret = engine->fifo.init(dev);
 		if (ret)
-			goto out_crypt;
+			goto out_engine;
 	}
 
 	ret = engine->display.create(dev);
@@ -691,7 +653,7 @@ nouveau_card_init(struct drm_device *dev)
 
 	/* what about PVIDEO/PCRTC/PRAMDAC etc? */
 
-	if (!engine->graph.accel_blocked) {
+	if (dev_priv->eng[NVOBJ_ENGINE_GR]) {
 		ret = nouveau_fence_init(dev);
 		if (ret)
 			goto out_irq;
@@ -715,13 +677,16 @@ out_vblank:
 out_fifo:
 	if (!nouveau_noaccel)
 		engine->fifo.takedown(dev);
-out_crypt:
-	if (!nouveau_noaccel)
-		engine->crypt.takedown(dev);
-out_graph:
-	if (!nouveau_noaccel)
-		engine->graph.takedown(dev);
-out_fb:
+out_engine:
+	if (!nouveau_noaccel) {
+		for (e = e - 1; e >= 0; e--) {
+			if (!dev_priv->eng[e])
+				continue;
+			dev_priv->eng[e]->fini(dev, e);
+			dev_priv->eng[e]->destroy(dev,e );
+		}
+	}
+
 	engine->fb.takedown(dev);
 out_timer:
 	engine->timer.takedown(dev);
@@ -751,16 +716,21 @@ static void nouveau_card_takedown(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_engine *engine = &dev_priv->engine;
+	int e;
 
-	if (!engine->graph.accel_blocked) {
+	if (dev_priv->channel) {
 		nouveau_fence_fini(dev);
 		nouveau_channel_put_unlocked(&dev_priv->channel);
 	}
 
 	if (!nouveau_noaccel) {
 		engine->fifo.takedown(dev);
-		engine->crypt.takedown(dev);
-		engine->graph.takedown(dev);
+		for (e = NVOBJ_ENGINE_NR - 1; e >= 0; e--) {
+			if (dev_priv->eng[e]) {
+				dev_priv->eng[e]->fini(dev, e);
+				dev_priv->eng[e]->destroy(dev,e );
+			}
+		}
 	}
 	engine->fb.takedown(dev);
 	engine->timer.takedown(dev);
@@ -866,7 +836,7 @@ static int nouveau_remove_conflicting_drivers(struct drm_device *dev)
 #ifdef CONFIG_X86
 	primary = dev->pdev->resource[PCI_ROM_RESOURCE].flags & IORESOURCE_ROM_SHADOW;
 #endif
-	
+
 	remove_conflicting_framebuffers(dev_priv->apertures, "nouveaufb", primary);
 	return 0;
 }
@@ -918,11 +888,13 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 
 	/* Time to determine the card architecture */
 	reg0 = nv_rd32(dev, NV03_PMC_BOOT_0);
+	dev_priv->stepping = 0; /* XXX: add stepping for pre-NV10? */
 
 	/* We're dealing with >=NV10 */
 	if ((reg0 & 0x0f000000) > 0) {
 		/* Bit 27-20 contain the architecture in hex */
 		dev_priv->chipset = (reg0 & 0xff00000) >> 20;
+		dev_priv->stepping = (reg0 & 0xff);
 	/* NV04 or NV05 */
 	} else if ((reg0 & 0xff00fff0) == 0x20004000) {
 		if (reg0 & 0x00f00000)
diff --git a/drivers/gpu/drm/nouveau/nouveau_vm.h b/drivers/gpu/drm/nouveau/nouveau_vm.h
index 2e06b55cfdc1..c48a9fc2b47b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vm.h
+++ b/drivers/gpu/drm/nouveau/nouveau_vm.h
@@ -53,8 +53,7 @@ struct nouveau_vm {
 	int refcount;
 
 	struct list_head pgd_list;
-	atomic_t pgraph_refs;
-	atomic_t pcrypt_refs;
+	atomic_t engref[16];
 
 	struct nouveau_vm_pgt *pgt;
 	u32 fpde;
diff --git a/drivers/gpu/drm/nouveau/nouveau_volt.c b/drivers/gpu/drm/nouveau/nouveau_volt.c
index 04fdc00a67d5..75e872741d92 100644
--- a/drivers/gpu/drm/nouveau/nouveau_volt.c
+++ b/drivers/gpu/drm/nouveau/nouveau_volt.c
@@ -159,8 +159,16 @@ nouveau_volt_init(struct drm_device *dev)
 		headerlen = volt[1];
 		recordlen = volt[2];
 		entries   = volt[3];
-		vidshift  = hweight8(volt[5]);
 		vidmask   = volt[4];
+		/* no longer certain what volt[5] is, if it's related to
+		 * the vid shift then it's definitely not a function of
+		 * how many bits are set.
+		 *
+		 * after looking at a number of nva3+ vbios images, they
+		 * all seem likely to have a static shift of 2.. lets
+		 * go with that for now until proven otherwise.
+		 */
+		vidshift  = 2;
 		break;
 	default:
 		NV_WARN(dev, "voltage table 0x%02x unknown\n", volt[0]);
diff --git a/drivers/gpu/drm/nouveau/nv04_crtc.c b/drivers/gpu/drm/nouveau/nv04_crtc.c
index 748b9d9c2949..3c78bc81357e 100644
--- a/drivers/gpu/drm/nouveau/nv04_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv04_crtc.c
@@ -790,8 +790,7 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		drm_fb = passed_fb;
 		fb = nouveau_framebuffer(passed_fb);
-	}
-	else {
+	} else {
 		/* If not atomic, we can go ahead and pin, and unpin the
 		 * old fb we were passed.
 		 */
@@ -944,14 +943,14 @@ nv04_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	struct drm_gem_object *gem;
 	int ret = 0;
 
-	if (width != 64 || height != 64)
-		return -EINVAL;
-
 	if (!buffer_handle) {
 		nv_crtc->cursor.hide(nv_crtc, true);
 		return 0;
 	}
 
+	if (width != 64 || height != 64)
+		return -EINVAL;
+
 	gem = drm_gem_object_lookup(dev, file_priv, buffer_handle);
 	if (!gem)
 		return -ENOENT;
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index af75015068d6..3626ee7db3ba 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -28,9 +28,11 @@
 #include "nouveau_drv.h"
 #include "nouveau_hw.h"
 #include "nouveau_util.h"
+#include "nouveau_ramht.h"
 
-static int  nv04_graph_register(struct drm_device *dev);
-static void nv04_graph_isr(struct drm_device *dev);
+struct nv04_graph_engine {
+	struct nouveau_exec_engine base;
+};
 
 static uint32_t nv04_graph_ctx_regs[] = {
 	0x0040053c,
@@ -350,7 +352,7 @@ struct graph_state {
 	uint32_t nv04[ARRAY_SIZE(nv04_graph_ctx_regs)];
 };
 
-struct nouveau_channel *
+static struct nouveau_channel *
 nv04_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -365,26 +367,6 @@ nv04_graph_channel(struct drm_device *dev)
 	return dev_priv->channels.ptr[chid];
 }
 
-static void
-nv04_graph_context_switch(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_channel *chan = NULL;
-	int chid;
-
-	nouveau_wait_for_idle(dev);
-
-	/* If previous context is valid, we need to save it */
-	pgraph->unload_context(dev);
-
-	/* Load context for next channel */
-	chid = dev_priv->engine.fifo.channel_id(dev);
-	chan = dev_priv->channels.ptr[chid];
-	if (chan)
-		nv04_graph_load_context(chan);
-}
-
 static uint32_t *ctx_reg(struct graph_state *ctx, uint32_t reg)
 {
 	int i;
@@ -397,48 +379,11 @@ static uint32_t *ctx_reg(struct graph_state *ctx, uint32_t reg)
 	return NULL;
 }
 
-int nv04_graph_create_context(struct nouveau_channel *chan)
-{
-	struct graph_state *pgraph_ctx;
-	NV_DEBUG(chan->dev, "nv04_graph_context_create %d\n", chan->id);
-
-	chan->pgraph_ctx = pgraph_ctx = kzalloc(sizeof(*pgraph_ctx),
-						GFP_KERNEL);
-	if (pgraph_ctx == NULL)
-		return -ENOMEM;
-
-	*ctx_reg(pgraph_ctx, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31;
-
-	return 0;
-}
-
-void nv04_graph_destroy_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pgraph->fifo_access(dev, false);
-
-	/* Unload the context if it's the currently active one */
-	if (pgraph->channel(dev) == chan)
-		pgraph->unload_context(dev);
-
-	/* Free the context resources */
-	kfree(pgraph_ctx);
-	chan->pgraph_ctx = NULL;
-
-	pgraph->fifo_access(dev, true);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-}
-
-int nv04_graph_load_context(struct nouveau_channel *chan)
+static int
+nv04_graph_load_context(struct nouveau_channel *chan)
 {
+	struct graph_state *pgraph_ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	struct drm_device *dev = chan->dev;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
 	uint32_t tmp;
 	int i;
 
@@ -456,20 +401,19 @@ int nv04_graph_load_context(struct nouveau_channel *chan)
 	return 0;
 }
 
-int
+static int
 nv04_graph_unload_context(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	struct nouveau_channel *chan = NULL;
 	struct graph_state *ctx;
 	uint32_t tmp;
 	int i;
 
-	chan = pgraph->channel(dev);
+	chan = nv04_graph_channel(dev);
 	if (!chan)
 		return 0;
-	ctx = chan->pgraph_ctx;
+	ctx = chan->engctx[NVOBJ_ENGINE_GR];
 
 	for (i = 0; i < ARRAY_SIZE(nv04_graph_ctx_regs); i++)
 		ctx->nv04[i] = nv_rd32(dev, nv04_graph_ctx_regs[i]);
@@ -481,23 +425,85 @@ nv04_graph_unload_context(struct drm_device *dev)
 	return 0;
 }
 
-int nv04_graph_init(struct drm_device *dev)
+static int
+nv04_graph_context_new(struct nouveau_channel *chan, int engine)
 {
+	struct graph_state *pgraph_ctx;
+	NV_DEBUG(chan->dev, "nv04_graph_context_create %d\n", chan->id);
+
+	pgraph_ctx = kzalloc(sizeof(*pgraph_ctx), GFP_KERNEL);
+	if (pgraph_ctx == NULL)
+		return -ENOMEM;
+
+	*ctx_reg(pgraph_ctx, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31;
+
+	chan->engctx[engine] = pgraph_ctx;
+	return 0;
+}
+
+static void
+nv04_graph_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
+	struct graph_state *pgraph_ctx = chan->engctx[engine];
+	unsigned long flags;
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv04_graph_fifo_access(dev, false);
+
+	/* Unload the context if it's the currently active one */
+	if (nv04_graph_channel(dev) == chan)
+		nv04_graph_unload_context(dev);
+
+	nv04_graph_fifo_access(dev, true);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	/* Free the context resources */
+	kfree(pgraph_ctx);
+	chan->engctx[engine] = NULL;
+}
+
+int
+nv04_graph_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj *obj = NULL;
 	int ret;
 
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 1;
+	obj->class  = class;
+
+#ifdef __BIG_ENDIAN
+	nv_wo32(obj, 0x00, 0x00080000 | class);
+#else
+	nv_wo32(obj, 0x00, class);
+#endif
+	nv_wo32(obj, 0x04, 0x00000000);
+	nv_wo32(obj, 0x08, 0x00000000);
+	nv_wo32(obj, 0x0c, 0x00000000);
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
+}
+
+static int
+nv04_graph_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t tmp;
+
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
 			~NV_PMC_ENABLE_PGRAPH);
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
 			 NV_PMC_ENABLE_PGRAPH);
 
-	ret = nv04_graph_register(dev);
-	if (ret)
-		return ret;
-
 	/* Enable PGRAPH interrupts */
-	nouveau_irq_register(dev, 12, nv04_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR, 0xFFFFFFFF);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
@@ -507,7 +513,7 @@ int nv04_graph_init(struct drm_device *dev)
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_0, 0x001FFFFF);*/
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_0, 0x1231c000);
 	/*1231C000 blob, 001 haiku*/
-	//*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/
+	/*V_WRITE(NV04_PGRAPH_DEBUG_1, 0xf2d91100);*/
 	nv_wr32(dev, NV04_PGRAPH_DEBUG_1, 0x72111100);
 	/*0x72111100 blob , 01 haiku*/
 	/*nv_wr32(dev, NV04_PGRAPH_DEBUG_2, 0x11d5f870);*/
@@ -531,10 +537,12 @@ int nv04_graph_init(struct drm_device *dev)
 	return 0;
 }
 
-void nv04_graph_takedown(struct drm_device *dev)
+static int
+nv04_graph_fini(struct drm_device *dev, int engine)
 {
+	nv04_graph_unload_context(dev);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000);
-	nouveau_irq_unregister(dev, 12);
+	return 0;
 }
 
 void
@@ -969,13 +977,138 @@ nv04_graph_mthd_bind_chroma(struct nouveau_channel *chan,
 	return 1;
 }
 
-static int
-nv04_graph_register(struct drm_device *dev)
+static struct nouveau_bitfield nv04_graph_intr[] = {
+	{ NV_PGRAPH_INTR_NOTIFY, "NOTIFY" },
+	{}
+};
+
+static struct nouveau_bitfield nv04_graph_nstatus[] = {
+	{ NV04_PGRAPH_NSTATUS_STATE_IN_USE,       "STATE_IN_USE" },
+	{ NV04_PGRAPH_NSTATUS_INVALID_STATE,      "INVALID_STATE" },
+	{ NV04_PGRAPH_NSTATUS_BAD_ARGUMENT,       "BAD_ARGUMENT" },
+	{ NV04_PGRAPH_NSTATUS_PROTECTION_FAULT,   "PROTECTION_FAULT" },
+	{}
+};
+
+struct nouveau_bitfield nv04_graph_nsource[] = {
+	{ NV03_PGRAPH_NSOURCE_NOTIFICATION,       "NOTIFICATION" },
+	{ NV03_PGRAPH_NSOURCE_DATA_ERROR,         "DATA_ERROR" },
+	{ NV03_PGRAPH_NSOURCE_PROTECTION_ERROR,   "PROTECTION_ERROR" },
+	{ NV03_PGRAPH_NSOURCE_RANGE_EXCEPTION,    "RANGE_EXCEPTION" },
+	{ NV03_PGRAPH_NSOURCE_LIMIT_COLOR,        "LIMIT_COLOR" },
+	{ NV03_PGRAPH_NSOURCE_LIMIT_ZETA,         "LIMIT_ZETA" },
+	{ NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD,       "ILLEGAL_MTHD" },
+	{ NV03_PGRAPH_NSOURCE_DMA_R_PROTECTION,   "DMA_R_PROTECTION" },
+	{ NV03_PGRAPH_NSOURCE_DMA_W_PROTECTION,   "DMA_W_PROTECTION" },
+	{ NV03_PGRAPH_NSOURCE_FORMAT_EXCEPTION,   "FORMAT_EXCEPTION" },
+	{ NV03_PGRAPH_NSOURCE_PATCH_EXCEPTION,    "PATCH_EXCEPTION" },
+	{ NV03_PGRAPH_NSOURCE_STATE_INVALID,      "STATE_INVALID" },
+	{ NV03_PGRAPH_NSOURCE_DOUBLE_NOTIFY,      "DOUBLE_NOTIFY" },
+	{ NV03_PGRAPH_NSOURCE_NOTIFY_IN_USE,      "NOTIFY_IN_USE" },
+	{ NV03_PGRAPH_NSOURCE_METHOD_CNT,         "METHOD_CNT" },
+	{ NV03_PGRAPH_NSOURCE_BFR_NOTIFICATION,   "BFR_NOTIFICATION" },
+	{ NV03_PGRAPH_NSOURCE_DMA_VTX_PROTECTION, "DMA_VTX_PROTECTION" },
+	{ NV03_PGRAPH_NSOURCE_DMA_WIDTH_A,        "DMA_WIDTH_A" },
+	{ NV03_PGRAPH_NSOURCE_DMA_WIDTH_B,        "DMA_WIDTH_B" },
+	{}
+};
+
+static void
+nv04_graph_context_switch(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_channel *chan = NULL;
+	int chid;
 
-	if (dev_priv->engine.graph.registered)
-		return 0;
+	nouveau_wait_for_idle(dev);
+
+	/* If previous context is valid, we need to save it */
+	nv04_graph_unload_context(dev);
+
+	/* Load context for next channel */
+	chid = dev_priv->engine.fifo.channel_id(dev);
+	chan = dev_priv->channels.ptr[chid];
+	if (chan)
+		nv04_graph_load_context(chan);
+}
+
+static void
+nv04_graph_isr(struct drm_device *dev)
+{
+	u32 stat;
+
+	while ((stat = nv_rd32(dev, NV03_PGRAPH_INTR))) {
+		u32 nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE);
+		u32 nstatus = nv_rd32(dev, NV03_PGRAPH_NSTATUS);
+		u32 addr = nv_rd32(dev, NV04_PGRAPH_TRAPPED_ADDR);
+		u32 chid = (addr & 0x0f000000) >> 24;
+		u32 subc = (addr & 0x0000e000) >> 13;
+		u32 mthd = (addr & 0x00001ffc);
+		u32 data = nv_rd32(dev, NV04_PGRAPH_TRAPPED_DATA);
+		u32 class = nv_rd32(dev, 0x400180 + subc * 4) & 0xff;
+		u32 show = stat;
+
+		if (stat & NV_PGRAPH_INTR_NOTIFY) {
+			if (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD) {
+				if (!nouveau_gpuobj_mthd_call2(dev, chid, class, mthd, data))
+					show &= ~NV_PGRAPH_INTR_NOTIFY;
+			}
+		}
+
+		if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
+			nv_wr32(dev, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
+			stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
+			show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
+			nv04_graph_context_switch(dev);
+		}
+
+		nv_wr32(dev, NV03_PGRAPH_INTR, stat);
+		nv_wr32(dev, NV04_PGRAPH_FIFO, 0x00000001);
+
+		if (show && nouveau_ratelimit()) {
+			NV_INFO(dev, "PGRAPH -");
+			nouveau_bitfield_print(nv04_graph_intr, show);
+			printk(" nsource:");
+			nouveau_bitfield_print(nv04_graph_nsource, nsource);
+			printk(" nstatus:");
+			nouveau_bitfield_print(nv04_graph_nstatus, nstatus);
+			printk("\n");
+			NV_INFO(dev, "PGRAPH - ch %d/%d class 0x%04x "
+				     "mthd 0x%04x data 0x%08x\n",
+				chid, subc, class, mthd, data);
+		}
+	}
+}
+
+static void
+nv04_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nv04_graph_engine *pgraph = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 12);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+	kfree(pgraph);
+}
+
+int
+nv04_graph_create(struct drm_device *dev)
+{
+	struct nv04_graph_engine *pgraph;
+
+	pgraph = kzalloc(sizeof(*pgraph), GFP_KERNEL);
+	if (!pgraph)
+		return -ENOMEM;
+
+	pgraph->base.destroy = nv04_graph_destroy;
+	pgraph->base.init = nv04_graph_init;
+	pgraph->base.fini = nv04_graph_fini;
+	pgraph->base.context_new = nv04_graph_context_new;
+	pgraph->base.context_del = nv04_graph_context_del;
+	pgraph->base.object_new = nv04_graph_object_new;
+
+	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
+	nouveau_irq_register(dev, 12, nv04_graph_isr);
 
 	/* dvd subpicture */
 	NVOBJ_CLASS(dev, 0x0038, GR);
@@ -1222,93 +1355,5 @@ nv04_graph_register(struct drm_device *dev)
 	NVOBJ_CLASS(dev, 0x506e, SW);
 	NVOBJ_MTHD (dev, 0x506e, 0x0150, nv04_graph_mthd_set_ref);
 	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
-	dev_priv->engine.graph.registered = true;
 	return 0;
-};
-
-static struct nouveau_bitfield nv04_graph_intr[] = {
-	{ NV_PGRAPH_INTR_NOTIFY, "NOTIFY" },
-	{}
-};
-
-static struct nouveau_bitfield nv04_graph_nstatus[] =
-{
-	{ NV04_PGRAPH_NSTATUS_STATE_IN_USE,       "STATE_IN_USE" },
-	{ NV04_PGRAPH_NSTATUS_INVALID_STATE,      "INVALID_STATE" },
-	{ NV04_PGRAPH_NSTATUS_BAD_ARGUMENT,       "BAD_ARGUMENT" },
-	{ NV04_PGRAPH_NSTATUS_PROTECTION_FAULT,   "PROTECTION_FAULT" },
-	{}
-};
-
-struct nouveau_bitfield nv04_graph_nsource[] =
-{
-	{ NV03_PGRAPH_NSOURCE_NOTIFICATION,       "NOTIFICATION" },
-	{ NV03_PGRAPH_NSOURCE_DATA_ERROR,         "DATA_ERROR" },
-	{ NV03_PGRAPH_NSOURCE_PROTECTION_ERROR,   "PROTECTION_ERROR" },
-	{ NV03_PGRAPH_NSOURCE_RANGE_EXCEPTION,    "RANGE_EXCEPTION" },
-	{ NV03_PGRAPH_NSOURCE_LIMIT_COLOR,        "LIMIT_COLOR" },
-	{ NV03_PGRAPH_NSOURCE_LIMIT_ZETA,         "LIMIT_ZETA" },
-	{ NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD,       "ILLEGAL_MTHD" },
-	{ NV03_PGRAPH_NSOURCE_DMA_R_PROTECTION,   "DMA_R_PROTECTION" },
-	{ NV03_PGRAPH_NSOURCE_DMA_W_PROTECTION,   "DMA_W_PROTECTION" },
-	{ NV03_PGRAPH_NSOURCE_FORMAT_EXCEPTION,   "FORMAT_EXCEPTION" },
-	{ NV03_PGRAPH_NSOURCE_PATCH_EXCEPTION,    "PATCH_EXCEPTION" },
-	{ NV03_PGRAPH_NSOURCE_STATE_INVALID,      "STATE_INVALID" },
-	{ NV03_PGRAPH_NSOURCE_DOUBLE_NOTIFY,      "DOUBLE_NOTIFY" },
-	{ NV03_PGRAPH_NSOURCE_NOTIFY_IN_USE,      "NOTIFY_IN_USE" },
-	{ NV03_PGRAPH_NSOURCE_METHOD_CNT,         "METHOD_CNT" },
-	{ NV03_PGRAPH_NSOURCE_BFR_NOTIFICATION,   "BFR_NOTIFICATION" },
-	{ NV03_PGRAPH_NSOURCE_DMA_VTX_PROTECTION, "DMA_VTX_PROTECTION" },
-	{ NV03_PGRAPH_NSOURCE_DMA_WIDTH_A,        "DMA_WIDTH_A" },
-	{ NV03_PGRAPH_NSOURCE_DMA_WIDTH_B,        "DMA_WIDTH_B" },
-	{}
-};
-
-static void
-nv04_graph_isr(struct drm_device *dev)
-{
-	u32 stat;
-
-	while ((stat = nv_rd32(dev, NV03_PGRAPH_INTR))) {
-		u32 nsource = nv_rd32(dev, NV03_PGRAPH_NSOURCE);
-		u32 nstatus = nv_rd32(dev, NV03_PGRAPH_NSTATUS);
-		u32 addr = nv_rd32(dev, NV04_PGRAPH_TRAPPED_ADDR);
-		u32 chid = (addr & 0x0f000000) >> 24;
-		u32 subc = (addr & 0x0000e000) >> 13;
-		u32 mthd = (addr & 0x00001ffc);
-		u32 data = nv_rd32(dev, NV04_PGRAPH_TRAPPED_DATA);
-		u32 class = nv_rd32(dev, 0x400180 + subc * 4) & 0xff;
-		u32 show = stat;
-
-		if (stat & NV_PGRAPH_INTR_NOTIFY) {
-			if (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD) {
-				if (!nouveau_gpuobj_mthd_call2(dev, chid, class, mthd, data))
-					show &= ~NV_PGRAPH_INTR_NOTIFY;
-			}
-		}
-
-		if (stat & NV_PGRAPH_INTR_CONTEXT_SWITCH) {
-			nv_wr32(dev, NV03_PGRAPH_INTR, NV_PGRAPH_INTR_CONTEXT_SWITCH);
-			stat &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
-			show &= ~NV_PGRAPH_INTR_CONTEXT_SWITCH;
-			nv04_graph_context_switch(dev);
-		}
-
-		nv_wr32(dev, NV03_PGRAPH_INTR, stat);
-		nv_wr32(dev, NV04_PGRAPH_FIFO, 0x00000001);
-
-		if (show && nouveau_ratelimit()) {
-			NV_INFO(dev, "PGRAPH -");
-			nouveau_bitfield_print(nv04_graph_intr, show);
-			printk(" nsource:");
-			nouveau_bitfield_print(nv04_graph_nsource, nsource);
-			printk(" nstatus:");
-			nouveau_bitfield_print(nv04_graph_nstatus, nstatus);
-			printk("\n");
-			NV_INFO(dev, "PGRAPH - ch %d/%d class 0x%04x "
-				     "mthd 0x%04x data 0x%08x\n",
-				chid, subc, class, mthd, data);
-		}
-	}
 }
diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c
index b8e3edb5c063..b8611b955313 100644
--- a/drivers/gpu/drm/nouveau/nv04_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv04_instmem.c
@@ -95,6 +95,9 @@ nv04_instmem_takedown(struct drm_device *dev)
 	nouveau_ramht_ref(NULL, &dev_priv->ramht, NULL);
 	nouveau_gpuobj_ref(NULL, &dev_priv->ramro);
 	nouveau_gpuobj_ref(NULL, &dev_priv->ramfc);
+
+	if (drm_mm_initialized(&dev_priv->ramin_heap))
+		drm_mm_takedown(&dev_priv->ramin_heap);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 8c92edb7bbcd..0930c6cb88e0 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -28,10 +28,9 @@
 #include "nouveau_drv.h"
 #include "nouveau_util.h"
 
-static int  nv10_graph_register(struct drm_device *);
-static void nv10_graph_isr(struct drm_device *);
-
-#define NV10_FIFO_NUMBER 32
+struct nv10_graph_engine {
+	struct nouveau_exec_engine base;
+};
 
 struct pipe_state {
 	uint32_t pipe_0x0000[0x040/4];
@@ -414,9 +413,9 @@ struct graph_state {
 
 static void nv10_graph_save_pipe(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->dev;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
+	struct graph_state *pgraph_ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	struct pipe_state *pipe = &pgraph_ctx->pipe_state;
+	struct drm_device *dev = chan->dev;
 
 	PIPE_SAVE(dev, pipe->pipe_0x4400, 0x4400);
 	PIPE_SAVE(dev, pipe->pipe_0x0200, 0x0200);
@@ -432,9 +431,9 @@ static void nv10_graph_save_pipe(struct nouveau_channel *chan)
 
 static void nv10_graph_load_pipe(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->dev;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
+	struct graph_state *pgraph_ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	struct pipe_state *pipe = &pgraph_ctx->pipe_state;
+	struct drm_device *dev = chan->dev;
 	uint32_t xfmode0, xfmode1;
 	int i;
 
@@ -482,9 +481,9 @@ static void nv10_graph_load_pipe(struct nouveau_channel *chan)
 
 static void nv10_graph_create_pipe(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->dev;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
+	struct graph_state *pgraph_ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	struct pipe_state *fifo_pipe_state = &pgraph_ctx->pipe_state;
+	struct drm_device *dev = chan->dev;
 	uint32_t *fifo_pipe_state_addr;
 	int i;
 #define PIPE_INIT(addr) \
@@ -661,8 +660,6 @@ static void nv10_graph_load_dma_vtxbuf(struct nouveau_channel *chan,
 				       uint32_t inst)
 {
 	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	uint32_t st2, st2_dl, st2_dh, fifo_ptr, fifo[0x60/4];
 	uint32_t ctx_user, ctx_switch[5];
 	int i, subchan = -1;
@@ -711,8 +708,8 @@ static void nv10_graph_load_dma_vtxbuf(struct nouveau_channel *chan,
 		0x2c000000 | chan->id << 20 | subchan << 16 | 0x18c);
 	nv_wr32(dev, NV10_PGRAPH_FFINTFC_ST2_DL, inst);
 	nv_mask(dev, NV10_PGRAPH_CTX_CONTROL, 0, 0x10000);
-	pgraph->fifo_access(dev, true);
-	pgraph->fifo_access(dev, false);
+	nv04_graph_fifo_access(dev, true);
+	nv04_graph_fifo_access(dev, false);
 
 	/* Restore the FIFO state */
 	for (i = 0; i < ARRAY_SIZE(fifo); i++)
@@ -729,11 +726,12 @@ static void nv10_graph_load_dma_vtxbuf(struct nouveau_channel *chan,
 	nv_wr32(dev, NV10_PGRAPH_CTX_USER, ctx_user);
 }
 
-int nv10_graph_load_context(struct nouveau_channel *chan)
+static int
+nv10_graph_load_context(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
+	struct graph_state *pgraph_ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	uint32_t tmp;
 	int i;
 
@@ -757,21 +755,20 @@ int nv10_graph_load_context(struct nouveau_channel *chan)
 	return 0;
 }
 
-int
+static int
 nv10_graph_unload_context(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nouveau_channel *chan;
 	struct graph_state *ctx;
 	uint32_t tmp;
 	int i;
 
-	chan = pgraph->channel(dev);
+	chan = nv10_graph_channel(dev);
 	if (!chan)
 		return 0;
-	ctx = chan->pgraph_ctx;
+	ctx = chan->engctx[NVOBJ_ENGINE_GR];
 
 	for (i = 0; i < ARRAY_SIZE(nv10_graph_ctx_regs); i++)
 		ctx->nv10[i] = nv_rd32(dev, nv10_graph_ctx_regs[i]);
@@ -805,7 +802,7 @@ nv10_graph_context_switch(struct drm_device *dev)
 	/* Load context for next channel */
 	chid = (nv_rd32(dev, NV04_PGRAPH_TRAPPED_ADDR) >> 20) & 0x1f;
 	chan = dev_priv->channels.ptr[chid];
-	if (chan && chan->pgraph_ctx)
+	if (chan && chan->engctx[NVOBJ_ENGINE_GR])
 		nv10_graph_load_context(chan);
 }
 
@@ -836,7 +833,8 @@ nv10_graph_channel(struct drm_device *dev)
 	return dev_priv->channels.ptr[chid];
 }
 
-int nv10_graph_create_context(struct nouveau_channel *chan)
+static int
+nv10_graph_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -844,11 +842,10 @@ int nv10_graph_create_context(struct nouveau_channel *chan)
 
 	NV_DEBUG(dev, "nv10_graph_context_create %d\n", chan->id);
 
-	chan->pgraph_ctx = pgraph_ctx = kzalloc(sizeof(*pgraph_ctx),
-						GFP_KERNEL);
+	pgraph_ctx = kzalloc(sizeof(*pgraph_ctx), GFP_KERNEL);
 	if (pgraph_ctx == NULL)
 		return -ENOMEM;
-
+	chan->engctx[engine] = pgraph_ctx;
 
 	NV_WRITE_CTX(0x00400e88, 0x08000000);
 	NV_WRITE_CTX(0x00400e9c, 0x4b7fffff);
@@ -873,30 +870,30 @@ int nv10_graph_create_context(struct nouveau_channel *chan)
 	return 0;
 }
 
-void nv10_graph_destroy_context(struct nouveau_channel *chan)
+static void
+nv10_graph_context_del(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct graph_state *pgraph_ctx = chan->pgraph_ctx;
+	struct graph_state *pgraph_ctx = chan->engctx[engine];
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pgraph->fifo_access(dev, false);
+	nv04_graph_fifo_access(dev, false);
 
 	/* Unload the context if it's the currently active one */
-	if (pgraph->channel(dev) == chan)
-		pgraph->unload_context(dev);
+	if (nv10_graph_channel(dev) == chan)
+		nv10_graph_unload_context(dev);
+
+	nv04_graph_fifo_access(dev, true);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
 	/* Free the context resources */
+	chan->engctx[engine] = NULL;
 	kfree(pgraph_ctx);
-	chan->pgraph_ctx = NULL;
-
-	pgraph->fifo_access(dev, true);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 }
 
-void
+static void
 nv10_graph_set_tile_region(struct drm_device *dev, int i)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -907,22 +904,18 @@ nv10_graph_set_tile_region(struct drm_device *dev, int i)
 	nv_wr32(dev, NV10_PGRAPH_TILE(i), tile->addr);
 }
 
-int nv10_graph_init(struct drm_device *dev)
+static int
+nv10_graph_init(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t tmp;
-	int ret, i;
+	u32 tmp;
+	int i;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
 			~NV_PMC_ENABLE_PGRAPH);
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) |
 			 NV_PMC_ENABLE_PGRAPH);
 
-	ret = nv10_graph_register(dev);
-	if (ret)
-		return ret;
-
-	nouveau_irq_register(dev, 12, nv10_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
@@ -963,18 +956,20 @@ int nv10_graph_init(struct drm_device *dev)
 	return 0;
 }
 
-void nv10_graph_takedown(struct drm_device *dev)
+static int
+nv10_graph_fini(struct drm_device *dev, int engine)
 {
+	nv10_graph_unload_context(dev);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000);
-	nouveau_irq_unregister(dev, 12);
+	return 0;
 }
 
 static int
 nv17_graph_mthd_lma_window(struct nouveau_channel *chan,
 			   u32 class, u32 mthd, u32 data)
 {
+	struct graph_state *ctx = chan->engctx[NVOBJ_ENGINE_GR];
 	struct drm_device *dev = chan->dev;
-	struct graph_state *ctx = chan->pgraph_ctx;
 	struct pipe_state *pipe = &ctx->pipe_state;
 	uint32_t pipe_0x0040[1], pipe_0x64c0[8], pipe_0x6a80[3], pipe_0x6ab0[3];
 	uint32_t xfmode0, xfmode1;
@@ -1061,64 +1056,13 @@ nv17_graph_mthd_lma_enable(struct nouveau_channel *chan,
 	return 0;
 }
 
-static int
-nv10_graph_register(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (dev_priv->engine.graph.registered)
-		return 0;
-
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
-	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
-	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
-	NVOBJ_CLASS(dev, 0x005f, GR); /* imageblit */
-	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
-	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
-	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
-	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
-	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
-	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
-	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
-	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
-	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
-	NVOBJ_CLASS(dev, 0x0052, GR); /* swzsurf */
-	NVOBJ_CLASS(dev, 0x0093, GR); /* surf3d */
-	NVOBJ_CLASS(dev, 0x0094, GR); /* tex_tri */
-	NVOBJ_CLASS(dev, 0x0095, GR); /* multitex_tri */
-
-	/* celcius */
-	if (dev_priv->chipset <= 0x10) {
-		NVOBJ_CLASS(dev, 0x0056, GR);
-	} else
-	if (dev_priv->chipset < 0x17 || dev_priv->chipset == 0x1a) {
-		NVOBJ_CLASS(dev, 0x0096, GR);
-	} else {
-		NVOBJ_CLASS(dev, 0x0099, GR);
-		NVOBJ_MTHD (dev, 0x0099, 0x1638, nv17_graph_mthd_lma_window);
-		NVOBJ_MTHD (dev, 0x0099, 0x163c, nv17_graph_mthd_lma_window);
-		NVOBJ_MTHD (dev, 0x0099, 0x1640, nv17_graph_mthd_lma_window);
-		NVOBJ_MTHD (dev, 0x0099, 0x1644, nv17_graph_mthd_lma_window);
-		NVOBJ_MTHD (dev, 0x0099, 0x1658, nv17_graph_mthd_lma_enable);
-	}
-
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
-	dev_priv->engine.graph.registered = true;
-	return 0;
-}
-
 struct nouveau_bitfield nv10_graph_intr[] = {
 	{ NV_PGRAPH_INTR_NOTIFY, "NOTIFY" },
 	{ NV_PGRAPH_INTR_ERROR,  "ERROR"  },
 	{}
 };
 
-struct nouveau_bitfield nv10_graph_nstatus[] =
-{
+struct nouveau_bitfield nv10_graph_nstatus[] = {
 	{ NV10_PGRAPH_NSTATUS_STATE_IN_USE,       "STATE_IN_USE" },
 	{ NV10_PGRAPH_NSTATUS_INVALID_STATE,      "INVALID_STATE" },
 	{ NV10_PGRAPH_NSTATUS_BAD_ARGUMENT,       "BAD_ARGUMENT" },
@@ -1173,3 +1117,73 @@ nv10_graph_isr(struct drm_device *dev)
 		}
 	}
 }
+
+static void
+nv10_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nv10_graph_engine *pgraph = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 12);
+	kfree(pgraph);
+}
+
+int
+nv10_graph_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv10_graph_engine *pgraph;
+
+	pgraph = kzalloc(sizeof(*pgraph), GFP_KERNEL);
+	if (!pgraph)
+		return -ENOMEM;
+
+	pgraph->base.destroy = nv10_graph_destroy;
+	pgraph->base.init = nv10_graph_init;
+	pgraph->base.fini = nv10_graph_fini;
+	pgraph->base.context_new = nv10_graph_context_new;
+	pgraph->base.context_del = nv10_graph_context_del;
+	pgraph->base.object_new = nv04_graph_object_new;
+	pgraph->base.set_tile_region = nv10_graph_set_tile_region;
+
+	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
+	nouveau_irq_register(dev, 12, nv10_graph_isr);
+
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
+	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
+	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
+	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
+	NVOBJ_CLASS(dev, 0x005f, GR); /* imageblit */
+	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
+	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
+	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
+	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
+	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
+	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
+	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
+	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
+	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
+	NVOBJ_CLASS(dev, 0x0052, GR); /* swzsurf */
+	NVOBJ_CLASS(dev, 0x0093, GR); /* surf3d */
+	NVOBJ_CLASS(dev, 0x0094, GR); /* tex_tri */
+	NVOBJ_CLASS(dev, 0x0095, GR); /* multitex_tri */
+
+	/* celcius */
+	if (dev_priv->chipset <= 0x10) {
+		NVOBJ_CLASS(dev, 0x0056, GR);
+	} else
+	if (dev_priv->chipset < 0x17 || dev_priv->chipset == 0x1a) {
+		NVOBJ_CLASS(dev, 0x0096, GR);
+	} else {
+		NVOBJ_CLASS(dev, 0x0099, GR);
+		NVOBJ_MTHD (dev, 0x0099, 0x1638, nv17_graph_mthd_lma_window);
+		NVOBJ_MTHD (dev, 0x0099, 0x163c, nv17_graph_mthd_lma_window);
+		NVOBJ_MTHD (dev, 0x0099, 0x1640, nv17_graph_mthd_lma_window);
+		NVOBJ_MTHD (dev, 0x0099, 0x1644, nv17_graph_mthd_lma_window);
+		NVOBJ_MTHD (dev, 0x0099, 0x1658, nv17_graph_mthd_lma_enable);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 8464b76798d5..affc7d7dd029 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -24,6 +24,14 @@
  *
  */
 
+struct nv20_graph_engine {
+	struct nouveau_exec_engine base;
+	struct nouveau_gpuobj *ctxtab;
+	void (*grctx_init)(struct nouveau_gpuobj *);
+	u32 grctx_size;
+	u32 grctx_user;
+};
+
 #define NV20_GRCTX_SIZE (3580*4)
 #define NV25_GRCTX_SIZE (3529*4)
 #define NV2A_GRCTX_SIZE (3500*4)
@@ -32,12 +40,54 @@
 #define NV34_GRCTX_SIZE    (18140)
 #define NV35_36_GRCTX_SIZE (22396)
 
-static int nv20_graph_register(struct drm_device *);
-static int nv30_graph_register(struct drm_device *);
-static void nv20_graph_isr(struct drm_device *);
+int
+nv20_graph_unload_context(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nouveau_channel *chan;
+	struct nouveau_gpuobj *grctx;
+	u32 tmp;
+
+	chan = nv10_graph_channel(dev);
+	if (!chan)
+		return 0;
+	grctx = chan->engctx[NVOBJ_ENGINE_GR];
+
+	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, grctx->pinst >> 4);
+	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER,
+		     NV20_PGRAPH_CHANNEL_CTX_XFER_SAVE);
+
+	nouveau_wait_for_idle(dev);
+
+	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
+	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
+	tmp |= (pfifo->channels - 1) << 24;
+	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
+	return 0;
+}
+
+static void
+nv20_graph_rdi(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	int i, writecount = 32;
+	uint32_t rdi_index = 0x2c80000;
+
+	if (dev_priv->chipset == 0x20) {
+		rdi_index = 0x3d0000;
+		writecount = 15;
+	}
+
+	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, rdi_index);
+	for (i = 0; i < writecount; i++)
+		nv_wr32(dev, NV10_PGRAPH_RDI_DATA, 0);
+
+	nouveau_wait_for_idle(dev);
+}
 
 static void
-nv20_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv20_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -87,7 +137,7 @@ nv20_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 static void
-nv25_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv25_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -146,7 +196,7 @@ nv25_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 static void
-nv2a_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv2a_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -196,7 +246,7 @@ nv2a_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 static void
-nv30_31_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv30_31_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -254,7 +304,7 @@ nv30_31_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 static void
-nv34_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv34_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -312,7 +362,7 @@ nv34_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 static void
-nv35_36_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
+nv35_36_graph_context_init(struct nouveau_gpuobj *ctx)
 {
 	int i;
 
@@ -370,148 +420,57 @@ nv35_36_graph_context_init(struct drm_device *dev, struct nouveau_gpuobj *ctx)
 }
 
 int
-nv20_graph_create_context(struct nouveau_channel *chan)
+nv20_graph_context_new(struct nouveau_channel *chan, int engine)
 {
+	struct nv20_graph_engine *pgraph = nv_engine(chan->dev, engine);
+	struct nouveau_gpuobj *grctx = NULL;
 	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	void (*ctx_init)(struct drm_device *, struct nouveau_gpuobj *);
-	unsigned int idoffs = 0x28;
 	int ret;
 
-	switch (dev_priv->chipset) {
-	case 0x20:
-		ctx_init = nv20_graph_context_init;
-		idoffs = 0;
-		break;
-	case 0x25:
-	case 0x28:
-		ctx_init = nv25_graph_context_init;
-		break;
-	case 0x2a:
-		ctx_init = nv2a_graph_context_init;
-		idoffs = 0;
-		break;
-	case 0x30:
-	case 0x31:
-		ctx_init = nv30_31_graph_context_init;
-		break;
-	case 0x34:
-		ctx_init = nv34_graph_context_init;
-		break;
-	case 0x35:
-	case 0x36:
-		ctx_init = nv35_36_graph_context_init;
-		break;
-	default:
-		BUG_ON(1);
-	}
-
-	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16,
-				 NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx);
+	ret = nouveau_gpuobj_new(dev, NULL, pgraph->grctx_size, 16,
+				 NVOBJ_FLAG_ZERO_ALLOC, &grctx);
 	if (ret)
 		return ret;
 
 	/* Initialise default context values */
-	ctx_init(dev, chan->ramin_grctx);
+	pgraph->grctx_init(grctx);
 
 	/* nv20: nv_wo32(dev, chan->ramin_grctx->gpuobj, 10, chan->id<<24); */
-	nv_wo32(chan->ramin_grctx, idoffs,
-		(chan->id << 24) | 0x1); /* CTX_USER */
+	/* CTX_USER */
+	nv_wo32(grctx, pgraph->grctx_user, (chan->id << 24) | 0x1);
 
-	nv_wo32(pgraph->ctx_table, chan->id * 4, chan->ramin_grctx->pinst >> 4);
+	nv_wo32(pgraph->ctxtab, chan->id * 4, grctx->pinst >> 4);
+	chan->engctx[engine] = grctx;
 	return 0;
 }
 
 void
-nv20_graph_destroy_context(struct nouveau_channel *chan)
+nv20_graph_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nv20_graph_engine *pgraph = nv_engine(chan->dev, engine);
+	struct nouveau_gpuobj *grctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	unsigned long flags;
 
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pgraph->fifo_access(dev, false);
+	nv04_graph_fifo_access(dev, false);
 
 	/* Unload the context if it's the currently active one */
-	if (pgraph->channel(dev) == chan)
-		pgraph->unload_context(dev);
+	if (nv10_graph_channel(dev) == chan)
+		nv20_graph_unload_context(dev);
 
-	pgraph->fifo_access(dev, true);
+	nv04_graph_fifo_access(dev, true);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
 	/* Free the context resources */
-	nv_wo32(pgraph->ctx_table, chan->id * 4, 0);
-	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
-}
-
-int
-nv20_graph_load_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	uint32_t inst;
+	nv_wo32(pgraph->ctxtab, chan->id * 4, 0);
 
-	if (!chan->ramin_grctx)
-		return -EINVAL;
-	inst = chan->ramin_grctx->pinst >> 4;
-
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER,
-		     NV20_PGRAPH_CHANNEL_CTX_XFER_LOAD);
-	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10010100);
-
-	nouveau_wait_for_idle(dev);
-	return 0;
-}
-
-int
-nv20_graph_unload_context(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_channel *chan;
-	uint32_t inst, tmp;
-
-	chan = pgraph->channel(dev);
-	if (!chan)
-		return 0;
-	inst = chan->ramin_grctx->pinst >> 4;
-
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_XFER,
-		     NV20_PGRAPH_CHANNEL_CTX_XFER_SAVE);
-
-	nouveau_wait_for_idle(dev);
-
-	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000000);
-	tmp  = nv_rd32(dev, NV10_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= (pfifo->channels - 1) << 24;
-	nv_wr32(dev, NV10_PGRAPH_CTX_USER, tmp);
-	return 0;
+	nouveau_gpuobj_ref(NULL, &grctx);
+	chan->engctx[engine] = NULL;
 }
 
 static void
-nv20_graph_rdi(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	int i, writecount = 32;
-	uint32_t rdi_index = 0x2c80000;
-
-	if (dev_priv->chipset == 0x20) {
-		rdi_index = 0x3d0000;
-		writecount = 15;
-	}
-
-	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, rdi_index);
-	for (i = 0; i < writecount; i++)
-		nv_wr32(dev, NV10_PGRAPH_RDI_DATA, 0);
-
-	nouveau_wait_for_idle(dev);
-}
-
-void
 nv20_graph_set_tile_region(struct drm_device *dev, int i)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -536,56 +495,22 @@ nv20_graph_set_tile_region(struct drm_device *dev, int i)
 }
 
 int
-nv20_graph_init(struct drm_device *dev)
+nv20_graph_init(struct drm_device *dev, int engine)
 {
+	struct nv20_graph_engine *pgraph = nv_engine(dev, engine);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	uint32_t tmp, vramsz;
-	int ret, i;
-
-	switch (dev_priv->chipset) {
-	case 0x20:
-		pgraph->grctx_size = NV20_GRCTX_SIZE;
-		break;
-	case 0x25:
-	case 0x28:
-		pgraph->grctx_size = NV25_GRCTX_SIZE;
-		break;
-	case 0x2a:
-		pgraph->grctx_size = NV2A_GRCTX_SIZE;
-		break;
-	default:
-		NV_ERROR(dev, "unknown chipset, disabling acceleration\n");
-		pgraph->accel_blocked = true;
-		return 0;
-	}
+	int i;
 
 	nv_wr32(dev, NV03_PMC_ENABLE,
 		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PGRAPH);
 	nv_wr32(dev, NV03_PMC_ENABLE,
 		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PGRAPH);
 
-	if (!pgraph->ctx_table) {
-		/* Create Context Pointer Table */
-		ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16,
-					 NVOBJ_FLAG_ZERO_ALLOC,
-					 &pgraph->ctx_table);
-		if (ret)
-			return ret;
-	}
-
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE,
-		     pgraph->ctx_table->pinst >> 4);
+	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE, pgraph->ctxtab->pinst >> 4);
 
 	nv20_graph_rdi(dev);
 
-	ret = nv20_graph_register(dev);
-	if (ret) {
-		nouveau_gpuobj_ref(NULL, &pgraph->ctx_table);
-		return ret;
-	}
-
-	nouveau_irq_register(dev, 12, nv20_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
@@ -657,67 +582,20 @@ nv20_graph_init(struct drm_device *dev)
 	return 0;
 }
 
-void
-nv20_graph_takedown(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-
-	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000);
-	nouveau_irq_unregister(dev, 12);
-
-	nouveau_gpuobj_ref(NULL, &pgraph->ctx_table);
-}
-
 int
-nv30_graph_init(struct drm_device *dev)
+nv30_graph_init(struct drm_device *dev, int engine)
 {
+	struct nv20_graph_engine *pgraph = nv_engine(dev, engine);
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	int ret, i;
-
-	switch (dev_priv->chipset) {
-	case 0x30:
-	case 0x31:
-		pgraph->grctx_size = NV30_31_GRCTX_SIZE;
-		break;
-	case 0x34:
-		pgraph->grctx_size = NV34_GRCTX_SIZE;
-		break;
-	case 0x35:
-	case 0x36:
-		pgraph->grctx_size = NV35_36_GRCTX_SIZE;
-		break;
-	default:
-		NV_ERROR(dev, "unknown chipset, disabling acceleration\n");
-		pgraph->accel_blocked = true;
-		return 0;
-	}
+	int i;
 
 	nv_wr32(dev, NV03_PMC_ENABLE,
 		nv_rd32(dev, NV03_PMC_ENABLE) & ~NV_PMC_ENABLE_PGRAPH);
 	nv_wr32(dev, NV03_PMC_ENABLE,
 		nv_rd32(dev, NV03_PMC_ENABLE) |  NV_PMC_ENABLE_PGRAPH);
 
-	if (!pgraph->ctx_table) {
-		/* Create Context Pointer Table */
-		ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16,
-					 NVOBJ_FLAG_ZERO_ALLOC,
-					 &pgraph->ctx_table);
-		if (ret)
-			return ret;
-	}
-
-	ret = nv30_graph_register(dev);
-	if (ret) {
-		nouveau_gpuobj_ref(NULL, &pgraph->ctx_table);
-		return ret;
-	}
+	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE, pgraph->ctxtab->pinst >> 4);
 
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_TABLE,
-		     pgraph->ctx_table->pinst >> 4);
-
-	nouveau_irq_register(dev, 12, nv20_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
 	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
@@ -775,85 +653,11 @@ nv30_graph_init(struct drm_device *dev)
 	return 0;
 }
 
-static int
-nv20_graph_register(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (dev_priv->engine.graph.registered)
-		return 0;
-
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
-	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
-	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
-	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
-	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
-	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
-	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
-	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
-	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
-	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
-	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
-	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
-	NVOBJ_CLASS(dev, 0x009e, GR); /* swzsurf */
-	NVOBJ_CLASS(dev, 0x0096, GR); /* celcius */
-
-	/* kelvin */
-	if (dev_priv->chipset < 0x25)
-		NVOBJ_CLASS(dev, 0x0097, GR);
-	else
-		NVOBJ_CLASS(dev, 0x0597, GR);
-
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
-	dev_priv->engine.graph.registered = true;
-	return 0;
-}
-
-static int
-nv30_graph_register(struct drm_device *dev)
+int
+nv20_graph_fini(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (dev_priv->engine.graph.registered)
-		return 0;
-
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
-	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
-	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
-	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
-	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
-	NVOBJ_CLASS(dev, 0x038a, GR); /* ifc (nv30) */
-	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
-	NVOBJ_CLASS(dev, 0x0389, GR); /* sifm (nv30) */
-	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
-	NVOBJ_CLASS(dev, 0x0362, GR); /* surf2d (nv30) */
-	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
-	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
-	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
-	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
-	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
-	NVOBJ_CLASS(dev, 0x039e, GR); /* swzsurf */
-
-	/* rankine */
-	if (0x00000003 & (1 << (dev_priv->chipset & 0x0f)))
-		NVOBJ_CLASS(dev, 0x0397, GR);
-	else
-	if (0x00000010 & (1 << (dev_priv->chipset & 0x0f)))
-		NVOBJ_CLASS(dev, 0x0697, GR);
-	else
-	if (0x000001e0 & (1 << (dev_priv->chipset & 0x0f)))
-		NVOBJ_CLASS(dev, 0x0497, GR);
-
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
-	dev_priv->engine.graph.registered = true;
+	nv20_graph_unload_context(dev);
+	nv_wr32(dev, NV03_PGRAPH_INTR_EN, 0x00000000);
 	return 0;
 }
 
@@ -897,3 +701,135 @@ nv20_graph_isr(struct drm_device *dev)
 		}
 	}
 }
+
+static void
+nv20_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nv20_graph_engine *pgraph = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 12);
+	nouveau_gpuobj_ref(NULL, &pgraph->ctxtab);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+	kfree(pgraph);
+}
+
+int
+nv20_graph_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv20_graph_engine *pgraph;
+	int ret;
+
+	pgraph = kzalloc(sizeof(*pgraph), GFP_KERNEL);
+	if (!pgraph)
+		return -ENOMEM;
+
+	pgraph->base.destroy = nv20_graph_destroy;
+	pgraph->base.fini = nv20_graph_fini;
+	pgraph->base.context_new = nv20_graph_context_new;
+	pgraph->base.context_del = nv20_graph_context_del;
+	pgraph->base.object_new = nv04_graph_object_new;
+	pgraph->base.set_tile_region = nv20_graph_set_tile_region;
+
+	pgraph->grctx_user = 0x0028;
+	if (dev_priv->card_type == NV_20) {
+		pgraph->base.init = nv20_graph_init;
+		switch (dev_priv->chipset) {
+		case 0x20:
+			pgraph->grctx_init = nv20_graph_context_init;
+			pgraph->grctx_size = NV20_GRCTX_SIZE;
+			pgraph->grctx_user = 0x0000;
+			break;
+		case 0x25:
+		case 0x28:
+			pgraph->grctx_init = nv25_graph_context_init;
+			pgraph->grctx_size = NV25_GRCTX_SIZE;
+			break;
+		case 0x2a:
+			pgraph->grctx_init = nv2a_graph_context_init;
+			pgraph->grctx_size = NV2A_GRCTX_SIZE;
+			pgraph->grctx_user = 0x0000;
+			break;
+		default:
+			NV_ERROR(dev, "PGRAPH: unknown chipset\n");
+			return 0;
+		}
+	} else {
+		pgraph->base.init = nv30_graph_init;
+		switch (dev_priv->chipset) {
+		case 0x30:
+		case 0x31:
+			pgraph->grctx_init = nv30_31_graph_context_init;
+			pgraph->grctx_size = NV30_31_GRCTX_SIZE;
+			break;
+		case 0x34:
+			pgraph->grctx_init = nv34_graph_context_init;
+			pgraph->grctx_size = NV34_GRCTX_SIZE;
+			break;
+		case 0x35:
+		case 0x36:
+			pgraph->grctx_init = nv35_36_graph_context_init;
+			pgraph->grctx_size = NV35_36_GRCTX_SIZE;
+			break;
+		default:
+			NV_ERROR(dev, "PGRAPH: unknown chipset\n");
+			return 0;
+		}
+	}
+
+	/* Create Context Pointer Table */
+	ret = nouveau_gpuobj_new(dev, NULL, 32 * 4, 16, NVOBJ_FLAG_ZERO_ALLOC,
+				 &pgraph->ctxtab);
+	if (ret) {
+		kfree(pgraph);
+		return ret;
+	}
+
+	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
+	nouveau_irq_register(dev, 12, nv20_graph_isr);
+
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+
+	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
+	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
+	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
+	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
+	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
+	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
+	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
+	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
+	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
+	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
+	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
+	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
+	if (dev_priv->card_type == NV_20) {
+		NVOBJ_CLASS(dev, 0x009e, GR); /* swzsurf */
+		NVOBJ_CLASS(dev, 0x0096, GR); /* celcius */
+
+		/* kelvin */
+		if (dev_priv->chipset < 0x25)
+			NVOBJ_CLASS(dev, 0x0097, GR);
+		else
+			NVOBJ_CLASS(dev, 0x0597, GR);
+	} else {
+		NVOBJ_CLASS(dev, 0x038a, GR); /* ifc (nv30) */
+		NVOBJ_CLASS(dev, 0x0389, GR); /* sifm (nv30) */
+		NVOBJ_CLASS(dev, 0x0362, GR); /* surf2d (nv30) */
+		NVOBJ_CLASS(dev, 0x039e, GR); /* swzsurf */
+
+		/* rankine */
+		if (0x00000003 & (1 << (dev_priv->chipset & 0x0f)))
+			NVOBJ_CLASS(dev, 0x0397, GR);
+		else
+		if (0x00000010 & (1 << (dev_priv->chipset & 0x0f)))
+			NVOBJ_CLASS(dev, 0x0697, GR);
+		else
+		if (0x000001e0 & (1 << (dev_priv->chipset & 0x0f)))
+			NVOBJ_CLASS(dev, 0x0497, GR);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv40_fifo.c b/drivers/gpu/drm/nouveau/nv40_fifo.c
index 49b9a35a9cd6..68cb2d991c88 100644
--- a/drivers/gpu/drm/nouveau/nv40_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv40_fifo.c
@@ -115,6 +115,7 @@ nv40_fifo_do_load_context(struct drm_device *dev, int chid)
 	nv_wr32(dev, 0x32e8, nv_ri32(dev, fc + 68));
 	nv_wr32(dev, 0x2088, nv_ri32(dev, fc + 76));
 	nv_wr32(dev, 0x3300, nv_ri32(dev, fc + 80));
+	nv_wr32(dev, 0x330c, nv_ri32(dev, fc + 84));
 
 	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
@@ -186,6 +187,7 @@ nv40_fifo_unload_context(struct drm_device *dev)
 	tmp |= (nv_rd32(dev, NV04_PFIFO_CACHE1_PUT) << 16);
 	nv_wi32(dev, fc + 72, tmp);
 #endif
+	nv_wi32(dev, fc + 84, nv_rd32(dev, 0x330c));
 
 	nv40_fifo_do_load_context(dev, pfifo->channels - 1);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1,
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index fceb44c0ec74..5beb01b8ace1 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -28,14 +28,18 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 #include "nouveau_grctx.h"
+#include "nouveau_ramht.h"
 
-static int nv40_graph_register(struct drm_device *);
-static void nv40_graph_isr(struct drm_device *);
+struct nv40_graph_engine {
+	struct nouveau_exec_engine base;
+	u32 grctx_size;
+};
 
-struct nouveau_channel *
+static struct nouveau_channel *
 nv40_graph_channel(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *grctx;
 	uint32_t inst;
 	int i;
 
@@ -45,74 +49,17 @@ nv40_graph_channel(struct drm_device *dev)
 	inst = (inst & NV40_PGRAPH_CTXCTL_CUR_INSTANCE) << 4;
 
 	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+		if (!dev_priv->channels.ptr[i])
+			continue;
 
-		if (chan && chan->ramin_grctx &&
-		    chan->ramin_grctx->pinst == inst)
-			return chan;
+		grctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_GR];
+		if (grctx && grctx->pinst == inst)
+			return dev_priv->channels.ptr[i];
 	}
 
 	return NULL;
 }
 
-int
-nv40_graph_create_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_grctx ctx = {};
-	unsigned long flags;
-	int ret;
-
-	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 16,
-				 NVOBJ_FLAG_ZERO_ALLOC, &chan->ramin_grctx);
-	if (ret)
-		return ret;
-
-	/* Initialise default context values */
-	ctx.dev = chan->dev;
-	ctx.mode = NOUVEAU_GRCTX_VALS;
-	ctx.data = chan->ramin_grctx;
-	nv40_grctx_init(&ctx);
-
-	nv_wo32(chan->ramin_grctx, 0, chan->ramin_grctx->pinst);
-
-	/* init grctx pointer in ramfc, and on PFIFO if channel is
-	 * already active there
-	 */
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	nv_wo32(chan->ramfc, 0x38, chan->ramin_grctx->pinst >> 4);
-	nv_mask(dev, 0x002500, 0x00000001, 0x00000000);
-	if ((nv_rd32(dev, 0x003204) & 0x0000001f) == chan->id)
-		nv_wr32(dev, 0x0032e0, chan->ramin_grctx->pinst >> 4);
-	nv_mask(dev, 0x002500, 0x00000001, 0x00000001);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-	return 0;
-}
-
-void
-nv40_graph_destroy_context(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	unsigned long flags;
-
-	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
-	pgraph->fifo_access(dev, false);
-
-	/* Unload the context if it's the currently active one */
-	if (pgraph->channel(dev) == chan)
-		pgraph->unload_context(dev);
-
-	pgraph->fifo_access(dev, true);
-	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
-
-	/* Free the context resources */
-	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
-}
-
 static int
 nv40_graph_transfer_context(struct drm_device *dev, uint32_t inst, int save)
 {
@@ -154,57 +101,115 @@ nv40_graph_transfer_context(struct drm_device *dev, uint32_t inst, int save)
 	return 0;
 }
 
-/* Restore the context for a specific channel into PGRAPH */
-int
-nv40_graph_load_context(struct nouveau_channel *chan)
+static int
+nv40_graph_unload_context(struct drm_device *dev)
 {
-	struct drm_device *dev = chan->dev;
 	uint32_t inst;
 	int ret;
 
-	if (!chan->ramin_grctx)
-		return -EINVAL;
-	inst = chan->ramin_grctx->pinst >> 4;
+	inst = nv_rd32(dev, NV40_PGRAPH_CTXCTL_CUR);
+	if (!(inst & NV40_PGRAPH_CTXCTL_CUR_LOADED))
+		return 0;
+	inst &= NV40_PGRAPH_CTXCTL_CUR_INSTANCE;
+
+	ret = nv40_graph_transfer_context(dev, inst, 1);
+
+	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, inst);
+	return ret;
+}
+
+static int
+nv40_graph_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nv40_graph_engine *pgraph = nv_engine(chan->dev, engine);
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *grctx = NULL;
+	struct nouveau_grctx ctx = {};
+	unsigned long flags;
+	int ret;
 
-	ret = nv40_graph_transfer_context(dev, inst, 0);
+	ret = nouveau_gpuobj_new(dev, NULL, pgraph->grctx_size, 16,
+				 NVOBJ_FLAG_ZERO_ALLOC, &grctx);
 	if (ret)
 		return ret;
 
-	/* 0x40032C, no idea of it's exact function.  Could simply be a
-	 * record of the currently active PGRAPH context.  It's currently
-	 * unknown as to what bit 24 does.  The nv ddx has it set, so we will
-	 * set it here too.
-	 */
-	nv_wr32(dev, NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR,
-		 (inst & NV40_PGRAPH_CTXCTL_CUR_INSTANCE) |
-		  NV40_PGRAPH_CTXCTL_CUR_LOADED);
-	/* 0x32E0 records the instance address of the active FIFO's PGRAPH
-	 * context.  If at any time this doesn't match 0x40032C, you will
-	 * receive PGRAPH_INTR_CONTEXT_SWITCH
+	/* Initialise default context values */
+	ctx.dev = chan->dev;
+	ctx.mode = NOUVEAU_GRCTX_VALS;
+	ctx.data = grctx;
+	nv40_grctx_init(&ctx);
+
+	nv_wo32(grctx, 0, grctx->vinst);
+
+	/* init grctx pointer in ramfc, and on PFIFO if channel is
+	 * already active there
 	 */
-	nv_wr32(dev, NV40_PFIFO_GRCTX_INSTANCE, inst);
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_wo32(chan->ramfc, 0x38, grctx->vinst >> 4);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000000);
+	if ((nv_rd32(dev, 0x003204) & 0x0000001f) == chan->id)
+		nv_wr32(dev, 0x0032e0, grctx->vinst >> 4);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	chan->engctx[engine] = grctx;
 	return 0;
 }
 
-int
-nv40_graph_unload_context(struct drm_device *dev)
+static void
+nv40_graph_context_del(struct nouveau_channel *chan, int engine)
 {
-	uint32_t inst;
-	int ret;
+	struct nouveau_gpuobj *grctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	unsigned long flags;
 
-	inst = nv_rd32(dev, NV40_PGRAPH_CTXCTL_CUR);
-	if (!(inst & NV40_PGRAPH_CTXCTL_CUR_LOADED))
-		return 0;
-	inst &= NV40_PGRAPH_CTXCTL_CUR_INSTANCE;
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv04_graph_fifo_access(dev, false);
 
-	ret = nv40_graph_transfer_context(dev, inst, 1);
+	/* Unload the context if it's the currently active one */
+	if (nv40_graph_channel(dev) == chan)
+		nv40_graph_unload_context(dev);
 
-	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, inst);
+	nv04_graph_fifo_access(dev, true);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	/* Free the context resources */
+	nouveau_gpuobj_ref(NULL, &grctx);
+	chan->engctx[engine] = NULL;
+}
+
+int
+nv40_graph_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 20, 16, NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 1;
+	obj->class  = class;
+
+	nv_wo32(obj, 0x00, class);
+	nv_wo32(obj, 0x04, 0x00000000);
+#ifndef __BIG_ENDIAN
+	nv_wo32(obj, 0x08, 0x00000000);
+#else
+	nv_wo32(obj, 0x08, 0x01000000);
+#endif
+	nv_wo32(obj, 0x0c, 0x00000000);
+	nv_wo32(obj, 0x10, 0x00000000);
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
 	return ret;
 }
 
-void
+static void
 nv40_graph_set_tile_region(struct drm_device *dev, int i)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -257,14 +262,14 @@ nv40_graph_set_tile_region(struct drm_device *dev, int i)
  * C51		0x4e
  */
 int
-nv40_graph_init(struct drm_device *dev)
+nv40_graph_init(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv =
-		(struct drm_nouveau_private *)dev->dev_private;
+	struct nv40_graph_engine *pgraph = nv_engine(dev, engine);
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	struct nouveau_grctx ctx = {};
 	uint32_t vramsz, *cp;
-	int ret, i, j;
+	int i, j;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
 			~NV_PMC_ENABLE_PGRAPH);
@@ -280,7 +285,7 @@ nv40_graph_init(struct drm_device *dev)
 	ctx.data = cp;
 	ctx.ctxprog_max = 256;
 	nv40_grctx_init(&ctx);
-	dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
+	pgraph->grctx_size = ctx.ctxvals_pos * 4;
 
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
 	for (i = 0; i < ctx.ctxprog_len; i++)
@@ -288,14 +293,9 @@ nv40_graph_init(struct drm_device *dev)
 
 	kfree(cp);
 
-	ret = nv40_graph_register(dev);
-	if (ret)
-		return ret;
-
 	/* No context present currently */
 	nv_wr32(dev, NV40_PGRAPH_CTXCTL_CUR, 0x00000000);
 
-	nouveau_irq_register(dev, 12, nv40_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR   , 0xFFFFFFFF);
 	nv_wr32(dev, NV40_PGRAPH_INTR_EN, 0xFFFFFFFF);
 
@@ -428,47 +428,10 @@ nv40_graph_init(struct drm_device *dev)
 	return 0;
 }
 
-void nv40_graph_takedown(struct drm_device *dev)
-{
-	nouveau_irq_unregister(dev, 12);
-}
-
 static int
-nv40_graph_register(struct drm_device *dev)
+nv40_graph_fini(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (dev_priv->engine.graph.registered)
-		return 0;
-
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
-	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
-	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
-	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
-	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
-	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
-	NVOBJ_CLASS(dev, 0x3089, GR); /* sifm (nv40) */
-	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
-	NVOBJ_CLASS(dev, 0x3062, GR); /* surf2d (nv40) */
-	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
-	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
-	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
-	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
-	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
-	NVOBJ_CLASS(dev, 0x309e, GR); /* swzsurf */
-
-	/* curie */
-	if (nv44_graph_class(dev))
-		NVOBJ_CLASS(dev, 0x4497, GR);
-	else
-		NVOBJ_CLASS(dev, 0x4097, GR);
-
-	/* nvsw */
-	NVOBJ_CLASS(dev, 0x506e, SW);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
-
-	dev_priv->engine.graph.registered = true;
+	nv40_graph_unload_context(dev);
 	return 0;
 }
 
@@ -476,17 +439,17 @@ static int
 nv40_graph_isr_chid(struct drm_device *dev, u32 inst)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_channel *chan;
+	struct nouveau_gpuobj *grctx;
 	unsigned long flags;
 	int i;
 
 	spin_lock_irqsave(&dev_priv->channels.lock, flags);
 	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		chan = dev_priv->channels.ptr[i];
-		if (!chan || !chan->ramin_grctx)
+		if (!dev_priv->channels.ptr[i])
 			continue;
+		grctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_GR];
 
-		if (inst == chan->ramin_grctx->pinst)
+		if (grctx && grctx->pinst == inst)
 			break;
 	}
 	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
@@ -537,3 +500,63 @@ nv40_graph_isr(struct drm_device *dev)
 		}
 	}
 }
+
+static void
+nv40_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nv40_graph_engine *pgraph = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 12);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+	kfree(pgraph);
+}
+
+int
+nv40_graph_create(struct drm_device *dev)
+{
+	struct nv40_graph_engine *pgraph;
+
+	pgraph = kzalloc(sizeof(*pgraph), GFP_KERNEL);
+	if (!pgraph)
+		return -ENOMEM;
+
+	pgraph->base.destroy = nv40_graph_destroy;
+	pgraph->base.init = nv40_graph_init;
+	pgraph->base.fini = nv40_graph_fini;
+	pgraph->base.context_new = nv40_graph_context_new;
+	pgraph->base.context_del = nv40_graph_context_del;
+	pgraph->base.object_new = nv40_graph_object_new;
+	pgraph->base.set_tile_region = nv40_graph_set_tile_region;
+
+	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
+	nouveau_irq_register(dev, 12, nv40_graph_isr);
+
+	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
+	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
+	NVOBJ_CLASS(dev, 0x0039, GR); /* m2mf */
+	NVOBJ_CLASS(dev, 0x004a, GR); /* gdirect */
+	NVOBJ_CLASS(dev, 0x009f, GR); /* imageblit (nv12) */
+	NVOBJ_CLASS(dev, 0x008a, GR); /* ifc */
+	NVOBJ_CLASS(dev, 0x0089, GR); /* sifm */
+	NVOBJ_CLASS(dev, 0x3089, GR); /* sifm (nv40) */
+	NVOBJ_CLASS(dev, 0x0062, GR); /* surf2d */
+	NVOBJ_CLASS(dev, 0x3062, GR); /* surf2d (nv40) */
+	NVOBJ_CLASS(dev, 0x0043, GR); /* rop */
+	NVOBJ_CLASS(dev, 0x0012, GR); /* beta1 */
+	NVOBJ_CLASS(dev, 0x0072, GR); /* beta4 */
+	NVOBJ_CLASS(dev, 0x0019, GR); /* cliprect */
+	NVOBJ_CLASS(dev, 0x0044, GR); /* pattern */
+	NVOBJ_CLASS(dev, 0x309e, GR); /* swzsurf */
+
+	/* curie */
+	if (nv44_graph_class(dev))
+		NVOBJ_CLASS(dev, 0x4497, GR);
+	else
+		NVOBJ_CLASS(dev, 0x4097, GR);
+
+	/* nvsw */
+	NVOBJ_CLASS(dev, 0x506e, SW);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv04_graph_mthd_page_flip);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv40_mpeg.c b/drivers/gpu/drm/nouveau/nv40_mpeg.c
new file mode 100644
index 000000000000..6d2af292a2e3
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv40_mpeg.c
@@ -0,0 +1,311 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_ramht.h"
+
+struct nv40_mpeg_engine {
+	struct nouveau_exec_engine base;
+};
+
+static int
+nv40_mpeg_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *ctx = NULL;
+	unsigned long flags;
+	int ret;
+
+	NV_DEBUG(dev, "ch%d\n", chan->id);
+
+	ret = nouveau_gpuobj_new(dev, NULL, 264 * 4, 16, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &ctx);
+	if (ret)
+		return ret;
+
+	nv_wo32(ctx, 0x78, 0x02001ec1);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000000);
+	if ((nv_rd32(dev, 0x003204) & 0x1f) == chan->id)
+		nv_wr32(dev, 0x00330c, ctx->pinst >> 4);
+	nv_wo32(chan->ramfc, 0x54, ctx->pinst >> 4);
+	nv_mask(dev, 0x002500, 0x00000001, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	chan->engctx[engine] = ctx;
+	return 0;
+}
+
+static void
+nv40_mpeg_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	unsigned long flags;
+	u32 inst = 0x80000000 | (ctx->pinst >> 4);
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
+	if (nv_rd32(dev, 0x00b318) == inst)
+		nv_mask(dev, 0x00b318, 0x80000000, 0x00000000);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	nouveau_gpuobj_ref(NULL, &ctx);
+	chan->engctx[engine] = NULL;
+}
+
+static int
+nv40_mpeg_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 20, 16, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 2;
+	obj->class  = class;
+
+	nv_wo32(obj, 0x00, class);
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
+}
+
+static int
+nv40_mpeg_init(struct drm_device *dev, int engine)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+	int i;
+
+	/* VPE init */
+	nv_mask(dev, 0x000200, 0x00000002, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00000002, 0x00000002);
+	nv_wr32(dev, 0x00b0e0, 0x00000020); /* nvidia: rd 0x01, wr 0x20 */
+	nv_wr32(dev, 0x00b0e8, 0x00000020); /* nvidia: rd 0x01, wr 0x20 */
+
+	for (i = 0; i < dev_priv->engine.fb.num_tiles; i++)
+		pmpeg->base.set_tile_region(dev, i);
+
+	/* PMPEG init */
+	nv_wr32(dev, 0x00b32c, 0x00000000);
+	nv_wr32(dev, 0x00b314, 0x00000100);
+	nv_wr32(dev, 0x00b220, 0x00000044);
+	nv_wr32(dev, 0x00b300, 0x02001ec1);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
+
+	nv_wr32(dev, 0x00b100, 0xffffffff);
+	nv_wr32(dev, 0x00b140, 0xffffffff);
+
+	if (!nv_wait(dev, 0x00b200, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "PMPEG init: 0x%08x\n", nv_rd32(dev, 0x00b200));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nv40_mpeg_fini(struct drm_device *dev, int engine)
+{
+	/*XXX: context save? */
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
+	nv_wr32(dev, 0x00b140, 0x00000000);
+	return 0;
+}
+
+static int
+nv40_mpeg_mthd_dma(struct nouveau_channel *chan, u32 class, u32 mthd, u32 data)
+{
+	struct drm_device *dev = chan->dev;
+	u32 inst = data << 4;
+	u32 dma0 = nv_ri32(dev, inst + 0);
+	u32 dma1 = nv_ri32(dev, inst + 4);
+	u32 dma2 = nv_ri32(dev, inst + 8);
+	u32 base = (dma2 & 0xfffff000) | (dma0 >> 20);
+	u32 size = dma1 + 1;
+
+	/* only allow linear DMA objects */
+	if (!(dma0 & 0x00002000))
+		return -EINVAL;
+
+	if (mthd == 0x0190) {
+		/* DMA_CMD */
+		nv_mask(dev, 0x00b300, 0x00030000, (dma0 & 0x00030000));
+		nv_wr32(dev, 0x00b334, base);
+		nv_wr32(dev, 0x00b324, size);
+	} else
+	if (mthd == 0x01a0) {
+		/* DMA_DATA */
+		nv_mask(dev, 0x00b300, 0x000c0000, (dma0 & 0x00030000) << 2);
+		nv_wr32(dev, 0x00b360, base);
+		nv_wr32(dev, 0x00b364, size);
+	} else {
+		/* DMA_IMAGE, VRAM only */
+		if (dma0 & 0x000c0000)
+			return -EINVAL;
+
+		nv_wr32(dev, 0x00b370, base);
+		nv_wr32(dev, 0x00b374, size);
+	}
+
+	return 0;
+}
+
+static int
+nv40_mpeg_isr_chid(struct drm_device *dev, u32 inst)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *ctx;
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&dev_priv->channels.lock, flags);
+	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+		if (!dev_priv->channels.ptr[i])
+			continue;
+
+		ctx = dev_priv->channels.ptr[i]->engctx[NVOBJ_ENGINE_MPEG];
+		if (ctx && ctx->pinst == inst)
+			break;
+	}
+	spin_unlock_irqrestore(&dev_priv->channels.lock, flags);
+	return i;
+}
+
+static void
+nv40_vpe_set_tile_region(struct drm_device *dev, int i)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
+
+	nv_wr32(dev, 0x00b008 + (i * 0x10), tile->pitch);
+	nv_wr32(dev, 0x00b004 + (i * 0x10), tile->limit);
+	nv_wr32(dev, 0x00b000 + (i * 0x10), tile->addr);
+}
+
+static void
+nv40_mpeg_isr(struct drm_device *dev)
+{
+	u32 inst = (nv_rd32(dev, 0x00b318) & 0x000fffff) << 4;
+	u32 chid = nv40_mpeg_isr_chid(dev, inst);
+	u32 stat = nv_rd32(dev, 0x00b100);
+	u32 type = nv_rd32(dev, 0x00b230);
+	u32 mthd = nv_rd32(dev, 0x00b234);
+	u32 data = nv_rd32(dev, 0x00b238);
+	u32 show = stat;
+
+	if (stat & 0x01000000) {
+		/* happens on initial binding of the object */
+		if (type == 0x00000020 && mthd == 0x0000) {
+			nv_mask(dev, 0x00b308, 0x00000000, 0x00000000);
+			show &= ~0x01000000;
+		}
+
+		if (type == 0x00000010) {
+			if (!nouveau_gpuobj_mthd_call2(dev, chid, 0x3174, mthd, data))
+				show &= ~0x01000000;
+		}
+	}
+
+	nv_wr32(dev, 0x00b100, stat);
+	nv_wr32(dev, 0x00b230, 0x00000001);
+
+	if (show && nouveau_ratelimit()) {
+		NV_INFO(dev, "PMPEG: Ch %d [0x%08x] 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			chid, inst, stat, type, mthd, data);
+	}
+}
+
+static void
+nv40_vpe_isr(struct drm_device *dev)
+{
+	if (nv_rd32(dev, 0x00b100))
+		nv40_mpeg_isr(dev);
+
+	if (nv_rd32(dev, 0x00b800)) {
+		u32 stat = nv_rd32(dev, 0x00b800);
+		NV_INFO(dev, "PMSRCH: 0x%08x\n", stat);
+		nv_wr32(dev, 0xb800, stat);
+	}
+}
+
+static void
+nv40_mpeg_destroy(struct drm_device *dev, int engine)
+{
+	struct nv40_mpeg_engine *pmpeg = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 0);
+
+	NVOBJ_ENGINE_DEL(dev, MPEG);
+	kfree(pmpeg);
+}
+
+int
+nv40_mpeg_create(struct drm_device *dev)
+{
+	struct nv40_mpeg_engine *pmpeg;
+
+	pmpeg = kzalloc(sizeof(*pmpeg), GFP_KERNEL);
+	if (!pmpeg)
+		return -ENOMEM;
+
+	pmpeg->base.destroy = nv40_mpeg_destroy;
+	pmpeg->base.init = nv40_mpeg_init;
+	pmpeg->base.fini = nv40_mpeg_fini;
+	pmpeg->base.context_new = nv40_mpeg_context_new;
+	pmpeg->base.context_del = nv40_mpeg_context_del;
+	pmpeg->base.object_new = nv40_mpeg_object_new;
+
+	/* ISR vector, PMC_ENABLE bit,  and TILE regs are shared between
+	 * all VPE engines, for this driver's purposes the PMPEG engine
+	 * will be treated as the "master" and handle the global VPE
+	 * bits too
+	 */
+	pmpeg->base.set_tile_region = nv40_vpe_set_tile_region;
+	nouveau_irq_register(dev, 0, nv40_vpe_isr);
+
+	NVOBJ_ENGINE_ADD(dev, MPEG, &pmpeg->base);
+	NVOBJ_CLASS(dev, 0x3174, MPEG);
+	NVOBJ_MTHD (dev, 0x3174, 0x0190, nv40_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01a0, nv40_mpeg_mthd_dma);
+	NVOBJ_MTHD (dev, 0x3174, 0x01b0, nv40_mpeg_mthd_dma);
+
+#if 0
+	NVOBJ_ENGINE_ADD(dev, ME, &pme->base);
+	NVOBJ_CLASS(dev, 0x4075, ME);
+#endif
+	return 0;
+
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_calc.c b/drivers/gpu/drm/nouveau/nv50_calc.c
index de81151648f8..8cf63a8b30cd 100644
--- a/drivers/gpu/drm/nouveau/nv50_calc.c
+++ b/drivers/gpu/drm/nouveau/nv50_calc.c
@@ -23,7 +23,6 @@
  */
 
 #include "drmP.h"
-#include "drm_fixed.h"
 #include "nouveau_drv.h"
 #include "nouveau_hw.h"
 
@@ -47,45 +46,52 @@ nv50_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
 }
 
 int
-nv50_calc_pll2(struct drm_device *dev, struct pll_lims *pll, int clk,
-	       int *N, int *fN, int *M, int *P)
+nva3_calc_pll(struct drm_device *dev, struct pll_lims *pll, int clk,
+	      int *pN, int *pfN, int *pM, int *P)
 {
-	fixed20_12 fb_div, a, b;
-	u32 refclk = pll->refclk / 10;
-	u32 max_vco_freq = pll->vco1.maxfreq / 10;
-	u32 max_vco_inputfreq = pll->vco1.max_inputfreq / 10;
-	clk /= 10;
+	u32 best_err = ~0, err;
+	int M, lM, hM, N, fN;
 
-	*P = max_vco_freq / clk;
+	*P = pll->vco1.maxfreq / clk;
 	if (*P > pll->max_p)
 		*P = pll->max_p;
 	if (*P < pll->min_p)
 		*P = pll->min_p;
 
-	/* *M = floor((refclk + max_vco_inputfreq) / max_vco_inputfreq); */
-	a.full = dfixed_const(refclk + max_vco_inputfreq);
-	b.full = dfixed_const(max_vco_inputfreq);
-	a.full = dfixed_div(a, b);
-	a.full = dfixed_floor(a);
-	*M = dfixed_trunc(a);
+	lM = (pll->refclk + pll->vco1.max_inputfreq) / pll->vco1.max_inputfreq;
+	lM = max(lM, (int)pll->vco1.min_m);
+	hM = (pll->refclk + pll->vco1.min_inputfreq) / pll->vco1.min_inputfreq;
+	hM = min(hM, (int)pll->vco1.max_m);
 
-	/* fb_div = (vco * *M) / refclk; */
-	fb_div.full = dfixed_const(clk * *P);
-	fb_div.full = dfixed_mul(fb_div, a);
-	a.full = dfixed_const(refclk);
-	fb_div.full = dfixed_div(fb_div, a);
+	for (M = lM; M <= hM; M++) {
+		u32 tmp = clk * *P * M;
+		N  = tmp / pll->refclk;
+		fN = tmp % pll->refclk;
+		if (!pfN && fN >= pll->refclk / 2)
+			N++;
 
-	/* *N = floor(fb_div); */
-	a.full = dfixed_floor(fb_div);
-	*N = dfixed_trunc(fb_div);
+		if (N < pll->vco1.min_n)
+			continue;
+		if (N > pll->vco1.max_n)
+			break;
 
-	/* *fN = (fmod(fb_div, 1.0) * 8192) - 4096; */
-	b.full = dfixed_const(8192);
-	a.full = dfixed_mul(a, b);
-	fb_div.full = dfixed_mul(fb_div, b);
-	fb_div.full = fb_div.full - a.full;
-	*fN = dfixed_trunc(fb_div) - 4096;
-	*fN &= 0xffff;
+		err = abs(clk - (pll->refclk * N / M / *P));
+		if (err < best_err) {
+			best_err = err;
+			*pN = N;
+			*pM = M;
+		}
 
-	return clk;
+		if (pfN) {
+			*pfN = (((fN << 13) / pll->refclk) - 4096) & 0xffff;
+			return clk;
+		}
+	}
+
+	if (unlikely(best_err == ~0)) {
+		NV_ERROR(dev, "unable to find matching pll values\n");
+		return -EINVAL;
+	}
+
+	return pll->refclk * *pN / *pM / *P;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index a19ccaa025b3..ebabacf38da9 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -286,7 +286,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 8, reg2 | (P << 28) | (M2 << 16) | N2);
 	} else
 	if (dev_priv->chipset < NV_C0) {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
@@ -298,7 +298,7 @@ nv50_crtc_set_clock(struct drm_device *dev, int head, int pclk)
 		nv_wr32(dev, pll.reg + 4, reg1 | (P << 16) | (M1 << 8) | N1);
 		nv_wr32(dev, pll.reg + 8, N2);
 	} else {
-		ret = nv50_calc_pll2(dev, &pll, pclk, &N1, &N2, &M1, &P);
+		ret = nva3_calc_pll(dev, &pll, pclk, &N1, &N2, &M1, &P);
 		if (ret <= 0)
 			return 0;
 
@@ -349,14 +349,14 @@ nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	struct drm_gem_object *gem;
 	int ret = 0, i;
 
-	if (width != 64 || height != 64)
-		return -EINVAL;
-
 	if (!buffer_handle) {
 		nv_crtc->cursor.hide(nv_crtc, true);
 		return 0;
 	}
 
+	if (width != 64 || height != 64)
+		return -EINVAL;
+
 	gem = drm_gem_object_lookup(dev, file_priv, buffer_handle);
 	if (!gem)
 		return -ENOENT;
@@ -532,8 +532,7 @@ nv50_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	if (atomic) {
 		drm_fb = passed_fb;
 		fb = nouveau_framebuffer(passed_fb);
-	}
-	else {
+	} else {
 		/* If not atomic, we can go ahead and pin, and unpin the
 		 * old fb we were passed.
 		 */
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 75a376cc342a..74a3f6872701 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -517,13 +517,25 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcb,
 			if (bios->fp.if_is_24bit)
 				script |= 0x0200;
 		} else {
+			/* determine number of lvds links */
+			if (nv_connector && nv_connector->edid &&
+			    nv_connector->dcb->type == DCB_CONNECTOR_LVDS_SPWG) {
+				/* http://www.spwg.org */
+				if (((u8 *)nv_connector->edid)[121] == 2)
+					script |= 0x0100;
+			} else
 			if (pxclk >= bios->fp.duallink_transition_clk) {
 				script |= 0x0100;
+			}
+
+			/* determine panel depth */
+			if (script & 0x0100) {
 				if (bios->fp.strapless_is_24bit & 2)
 					script |= 0x0200;
-			} else
-			if (bios->fp.strapless_is_24bit & 1)
-				script |= 0x0200;
+			} else {
+				if (bios->fp.strapless_is_24bit & 1)
+					script |= 0x0200;
+			}
 
 			if (nv_connector && nv_connector->edid &&
 			    (nv_connector->edid->revision >= 4) &&
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index b02a5b1e7d37..e25cbb46789a 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -31,10 +31,95 @@
 #include "nouveau_grctx.h"
 #include "nouveau_dma.h"
 #include "nouveau_vm.h"
+#include "nouveau_ramht.h"
 #include "nv50_evo.h"
 
-static int  nv50_graph_register(struct drm_device *);
-static void nv50_graph_isr(struct drm_device *);
+struct nv50_graph_engine {
+	struct nouveau_exec_engine base;
+	u32 ctxprog[512];
+	u32 ctxprog_size;
+	u32 grctx_size;
+};
+
+static void
+nv50_graph_fifo_access(struct drm_device *dev, bool enabled)
+{
+	const uint32_t mask = 0x00010001;
+
+	if (enabled)
+		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | mask);
+	else
+		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) & ~mask);
+}
+
+static struct nouveau_channel *
+nv50_graph_channel(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t inst;
+	int i;
+
+	/* Be sure we're not in the middle of a context switch or bad things
+	 * will happen, such as unloading the wrong pgraph context.
+	 */
+	if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000))
+		NV_ERROR(dev, "Ctxprog is still running\n");
+
+	inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
+	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
+		return NULL;
+	inst = (inst & NV50_PGRAPH_CTXCTL_CUR_INSTANCE) << 12;
+
+	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
+		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
+
+		if (chan && chan->ramin && chan->ramin->vinst == inst)
+			return chan;
+	}
+
+	return NULL;
+}
+
+static int
+nv50_graph_do_load_context(struct drm_device *dev, uint32_t inst)
+{
+	uint32_t fifo = nv_rd32(dev, 0x400500);
+
+	nv_wr32(dev, 0x400500, fifo & ~1);
+	nv_wr32(dev, 0x400784, inst);
+	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x40);
+	nv_wr32(dev, 0x400320, nv_rd32(dev, 0x400320) | 0x11);
+	nv_wr32(dev, 0x400040, 0xffffffff);
+	(void)nv_rd32(dev, 0x400040);
+	nv_wr32(dev, 0x400040, 0x00000000);
+	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 1);
+
+	if (nouveau_wait_for_idle(dev))
+		nv_wr32(dev, 0x40032c, inst | (1<<31));
+	nv_wr32(dev, 0x400500, fifo);
+
+	return 0;
+}
+
+static int
+nv50_graph_unload_context(struct drm_device *dev)
+{
+	uint32_t inst;
+
+	inst  = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
+	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
+		return 0;
+	inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE;
+
+	nouveau_wait_for_idle(dev);
+	nv_wr32(dev, 0x400784, inst);
+	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20);
+	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 0x01);
+	nouveau_wait_for_idle(dev);
+
+	nv_wr32(dev, NV50_PGRAPH_CTXCTL_CUR, inst);
+	return 0;
+}
 
 static void
 nv50_graph_init_reset(struct drm_device *dev)
@@ -52,7 +137,6 @@ nv50_graph_init_intr(struct drm_device *dev)
 {
 	NV_DEBUG(dev, "\n");
 
-	nouveau_irq_register(dev, 12, nv50_graph_isr);
 	nv_wr32(dev, NV03_PGRAPH_INTR, 0xffffffff);
 	nv_wr32(dev, 0x400138, 0xffffffff);
 	nv_wr32(dev, NV40_PGRAPH_INTR_EN, 0xffffffff);
@@ -135,34 +219,14 @@ nv50_graph_init_zcull(struct drm_device *dev)
 static int
 nv50_graph_init_ctxctl(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_grctx ctx = {};
-	uint32_t *cp;
+	struct nv50_graph_engine *pgraph = nv_engine(dev, NVOBJ_ENGINE_GR);
 	int i;
 
 	NV_DEBUG(dev, "\n");
 
-	cp = kmalloc(512 * 4, GFP_KERNEL);
-	if (!cp) {
-		NV_ERROR(dev, "failed to allocate ctxprog\n");
-		dev_priv->engine.graph.accel_blocked = true;
-		return 0;
-	}
-
-	ctx.dev = dev;
-	ctx.mode = NOUVEAU_GRCTX_PROG;
-	ctx.data = cp;
-	ctx.ctxprog_max = 512;
-	if (!nv50_grctx_init(&ctx)) {
-		dev_priv->engine.graph.grctx_size = ctx.ctxvals_pos * 4;
-
-		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
-		for (i = 0; i < ctx.ctxprog_len; i++)
-			nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, cp[i]);
-	} else {
-		dev_priv->engine.graph.accel_blocked = true;
-	}
-	kfree(cp);
+	nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0);
+	for (i = 0; i < pgraph->ctxprog_size; i++)
+		nv_wr32(dev, NV40_PGRAPH_CTXCTL_UCODE_DATA, pgraph->ctxprog[i]);
 
 	nv_wr32(dev, 0x40008c, 0x00000004); /* HW_CTX_SWITCH_ENABLED */
 	nv_wr32(dev, 0x400320, 4);
@@ -171,8 +235,8 @@ nv50_graph_init_ctxctl(struct drm_device *dev)
 	return 0;
 }
 
-int
-nv50_graph_init(struct drm_device *dev)
+static int
+nv50_graph_init(struct drm_device *dev, int engine)
 {
 	int ret;
 
@@ -186,105 +250,66 @@ nv50_graph_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
-	ret = nv50_graph_register(dev);
-	if (ret)
-		return ret;
 	nv50_graph_init_intr(dev);
 	return 0;
 }
 
-void
-nv50_graph_takedown(struct drm_device *dev)
+static int
+nv50_graph_fini(struct drm_device *dev, int engine)
 {
 	NV_DEBUG(dev, "\n");
+	nv50_graph_unload_context(dev);
 	nv_wr32(dev, 0x40013c, 0x00000000);
-	nouveau_irq_unregister(dev, 12);
-}
-
-void
-nv50_graph_fifo_access(struct drm_device *dev, bool enabled)
-{
-	const uint32_t mask = 0x00010001;
-
-	if (enabled)
-		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | mask);
-	else
-		nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) & ~mask);
-}
-
-struct nouveau_channel *
-nv50_graph_channel(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t inst;
-	int i;
-
-	/* Be sure we're not in the middle of a context switch or bad things
-	 * will happen, such as unloading the wrong pgraph context.
-	 */
-	if (!nv_wait(dev, 0x400300, 0x00000001, 0x00000000))
-		NV_ERROR(dev, "Ctxprog is still running\n");
-
-	inst = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
-	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
-		return NULL;
-	inst = (inst & NV50_PGRAPH_CTXCTL_CUR_INSTANCE) << 12;
-
-	for (i = 0; i < dev_priv->engine.fifo.channels; i++) {
-		struct nouveau_channel *chan = dev_priv->channels.ptr[i];
-
-		if (chan && chan->ramin && chan->ramin->vinst == inst)
-			return chan;
-	}
-
-	return NULL;
+	return 0;
 }
 
-int
-nv50_graph_create_context(struct nouveau_channel *chan)
+static int
+nv50_graph_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramin = chan->ramin;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+	struct nouveau_gpuobj *grctx = NULL;
+	struct nv50_graph_engine *pgraph = nv_engine(dev, engine);
 	struct nouveau_grctx ctx = {};
 	int hdr, ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	ret = nouveau_gpuobj_new(dev, chan, pgraph->grctx_size, 0,
+	ret = nouveau_gpuobj_new(dev, NULL, pgraph->grctx_size, 0,
 				 NVOBJ_FLAG_ZERO_ALLOC |
-				 NVOBJ_FLAG_ZERO_FREE, &chan->ramin_grctx);
+				 NVOBJ_FLAG_ZERO_FREE, &grctx);
 	if (ret)
 		return ret;
 
 	hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
 	nv_wo32(ramin, hdr + 0x00, 0x00190002);
-	nv_wo32(ramin, hdr + 0x04, chan->ramin_grctx->vinst +
-				   pgraph->grctx_size - 1);
-	nv_wo32(ramin, hdr + 0x08, chan->ramin_grctx->vinst);
+	nv_wo32(ramin, hdr + 0x04, grctx->vinst + grctx->size - 1);
+	nv_wo32(ramin, hdr + 0x08, grctx->vinst);
 	nv_wo32(ramin, hdr + 0x0c, 0);
 	nv_wo32(ramin, hdr + 0x10, 0);
 	nv_wo32(ramin, hdr + 0x14, 0x00010000);
 
 	ctx.dev = chan->dev;
 	ctx.mode = NOUVEAU_GRCTX_VALS;
-	ctx.data = chan->ramin_grctx;
+	ctx.data = grctx;
 	nv50_grctx_init(&ctx);
 
-	nv_wo32(chan->ramin_grctx, 0x00000, chan->ramin->vinst >> 12);
+	nv_wo32(grctx, 0x00000, chan->ramin->vinst >> 12);
 
 	dev_priv->engine.instmem.flush(dev);
-	atomic_inc(&chan->vm->pgraph_refs);
+
+	atomic_inc(&chan->vm->engref[NVOBJ_ENGINE_GR]);
+	chan->engctx[NVOBJ_ENGINE_GR] = grctx;
 	return 0;
 }
 
-void
-nv50_graph_destroy_context(struct nouveau_channel *chan)
+static void
+nv50_graph_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nouveau_gpuobj *grctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	int i, hdr = (dev_priv->chipset == 0x50) ? 0x200 : 0x20;
 	unsigned long flags;
@@ -296,72 +321,49 @@ nv50_graph_destroy_context(struct nouveau_channel *chan)
 
 	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
 	pfifo->reassign(dev, false);
-	pgraph->fifo_access(dev, false);
+	nv50_graph_fifo_access(dev, false);
 
-	if (pgraph->channel(dev) == chan)
-		pgraph->unload_context(dev);
+	if (nv50_graph_channel(dev) == chan)
+		nv50_graph_unload_context(dev);
 
 	for (i = hdr; i < hdr + 24; i += 4)
 		nv_wo32(chan->ramin, i, 0);
 	dev_priv->engine.instmem.flush(dev);
 
-	pgraph->fifo_access(dev, true);
+	nv50_graph_fifo_access(dev, true);
 	pfifo->reassign(dev, true);
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 
-	nouveau_gpuobj_ref(NULL, &chan->ramin_grctx);
+	nouveau_gpuobj_ref(NULL, &grctx);
 
-	atomic_dec(&chan->vm->pgraph_refs);
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
 }
 
 static int
-nv50_graph_do_load_context(struct drm_device *dev, uint32_t inst)
-{
-	uint32_t fifo = nv_rd32(dev, 0x400500);
-
-	nv_wr32(dev, 0x400500, fifo & ~1);
-	nv_wr32(dev, 0x400784, inst);
-	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x40);
-	nv_wr32(dev, 0x400320, nv_rd32(dev, 0x400320) | 0x11);
-	nv_wr32(dev, 0x400040, 0xffffffff);
-	(void)nv_rd32(dev, 0x400040);
-	nv_wr32(dev, 0x400040, 0x00000000);
-	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 1);
-
-	if (nouveau_wait_for_idle(dev))
-		nv_wr32(dev, 0x40032c, inst | (1<<31));
-	nv_wr32(dev, 0x400500, fifo);
-
-	return 0;
-}
-
-int
-nv50_graph_load_context(struct nouveau_channel *chan)
-{
-	uint32_t inst = chan->ramin->vinst >> 12;
-
-	NV_DEBUG(chan->dev, "ch%d\n", chan->id);
-	return nv50_graph_do_load_context(chan->dev, inst);
-}
-
-int
-nv50_graph_unload_context(struct drm_device *dev)
+nv50_graph_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
 {
-	uint32_t inst;
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
 
-	inst  = nv_rd32(dev, NV50_PGRAPH_CTXCTL_CUR);
-	if (!(inst & NV50_PGRAPH_CTXCTL_CUR_LOADED))
-		return 0;
-	inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE;
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 1;
+	obj->class  = class;
 
-	nouveau_wait_for_idle(dev);
-	nv_wr32(dev, 0x400784, inst);
-	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20);
-	nv_wr32(dev, 0x400304, nv_rd32(dev, 0x400304) | 0x01);
-	nouveau_wait_for_idle(dev);
+	nv_wo32(obj, 0x00, class);
+	nv_wo32(obj, 0x04, 0x00000000);
+	nv_wo32(obj, 0x08, 0x00000000);
+	nv_wo32(obj, 0x0c, 0x00000000);
+	dev_priv->engine.instmem.flush(dev);
 
-	nv_wr32(dev, NV50_PGRAPH_CTXCTL_CUR, inst);
-	return 0;
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
 }
 
 static void
@@ -442,68 +444,15 @@ nv50_graph_nvsw_mthd_page_flip(struct nouveau_channel *chan,
 	return 0;
 }
 
-static int
-nv50_graph_register(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-
-	if (dev_priv->engine.graph.registered)
-		return 0;
-
-	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
-	NVOBJ_MTHD (dev, 0x506e, 0x018c, nv50_graph_nvsw_dma_vblsem);
-	NVOBJ_MTHD (dev, 0x506e, 0x0400, nv50_graph_nvsw_vblsem_offset);
-	NVOBJ_MTHD (dev, 0x506e, 0x0404, nv50_graph_nvsw_vblsem_release_val);
-	NVOBJ_MTHD (dev, 0x506e, 0x0408, nv50_graph_nvsw_vblsem_release);
-	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv50_graph_nvsw_mthd_page_flip);
-
-	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
-	NVOBJ_CLASS(dev, 0x5039, GR); /* m2mf */
-	NVOBJ_CLASS(dev, 0x502d, GR); /* 2d */
-
-	/* tesla */
-	if (dev_priv->chipset == 0x50)
-		NVOBJ_CLASS(dev, 0x5097, GR); /* tesla (nv50) */
-	else
-	if (dev_priv->chipset < 0xa0)
-		NVOBJ_CLASS(dev, 0x8297, GR); /* tesla (nv8x/nv9x) */
-	else {
-		switch (dev_priv->chipset) {
-		case 0xa0:
-		case 0xaa:
-		case 0xac:
-			NVOBJ_CLASS(dev, 0x8397, GR);
-			break;
-		case 0xa3:
-		case 0xa5:
-		case 0xa8:
-			NVOBJ_CLASS(dev, 0x8597, GR);
-			break;
-		case 0xaf:
-			NVOBJ_CLASS(dev, 0x8697, GR);
-			break;
-		}
-	}
-
-	/* compute */
-	NVOBJ_CLASS(dev, 0x50c0, GR);
-	if (dev_priv->chipset  > 0xa0 &&
-	    dev_priv->chipset != 0xaa &&
-	    dev_priv->chipset != 0xac)
-		NVOBJ_CLASS(dev, 0x85c0, GR);
-
-	dev_priv->engine.graph.registered = true;
-	return 0;
-}
 
-void
-nv50_graph_tlb_flush(struct drm_device *dev)
+static void
+nv50_graph_tlb_flush(struct drm_device *dev, int engine)
 {
 	nv50_vm_flush_engine(dev, 0);
 }
 
-void
-nv84_graph_tlb_flush(struct drm_device *dev)
+static void
+nv84_graph_tlb_flush(struct drm_device *dev, int engine)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
@@ -548,8 +497,7 @@ nv84_graph_tlb_flush(struct drm_device *dev)
 	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
 }
 
-static struct nouveau_enum nv50_mp_exec_error_names[] =
-{
+static struct nouveau_enum nv50_mp_exec_error_names[] = {
 	{ 3, "STACK_UNDERFLOW", NULL },
 	{ 4, "QUADON_ACTIVE", NULL },
 	{ 8, "TIMEOUT", NULL },
@@ -663,7 +611,7 @@ nv50_pgraph_mp_trap(struct drm_device *dev, int tpid, int display)
 			nv_rd32(dev, addr + 0x20);
 			pc = nv_rd32(dev, addr + 0x24);
 			oplow = nv_rd32(dev, addr + 0x70);
-			ophigh= nv_rd32(dev, addr + 0x74);
+			ophigh = nv_rd32(dev, addr + 0x74);
 			NV_INFO(dev, "PGRAPH_TRAP_MP_EXEC - "
 					"TP %d MP %d: ", tpid, i);
 			nouveau_enum_print(nv50_mp_exec_error_names, status);
@@ -991,7 +939,7 @@ nv50_pgraph_trap_handler(struct drm_device *dev, u32 display, u64 inst, u32 chid
 	return 1;
 }
 
-static int
+int
 nv50_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -1073,3 +1021,101 @@ nv50_graph_isr(struct drm_device *dev)
 	if (nv_rd32(dev, 0x400824) & (1 << 31))
 		nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31));
 }
+
+static void
+nv50_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nv50_graph_engine *pgraph = nv_engine(dev, engine);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+
+	nouveau_irq_unregister(dev, 12);
+	kfree(pgraph);
+}
+
+int
+nv50_graph_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_graph_engine *pgraph;
+	struct nouveau_grctx ctx = {};
+	int ret;
+
+	pgraph = kzalloc(sizeof(*pgraph),GFP_KERNEL);
+	if (!pgraph)
+		return -ENOMEM;
+
+	ctx.dev = dev;
+	ctx.mode = NOUVEAU_GRCTX_PROG;
+	ctx.data = pgraph->ctxprog;
+	ctx.ctxprog_max = ARRAY_SIZE(pgraph->ctxprog);
+
+	ret = nv50_grctx_init(&ctx);
+	if (ret) {
+		NV_ERROR(dev, "PGRAPH: ctxprog build failed\n");
+		kfree(pgraph);
+		return 0;
+	}
+
+	pgraph->grctx_size = ctx.ctxvals_pos * 4;
+	pgraph->ctxprog_size = ctx.ctxprog_len;
+
+	pgraph->base.destroy = nv50_graph_destroy;
+	pgraph->base.init = nv50_graph_init;
+	pgraph->base.fini = nv50_graph_fini;
+	pgraph->base.context_new = nv50_graph_context_new;
+	pgraph->base.context_del = nv50_graph_context_del;
+	pgraph->base.object_new = nv50_graph_object_new;
+	if (dev_priv->chipset == 0x50 || dev_priv->chipset == 0xac)
+		pgraph->base.tlb_flush = nv50_graph_tlb_flush;
+	else
+		pgraph->base.tlb_flush = nv84_graph_tlb_flush;
+
+	nouveau_irq_register(dev, 12, nv50_graph_isr);
+
+	/* NVSW really doesn't live here... */
+	NVOBJ_CLASS(dev, 0x506e, SW); /* nvsw */
+	NVOBJ_MTHD (dev, 0x506e, 0x018c, nv50_graph_nvsw_dma_vblsem);
+	NVOBJ_MTHD (dev, 0x506e, 0x0400, nv50_graph_nvsw_vblsem_offset);
+	NVOBJ_MTHD (dev, 0x506e, 0x0404, nv50_graph_nvsw_vblsem_release_val);
+	NVOBJ_MTHD (dev, 0x506e, 0x0408, nv50_graph_nvsw_vblsem_release);
+	NVOBJ_MTHD (dev, 0x506e, 0x0500, nv50_graph_nvsw_mthd_page_flip);
+
+	NVOBJ_ENGINE_ADD(dev, GR, &pgraph->base);
+	NVOBJ_CLASS(dev, 0x0030, GR); /* null */
+	NVOBJ_CLASS(dev, 0x5039, GR); /* m2mf */
+	NVOBJ_CLASS(dev, 0x502d, GR); /* 2d */
+
+	/* tesla */
+	if (dev_priv->chipset == 0x50)
+		NVOBJ_CLASS(dev, 0x5097, GR); /* tesla (nv50) */
+	else
+	if (dev_priv->chipset < 0xa0)
+		NVOBJ_CLASS(dev, 0x8297, GR); /* tesla (nv8x/nv9x) */
+	else {
+		switch (dev_priv->chipset) {
+		case 0xa0:
+		case 0xaa:
+		case 0xac:
+			NVOBJ_CLASS(dev, 0x8397, GR);
+			break;
+		case 0xa3:
+		case 0xa5:
+		case 0xa8:
+			NVOBJ_CLASS(dev, 0x8597, GR);
+			break;
+		case 0xaf:
+			NVOBJ_CLASS(dev, 0x8697, GR);
+			break;
+		}
+	}
+
+	/* compute */
+	NVOBJ_CLASS(dev, 0x50c0, GR);
+	if (dev_priv->chipset  > 0xa0 &&
+	    dev_priv->chipset != 0xaa &&
+	    dev_priv->chipset != 0xac)
+		NVOBJ_CLASS(dev, 0x85c0, GR);
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_grctx.c b/drivers/gpu/drm/nouveau/nv50_grctx.c
index 336aab2a24a6..de9abff12b90 100644
--- a/drivers/gpu/drm/nouveau/nv50_grctx.c
+++ b/drivers/gpu/drm/nouveau/nv50_grctx.c
@@ -747,7 +747,7 @@ nv50_graph_construct_mmio(struct nouveau_grctx *ctx)
 				gr_def(ctx, offset + 0x64, 0x0000001f);
 				gr_def(ctx, offset + 0x68, 0x0000000f);
 				gr_def(ctx, offset + 0x6c, 0x0000000f);
-			} else if(dev_priv->chipset < 0xa0) {
+			} else if (dev_priv->chipset < 0xa0) {
 				cp_ctx(ctx, offset + 0x50, 1);
 				cp_ctx(ctx, offset + 0x70, 1);
 			} else {
@@ -924,7 +924,7 @@ nv50_graph_construct_mmio_ddata(struct nouveau_grctx *ctx)
 		dd_emit(ctx, 1, 0);	/* 0000007f MULTISAMPLE_SAMPLES_LOG2 */
 	} else {
 		dd_emit(ctx, 1, 0);	/* 0000000f MULTISAMPLE_SAMPLES_LOG2 */
-	} 
+	}
 	dd_emit(ctx, 1, 0xc);		/* 000000ff SEMANTIC_COLOR.BFC0_ID */
 	if (dev_priv->chipset != 0x50)
 		dd_emit(ctx, 1, 0);	/* 00000001 SEMANTIC_COLOR.CLMP_EN */
@@ -1803,9 +1803,7 @@ nv50_graph_construct_gene_unk24xx(struct nouveau_grctx *ctx)
 		xf_emit(ctx, 1, 0);	/* 1ff */
 		xf_emit(ctx, 8, 0);	/* 0? */
 		xf_emit(ctx, 9, 0);	/* ffffffff, 7ff */
-	}
-	else
-	{
+	} else {
 		xf_emit(ctx, 0xc, 0);	/* RO */
 		/* SEEK */
 		xf_emit(ctx, 0xe10, 0); /* 190 * 9: 8*ffffffff, 7ff */
@@ -2836,7 +2834,7 @@ nv50_graph_construct_xfer_tprop(struct nouveau_grctx *ctx)
 	xf_emit(ctx, 1, 1);		/* 00000001 DST_LINEAR */
 	if (IS_NVA3F(dev_priv->chipset))
 		xf_emit(ctx, 1, 1);	/* 0000001f tesla UNK169C */
-	if(dev_priv->chipset == 0x50)
+	if (dev_priv->chipset == 0x50)
 		xf_emit(ctx, 1, 0);	/* ff */
 	else
 		xf_emit(ctx, 3, 0);	/* 1, 7, 3ff */
diff --git a/drivers/gpu/drm/nouveau/nv50_mpeg.c b/drivers/gpu/drm/nouveau/nv50_mpeg.c
new file mode 100644
index 000000000000..1dc5913f78c5
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nv50_mpeg.c
@@ -0,0 +1,256 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_ramht.h"
+
+struct nv50_mpeg_engine {
+	struct nouveau_exec_engine base;
+};
+
+static inline u32
+CTX_PTR(struct drm_device *dev, u32 offset)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	if (dev_priv->chipset == 0x50)
+		offset += 0x0260;
+	else
+		offset += 0x0060;
+
+	return offset;
+}
+
+static int
+nv50_mpeg_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *ramin = chan->ramin;
+	struct nouveau_gpuobj *ctx = NULL;
+	int ret;
+
+	NV_DEBUG(dev, "ch%d\n", chan->id);
+
+	ret = nouveau_gpuobj_new(dev, chan, 128 * 4, 0, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &ctx);
+	if (ret)
+		return ret;
+
+	nv_wo32(ramin, CTX_PTR(dev, 0x00), 0x80190002);
+	nv_wo32(ramin, CTX_PTR(dev, 0x04), ctx->vinst + ctx->size - 1);
+	nv_wo32(ramin, CTX_PTR(dev, 0x08), ctx->vinst);
+	nv_wo32(ramin, CTX_PTR(dev, 0x0c), 0);
+	nv_wo32(ramin, CTX_PTR(dev, 0x10), 0);
+	nv_wo32(ramin, CTX_PTR(dev, 0x14), 0x00010000);
+
+	nv_wo32(ctx, 0x70, 0x00801ec1);
+	nv_wo32(ctx, 0x7c, 0x0000037c);
+	dev_priv->engine.instmem.flush(dev);
+
+	chan->engctx[engine] = ctx;
+	return 0;
+}
+
+static void
+nv50_mpeg_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	unsigned long flags;
+	u32 inst, i;
+
+	if (!chan->ramin)
+		return;
+
+	inst  = chan->ramin->vinst >> 12;
+	inst |= 0x80000000;
+
+	spin_lock_irqsave(&dev_priv->context_switch_lock, flags);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
+	if (nv_rd32(dev, 0x00b318) == inst)
+		nv_mask(dev, 0x00b318, 0x80000000, 0x00000000);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
+	spin_unlock_irqrestore(&dev_priv->context_switch_lock, flags);
+
+	for (i = 0x00; i <= 0x14; i += 4)
+		nv_wo32(chan->ramin, CTX_PTR(dev, i), 0x00000000);
+	nouveau_gpuobj_ref(NULL, &ctx);
+	chan->engctx[engine] = NULL;
+}
+
+static int
+nv50_mpeg_object_new(struct nouveau_channel *chan, int engine,
+		     u32 handle, u16 class)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 2;
+	obj->class  = class;
+
+	nv_wo32(obj, 0x00, class);
+	nv_wo32(obj, 0x04, 0x00000000);
+	nv_wo32(obj, 0x08, 0x00000000);
+	nv_wo32(obj, 0x0c, 0x00000000);
+	dev_priv->engine.instmem.flush(dev);
+
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
+}
+
+static void
+nv50_mpeg_tlb_flush(struct drm_device *dev, int engine)
+{
+	nv50_vm_flush_engine(dev, 0x08);
+}
+
+static int
+nv50_mpeg_init(struct drm_device *dev, int engine)
+{
+	nv_wr32(dev, 0x00b32c, 0x00000000);
+	nv_wr32(dev, 0x00b314, 0x00000100);
+	nv_wr32(dev, 0x00b0e0, 0x0000001a);
+
+	nv_wr32(dev, 0x00b220, 0x00000044);
+	nv_wr32(dev, 0x00b300, 0x00801ec1);
+	nv_wr32(dev, 0x00b390, 0x00000000);
+	nv_wr32(dev, 0x00b394, 0x00000000);
+	nv_wr32(dev, 0x00b398, 0x00000000);
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000001);
+
+	nv_wr32(dev, 0x00b100, 0xffffffff);
+	nv_wr32(dev, 0x00b140, 0xffffffff);
+
+	if (!nv_wait(dev, 0x00b200, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "PMPEG init: 0x%08x\n", nv_rd32(dev, 0x00b200));
+		return -EBUSY;
+	}
+
+	return 0;
+}
+
+static int
+nv50_mpeg_fini(struct drm_device *dev, int engine)
+{
+	/*XXX: context save for s/r */
+	nv_mask(dev, 0x00b32c, 0x00000001, 0x00000000);
+	nv_wr32(dev, 0x00b140, 0x00000000);
+	return 0;
+}
+
+static void
+nv50_mpeg_isr(struct drm_device *dev)
+{
+	u32 stat = nv_rd32(dev, 0x00b100);
+	u32 type = nv_rd32(dev, 0x00b230);
+	u32 mthd = nv_rd32(dev, 0x00b234);
+	u32 data = nv_rd32(dev, 0x00b238);
+	u32 show = stat;
+
+	if (stat & 0x01000000) {
+		/* happens on initial binding of the object */
+		if (type == 0x00000020 && mthd == 0x0000) {
+			nv_wr32(dev, 0x00b308, 0x00000100);
+			show &= ~0x01000000;
+		}
+	}
+
+	if (show && nouveau_ratelimit()) {
+		NV_INFO(dev, "PMPEG - 0x%08x 0x%08x 0x%08x 0x%08x\n",
+			stat, type, mthd, data);
+	}
+
+	nv_wr32(dev, 0x00b100, stat);
+	nv_wr32(dev, 0x00b230, 0x00000001);
+	nv50_fb_vm_trap(dev, 1);
+}
+
+static void
+nv50_vpe_isr(struct drm_device *dev)
+{
+	if (nv_rd32(dev, 0x00b100))
+		nv50_mpeg_isr(dev);
+
+	if (nv_rd32(dev, 0x00b800)) {
+		u32 stat = nv_rd32(dev, 0x00b800);
+		NV_INFO(dev, "PMSRCH: 0x%08x\n", stat);
+		nv_wr32(dev, 0xb800, stat);
+	}
+}
+
+static void
+nv50_mpeg_destroy(struct drm_device *dev, int engine)
+{
+	struct nv50_mpeg_engine *pmpeg = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 0);
+
+	NVOBJ_ENGINE_DEL(dev, MPEG);
+	kfree(pmpeg);
+}
+
+int
+nv50_mpeg_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nv50_mpeg_engine *pmpeg;
+
+	pmpeg = kzalloc(sizeof(*pmpeg), GFP_KERNEL);
+	if (!pmpeg)
+		return -ENOMEM;
+
+	pmpeg->base.destroy = nv50_mpeg_destroy;
+	pmpeg->base.init = nv50_mpeg_init;
+	pmpeg->base.fini = nv50_mpeg_fini;
+	pmpeg->base.context_new = nv50_mpeg_context_new;
+	pmpeg->base.context_del = nv50_mpeg_context_del;
+	pmpeg->base.object_new = nv50_mpeg_object_new;
+	pmpeg->base.tlb_flush = nv50_mpeg_tlb_flush;
+
+	if (dev_priv->chipset == 0x50) {
+		nouveau_irq_register(dev, 0, nv50_vpe_isr);
+		NVOBJ_ENGINE_ADD(dev, MPEG, &pmpeg->base);
+		NVOBJ_CLASS(dev, 0x3174, MPEG);
+#if 0
+		NVOBJ_ENGINE_ADD(dev, ME, &pme->base);
+		NVOBJ_CLASS(dev, 0x4075, ME);
+#endif
+	} else {
+		nouveau_irq_register(dev, 0, nv50_mpeg_isr);
+		NVOBJ_ENGINE_ADD(dev, MPEG, &pmpeg->base);
+		NVOBJ_CLASS(dev, 0x8274, MPEG);
+	}
+
+	return 0;
+
+}
diff --git a/drivers/gpu/drm/nouveau/nv50_pm.c b/drivers/gpu/drm/nouveau/nv50_pm.c
index 7dbb305d7e63..8a2810011bda 100644
--- a/drivers/gpu/drm/nouveau/nv50_pm.c
+++ b/drivers/gpu/drm/nouveau/nv50_pm.c
@@ -47,6 +47,21 @@ nv50_pm_clock_get(struct drm_device *dev, u32 id)
 
 	reg0 = nv_rd32(dev, pll.reg + 0);
 	reg1 = nv_rd32(dev, pll.reg + 4);
+
+	if ((reg0 & 0x80000000) == 0) {
+		if (id == PLL_SHADER) {
+			NV_DEBUG(dev, "Shader PLL is disabled. "
+				"Shader clock is twice the core\n");
+			ret = nv50_pm_clock_get(dev, PLL_CORE);
+			if (ret > 0)
+				return ret << 1;
+		} else if (id == PLL_MEMORY) {
+			NV_DEBUG(dev, "Memory PLL is disabled. "
+				"Memory clock is equal to the ref_clk\n");
+			return pll.refclk;
+		}
+	}
+
 	P = (reg0 & 0x00070000) >> 16;
 	N = (reg1 & 0x0000ff00) >> 8;
 	M = (reg1 & 0x000000ff);
diff --git a/drivers/gpu/drm/nouveau/nv50_vm.c b/drivers/gpu/drm/nouveau/nv50_vm.c
index 6c2694490741..1a0dd491a0e4 100644
--- a/drivers/gpu/drm/nouveau/nv50_vm.c
+++ b/drivers/gpu/drm/nouveau/nv50_vm.c
@@ -151,8 +151,7 @@ nv50_vm_flush(struct nouveau_vm *vm)
 	struct drm_nouveau_private *dev_priv = vm->dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nouveau_crypt_engine *pcrypt = &dev_priv->engine.crypt;
+	int i;
 
 	pinstmem->flush(vm->dev);
 
@@ -163,11 +162,10 @@ nv50_vm_flush(struct nouveau_vm *vm)
 	}
 
 	pfifo->tlb_flush(vm->dev);
-
-	if (atomic_read(&vm->pgraph_refs))
-		pgraph->tlb_flush(vm->dev);
-	if (atomic_read(&vm->pcrypt_refs))
-		pcrypt->tlb_flush(vm->dev);
+	for (i = 0; i < NVOBJ_ENGINE_NR; i++) {
+		if (atomic_read(&vm->engref[i]))
+			dev_priv->eng[i]->tlb_flush(vm->dev, i);
+	}
 }
 
 void
diff --git a/drivers/gpu/drm/nouveau/nv84_crypt.c b/drivers/gpu/drm/nouveau/nv84_crypt.c
index fabc7fd30b1d..75b809a51748 100644
--- a/drivers/gpu/drm/nouveau/nv84_crypt.c
+++ b/drivers/gpu/drm/nouveau/nv84_crypt.c
@@ -26,46 +26,48 @@
 #include "nouveau_drv.h"
 #include "nouveau_util.h"
 #include "nouveau_vm.h"
+#include "nouveau_ramht.h"
 
-static void nv84_crypt_isr(struct drm_device *);
+struct nv84_crypt_engine {
+	struct nouveau_exec_engine base;
+};
 
-int
-nv84_crypt_create_context(struct nouveau_channel *chan)
+static int
+nv84_crypt_context_new(struct nouveau_channel *chan, int engine)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *ramin = chan->ramin;
+	struct nouveau_gpuobj *ctx;
 	int ret;
 
 	NV_DEBUG(dev, "ch%d\n", chan->id);
 
-	ret = nouveau_gpuobj_new(dev, chan, 256, 0,
-				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
-				 &chan->crypt_ctx);
+	ret = nouveau_gpuobj_new(dev, chan, 256, 0, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &ctx);
 	if (ret)
 		return ret;
 
 	nv_wo32(ramin, 0xa0, 0x00190000);
-	nv_wo32(ramin, 0xa4, chan->crypt_ctx->vinst + 0xff);
-	nv_wo32(ramin, 0xa8, chan->crypt_ctx->vinst);
+	nv_wo32(ramin, 0xa4, ctx->vinst + ctx->size - 1);
+	nv_wo32(ramin, 0xa8, ctx->vinst);
 	nv_wo32(ramin, 0xac, 0);
 	nv_wo32(ramin, 0xb0, 0);
 	nv_wo32(ramin, 0xb4, 0);
-
 	dev_priv->engine.instmem.flush(dev);
-	atomic_inc(&chan->vm->pcrypt_refs);
+
+	atomic_inc(&chan->vm->engref[engine]);
+	chan->engctx[engine] = ctx;
 	return 0;
 }
 
-void
-nv84_crypt_destroy_context(struct nouveau_channel *chan)
+static void
+nv84_crypt_context_del(struct nouveau_channel *chan, int engine)
 {
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
 	struct drm_device *dev = chan->dev;
 	u32 inst;
 
-	if (!chan->crypt_ctx)
-		return;
-
 	inst  = (chan->ramin->vinst >> 12);
 	inst |= 0x80000000;
 
@@ -80,43 +82,39 @@ nv84_crypt_destroy_context(struct nouveau_channel *chan)
 		nv_mask(dev, 0x10218c, 0x80000000, 0x00000000);
 	nv_wr32(dev, 0x10200c, 0x00000010);
 
-	nouveau_gpuobj_ref(NULL, &chan->crypt_ctx);
-	atomic_dec(&chan->vm->pcrypt_refs);
-}
+	nouveau_gpuobj_ref(NULL, &ctx);
 
-void
-nv84_crypt_tlb_flush(struct drm_device *dev)
-{
-	nv50_vm_flush_engine(dev, 0x0a);
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = NULL;
 }
 
-int
-nv84_crypt_init(struct drm_device *dev)
+static int
+nv84_crypt_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
 {
+	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_crypt_engine *pcrypt = &dev_priv->engine.crypt;
-
-	if (!pcrypt->registered) {
-		NVOBJ_CLASS(dev, 0x74c1, CRYPT);
-		pcrypt->registered = true;
-	}
+	struct nouveau_gpuobj *obj = NULL;
+	int ret;
 
-	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
-	nv_mask(dev, 0x000200, 0x00004000, 0x00004000);
+	ret = nouveau_gpuobj_new(dev, chan, 16, 16, NVOBJ_FLAG_ZERO_FREE, &obj);
+	if (ret)
+		return ret;
+	obj->engine = 5;
+	obj->class  = class;
 
-	nouveau_irq_register(dev, 14, nv84_crypt_isr);
-	nv_wr32(dev, 0x102130, 0xffffffff);
-	nv_wr32(dev, 0x102140, 0xffffffbf);
+	nv_wo32(obj, 0x00, class);
+	dev_priv->engine.instmem.flush(dev);
 
-	nv_wr32(dev, 0x10200c, 0x00000010);
-	return 0;
+	ret = nouveau_ramht_insert(chan, handle, obj);
+	nouveau_gpuobj_ref(NULL, &obj);
+	return ret;
 }
 
-void
-nv84_crypt_fini(struct drm_device *dev)
+static void
+nv84_crypt_tlb_flush(struct drm_device *dev, int engine)
 {
-	nv_wr32(dev, 0x102140, 0x00000000);
-	nouveau_irq_unregister(dev, 14);
+	nv50_vm_flush_engine(dev, 0x0a);
 }
 
 static void
@@ -138,3 +136,58 @@ nv84_crypt_isr(struct drm_device *dev)
 
 	nv50_fb_vm_trap(dev, show);
 }
+
+static int
+nv84_crypt_fini(struct drm_device *dev, int engine)
+{
+	nv_wr32(dev, 0x102140, 0x00000000);
+	return 0;
+}
+
+static int
+nv84_crypt_init(struct drm_device *dev, int engine)
+{
+	nv_mask(dev, 0x000200, 0x00004000, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00004000, 0x00004000);
+
+	nv_wr32(dev, 0x102130, 0xffffffff);
+	nv_wr32(dev, 0x102140, 0xffffffbf);
+
+	nv_wr32(dev, 0x10200c, 0x00000010);
+	return 0;
+}
+
+static void
+nv84_crypt_destroy(struct drm_device *dev, int engine)
+{
+	struct nv84_crypt_engine *pcrypt = nv_engine(dev, engine);
+
+	NVOBJ_ENGINE_DEL(dev, CRYPT);
+
+	nouveau_irq_unregister(dev, 14);
+	kfree(pcrypt);
+}
+
+int
+nv84_crypt_create(struct drm_device *dev)
+{
+	struct nv84_crypt_engine *pcrypt;
+
+	pcrypt = kzalloc(sizeof(*pcrypt), GFP_KERNEL);
+	if (!pcrypt)
+		return -ENOMEM;
+
+	pcrypt->base.destroy = nv84_crypt_destroy;
+	pcrypt->base.init = nv84_crypt_init;
+	pcrypt->base.fini = nv84_crypt_fini;
+	pcrypt->base.context_new = nv84_crypt_context_new;
+	pcrypt->base.context_del = nv84_crypt_context_del;
+	pcrypt->base.object_new = nv84_crypt_object_new;
+	pcrypt->base.tlb_flush = nv84_crypt_tlb_flush;
+
+	nouveau_irq_register(dev, 14, nv84_crypt_isr);
+
+	NVOBJ_ENGINE_ADD(dev, CRYPT, &pcrypt->base);
+	NVOBJ_CLASS (dev, 0x74c1, CRYPT);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.c b/drivers/gpu/drm/nouveau/nva3_copy.c
new file mode 100644
index 000000000000..b86820a61220
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nva3_copy.c
@@ -0,0 +1,226 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/firmware.h>
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_util.h"
+#include "nouveau_vm.h"
+#include "nouveau_ramht.h"
+#include "nva3_copy.fuc.h"
+
+struct nva3_copy_engine {
+	struct nouveau_exec_engine base;
+};
+
+static int
+nva3_copy_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *ramin = chan->ramin;
+	struct nouveau_gpuobj *ctx = NULL;
+	int ret;
+
+	NV_DEBUG(dev, "ch%d\n", chan->id);
+
+	ret = nouveau_gpuobj_new(dev, chan, 256, 0, NVOBJ_FLAG_ZERO_ALLOC |
+				 NVOBJ_FLAG_ZERO_FREE, &ctx);
+	if (ret)
+		return ret;
+
+	nv_wo32(ramin, 0xc0, 0x00190000);
+	nv_wo32(ramin, 0xc4, ctx->vinst + ctx->size - 1);
+	nv_wo32(ramin, 0xc8, ctx->vinst);
+	nv_wo32(ramin, 0xcc, 0x00000000);
+	nv_wo32(ramin, 0xd0, 0x00000000);
+	nv_wo32(ramin, 0xd4, 0x00000000);
+	dev_priv->engine.instmem.flush(dev);
+
+	atomic_inc(&chan->vm->engref[engine]);
+	chan->engctx[engine] = ctx;
+	return 0;
+}
+
+static int
+nva3_copy_object_new(struct nouveau_channel *chan, int engine,
+		     u32 handle, u16 class)
+{
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
+
+	/* fuc engine doesn't need an object, our ramht code does.. */
+	ctx->engine = 3;
+	ctx->class  = class;
+	return nouveau_ramht_insert(chan, handle, ctx);
+}
+
+static void
+nva3_copy_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	u32 inst;
+
+	inst  = (chan->ramin->vinst >> 12);
+	inst |= 0x40000000;
+
+	/* disable fifo access */
+	nv_wr32(dev, 0x104048, 0x00000000);
+	/* mark channel as unloaded if it's currently active */
+	if (nv_rd32(dev, 0x104050) == inst)
+		nv_mask(dev, 0x104050, 0x40000000, 0x00000000);
+	/* mark next channel as invalid if it's about to be loaded */
+	if (nv_rd32(dev, 0x104054) == inst)
+		nv_mask(dev, 0x104054, 0x40000000, 0x00000000);
+	/* restore fifo access */
+	nv_wr32(dev, 0x104048, 0x00000003);
+
+	for (inst = 0xc0; inst <= 0xd4; inst += 4)
+		nv_wo32(chan->ramin, inst, 0x00000000);
+
+	nouveau_gpuobj_ref(NULL, &ctx);
+
+	atomic_dec(&chan->vm->engref[engine]);
+	chan->engctx[engine] = ctx;
+}
+
+static void
+nva3_copy_tlb_flush(struct drm_device *dev, int engine)
+{
+	nv50_vm_flush_engine(dev, 0x0d);
+}
+
+static int
+nva3_copy_init(struct drm_device *dev, int engine)
+{
+	int i;
+
+	nv_mask(dev, 0x000200, 0x00002000, 0x00000000);
+	nv_mask(dev, 0x000200, 0x00002000, 0x00002000);
+	nv_wr32(dev, 0x104014, 0xffffffff); /* disable all interrupts */
+
+	/* upload ucode */
+	nv_wr32(dev, 0x1041c0, 0x01000000);
+	for (i = 0; i < sizeof(nva3_pcopy_data) / 4; i++)
+		nv_wr32(dev, 0x1041c4, nva3_pcopy_data[i]);
+
+	nv_wr32(dev, 0x104180, 0x01000000);
+	for (i = 0; i < sizeof(nva3_pcopy_code) / 4; i++) {
+		if ((i & 0x3f) == 0)
+			nv_wr32(dev, 0x104188, i >> 6);
+		nv_wr32(dev, 0x104184, nva3_pcopy_code[i]);
+	}
+
+	/* start it running */
+	nv_wr32(dev, 0x10410c, 0x00000000);
+	nv_wr32(dev, 0x104104, 0x00000000); /* ENTRY */
+	nv_wr32(dev, 0x104100, 0x00000002); /* TRIGGER */
+	return 0;
+}
+
+static int
+nva3_copy_fini(struct drm_device *dev, int engine)
+{
+	nv_mask(dev, 0x104048, 0x00000003, 0x00000000);
+
+	/* trigger fuc context unload */
+	nv_wait(dev, 0x104008, 0x0000000c, 0x00000000);
+	nv_mask(dev, 0x104054, 0x40000000, 0x00000000);
+	nv_wr32(dev, 0x104000, 0x00000008);
+	nv_wait(dev, 0x104008, 0x00000008, 0x00000000);
+
+	nv_wr32(dev, 0x104014, 0xffffffff);
+	return 0;
+}
+
+static struct nouveau_enum nva3_copy_isr_error_name[] = {
+	{ 0x0001, "ILLEGAL_MTHD" },
+	{ 0x0002, "INVALID_ENUM" },
+	{ 0x0003, "INVALID_BITFIELD" },
+	{}
+};
+
+static void
+nva3_copy_isr(struct drm_device *dev)
+{
+	u32 dispatch = nv_rd32(dev, 0x10401c);
+	u32 stat = nv_rd32(dev, 0x104008) & dispatch & ~(dispatch >> 16);
+	u32 inst = nv_rd32(dev, 0x104050) & 0x3fffffff;
+	u32 ssta = nv_rd32(dev, 0x104040) & 0x0000ffff;
+	u32 addr = nv_rd32(dev, 0x104040) >> 16;
+	u32 mthd = (addr & 0x07ff) << 2;
+	u32 subc = (addr & 0x3800) >> 11;
+	u32 data = nv_rd32(dev, 0x104044);
+	int chid = nv50_graph_isr_chid(dev, inst);
+
+	if (stat & 0x00000040) {
+		NV_INFO(dev, "PCOPY: DISPATCH_ERROR [");
+		nouveau_enum_print(nva3_copy_isr_error_name, ssta);
+		printk("] ch %d [0x%08x] subc %d mthd 0x%04x data 0x%08x\n",
+			chid, inst, subc, mthd, data);
+		nv_wr32(dev, 0x104004, 0x00000040);
+		stat &= ~0x00000040;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PCOPY: unhandled intr 0x%08x\n", stat);
+		nv_wr32(dev, 0x104004, stat);
+	}
+	nv50_fb_vm_trap(dev, 1);
+}
+
+static void
+nva3_copy_destroy(struct drm_device *dev, int engine)
+{
+	struct nva3_copy_engine *pcopy = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, 22);
+
+	NVOBJ_ENGINE_DEL(dev, COPY0);
+	kfree(pcopy);
+}
+
+int
+nva3_copy_create(struct drm_device *dev)
+{
+	struct nva3_copy_engine *pcopy;
+
+	pcopy = kzalloc(sizeof(*pcopy), GFP_KERNEL);
+	if (!pcopy)
+		return -ENOMEM;
+
+	pcopy->base.destroy = nva3_copy_destroy;
+	pcopy->base.init = nva3_copy_init;
+	pcopy->base.fini = nva3_copy_fini;
+	pcopy->base.context_new = nva3_copy_context_new;
+	pcopy->base.context_del = nva3_copy_context_del;
+	pcopy->base.object_new = nva3_copy_object_new;
+	pcopy->base.tlb_flush = nva3_copy_tlb_flush;
+
+	nouveau_irq_register(dev, 22, nva3_copy_isr);
+
+	NVOBJ_ENGINE_ADD(dev, COPY0, &pcopy->base);
+	NVOBJ_CLASS(dev, 0x85b5, COPY0);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc b/drivers/gpu/drm/nouveau/nva3_copy.fuc
new file mode 100644
index 000000000000..eaf35f8321ee
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc
@@ -0,0 +1,870 @@
+/* fuc microcode for copy engine on nva3- chipsets
+ *
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+/* To build for nva3:nvc0
+ *    m4 -DNVA3 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nva3_copy.fuc.h
+ *
+ * To build for nvc0-
+ *    m4 -DNVC0 nva3_copy.fuc | envyas -a -w -m fuc -V nva3 -o nvc0_copy.fuc.h
+ */
+
+ifdef(`NVA3',
+.section nva3_pcopy_data,
+.section nvc0_pcopy_data
+)
+
+ctx_object:                   .b32 0
+ifdef(`NVA3',
+ctx_dma:
+ctx_dma_query:                .b32 0
+ctx_dma_src:                  .b32 0
+ctx_dma_dst:                  .b32 0
+,)
+.equ ctx_dma_count 3
+ctx_query_address_high:       .b32 0
+ctx_query_address_low:        .b32 0
+ctx_query_counter:            .b32 0
+ctx_src_address_high:         .b32 0
+ctx_src_address_low:          .b32 0
+ctx_src_pitch:                .b32 0
+ctx_src_tile_mode:            .b32 0
+ctx_src_xsize:                .b32 0
+ctx_src_ysize:                .b32 0
+ctx_src_zsize:                .b32 0
+ctx_src_zoff:                 .b32 0
+ctx_src_xoff:                 .b32 0
+ctx_src_yoff:                 .b32 0
+ctx_src_cpp:                  .b32 0
+ctx_dst_address_high:         .b32 0
+ctx_dst_address_low:          .b32 0
+ctx_dst_pitch:                .b32 0
+ctx_dst_tile_mode:            .b32 0
+ctx_dst_xsize:                .b32 0
+ctx_dst_ysize:                .b32 0
+ctx_dst_zsize:                .b32 0
+ctx_dst_zoff:                 .b32 0
+ctx_dst_xoff:                 .b32 0
+ctx_dst_yoff:                 .b32 0
+ctx_dst_cpp:                  .b32 0
+ctx_format:                   .b32 0
+ctx_swz_const0:               .b32 0
+ctx_swz_const1:               .b32 0
+ctx_xcnt:                     .b32 0
+ctx_ycnt:                     .b32 0
+.align 256
+
+dispatch_table:
+// mthd 0x0000, NAME
+.b16 0x000 1
+.b32 ctx_object                     ~0xffffffff
+// mthd 0x0100, NOP
+.b16 0x040 1
+.b32 0x00010000 + cmd_nop           ~0xffffffff
+// mthd 0x0140, PM_TRIGGER
+.b16 0x050 1
+.b32 0x00010000 + cmd_pm_trigger    ~0xffffffff
+ifdef(`NVA3', `
+// mthd 0x0180-0x018c, DMA_
+.b16 0x060 ctx_dma_count
+dispatch_dma:
+.b32 0x00010000 + cmd_dma           ~0xffffffff
+.b32 0x00010000 + cmd_dma           ~0xffffffff
+.b32 0x00010000 + cmd_dma           ~0xffffffff
+',)
+// mthd 0x0200-0x0218, SRC_TILE
+.b16 0x80 7
+.b32 ctx_src_tile_mode              ~0x00000fff
+.b32 ctx_src_xsize                  ~0x0007ffff
+.b32 ctx_src_ysize                  ~0x00001fff
+.b32 ctx_src_zsize                  ~0x000007ff
+.b32 ctx_src_zoff                   ~0x00000fff
+.b32 ctx_src_xoff                   ~0x0007ffff
+.b32 ctx_src_yoff                   ~0x00001fff
+// mthd 0x0220-0x0238, DST_TILE
+.b16 0x88 7
+.b32 ctx_dst_tile_mode              ~0x00000fff
+.b32 ctx_dst_xsize                  ~0x0007ffff
+.b32 ctx_dst_ysize                  ~0x00001fff
+.b32 ctx_dst_zsize                  ~0x000007ff
+.b32 ctx_dst_zoff                   ~0x00000fff
+.b32 ctx_dst_xoff                   ~0x0007ffff
+.b32 ctx_dst_yoff                   ~0x00001fff
+// mthd 0x0300-0x0304, EXEC, WRCACHE_FLUSH
+.b16 0xc0 2
+.b32 0x00010000 + cmd_exec          ~0xffffffff
+.b32 0x00010000 + cmd_wrcache_flush ~0xffffffff
+// mthd 0x030c-0x0340, various stuff
+.b16 0xc3 14
+.b32 ctx_src_address_high           ~0x000000ff
+.b32 ctx_src_address_low            ~0xfffffff0
+.b32 ctx_dst_address_high           ~0x000000ff
+.b32 ctx_dst_address_low            ~0xfffffff0
+.b32 ctx_src_pitch                  ~0x0007ffff
+.b32 ctx_dst_pitch                  ~0x0007ffff
+.b32 ctx_xcnt                       ~0x0000ffff
+.b32 ctx_ycnt                       ~0x00001fff
+.b32 ctx_format                     ~0x0333ffff
+.b32 ctx_swz_const0                 ~0xffffffff
+.b32 ctx_swz_const1                 ~0xffffffff
+.b32 ctx_query_address_high         ~0x000000ff
+.b32 ctx_query_address_low          ~0xffffffff
+.b32 ctx_query_counter              ~0xffffffff
+.b16 0x800 0
+
+ifdef(`NVA3',
+.section nva3_pcopy_code,
+.section nvc0_pcopy_code
+)
+
+main:
+   clear b32 $r0
+   mov $sp $r0
+
+   // setup i0 handler and route fifo and ctxswitch to it
+   mov $r1 ih
+   mov $iv0 $r1
+   mov $r1 0x400
+   movw $r2 0xfff3
+   sethi $r2 0
+   iowr I[$r2 + 0x300] $r2
+
+   // enable interrupts
+   or $r2 0xc
+   iowr I[$r1] $r2
+   bset $flags ie0
+
+   // enable fifo access and context switching
+   mov $r1 0x1200
+   mov $r2 3
+   iowr I[$r1] $r2
+
+   // sleep forever, waking for interrupts
+   bset $flags $p0
+   spin:
+      sleep $p0
+      bra spin
+
+// i0 handler
+ih:
+   iord $r1 I[$r0 + 0x200]
+
+   and $r2 $r1 0x00000008
+   bra e ih_no_chsw
+      call chsw
+   ih_no_chsw:
+   and $r2 $r1 0x00000004
+   bra e ih_no_cmd
+      call dispatch
+
+   ih_no_cmd:
+   and $r1 $r1 0x0000000c
+   iowr I[$r0 + 0x100] $r1
+   iret
+
+// $p1 direction (0 = unload, 1 = load)
+// $r3 channel
+swctx:
+   mov $r4 0x7700
+   mov $xtargets $r4
+ifdef(`NVA3', `
+   // target 7 hardcoded to ctx dma object
+   mov $xdbase $r0
+', ` // NVC0
+   // read SCRATCH3 to decide if we are PCOPY0 or PCOPY1
+   mov $r4 0x2100
+   iord $r4 I[$r4 + 0]
+   and $r4 1
+   shl b32 $r4 4
+   add b32 $r4 0x30
+
+   // channel is in vram
+   mov $r15 0x61c
+   shl b32 $r15 6
+   mov $r5 0x114
+   iowrs I[$r15] $r5
+
+   // read 16-byte PCOPYn info, containing context pointer, from channel
+   shl b32 $r5 $r3 4
+   add b32 $r5 2
+   mov $xdbase $r5
+   mov $r5 $sp
+   // get a chunk of stack space, aligned to 256 byte boundary
+   sub b32 $r5 0x100
+   mov $r6 0xff
+   not b32 $r6
+   and $r5 $r6
+   sethi $r5 0x00020000
+   xdld $r4 $r5
+   xdwait
+   sethi $r5 0
+
+   // set context pointer, from within channel VM
+   mov $r14 0
+   iowrs I[$r15] $r14
+   ld b32 $r4 D[$r5 + 0]
+   shr b32 $r4 8
+   ld b32 $r6 D[$r5 + 4]
+   shl b32 $r6 24
+   or $r4 $r6
+   mov $xdbase $r4
+')
+   // 256-byte context, at start of data segment
+   mov b32 $r4 $r0
+   sethi $r4 0x60000
+
+   // swap!
+   bra $p1 swctx_load
+      xdst $r0 $r4
+      bra swctx_done
+   swctx_load:
+      xdld $r0 $r4
+   swctx_done:
+   xdwait
+   ret
+
+chsw:
+   // read current channel
+   mov $r2 0x1400
+   iord $r3 I[$r2]
+
+   // if it's active, unload it and return
+   xbit $r15 $r3 0x1e
+   bra e chsw_no_unload
+      bclr $flags $p1
+      call swctx
+      bclr $r3 0x1e
+      iowr I[$r2] $r3
+      mov $r4 1
+      iowr I[$r2 + 0x200] $r4
+      ret
+
+   // read next channel
+   chsw_no_unload:
+   iord $r3 I[$r2 + 0x100]
+
+   // is there a channel waiting to be loaded?
+   xbit $r13 $r3 0x1e
+   bra e chsw_finish_load
+      bset $flags $p1
+      call swctx
+ifdef(`NVA3',
+      // load dma objects back into TARGET regs
+      mov $r5 ctx_dma
+      mov $r6 ctx_dma_count
+      chsw_load_ctx_dma:
+         ld b32 $r7 D[$r5 + $r6 * 4]
+         add b32 $r8 $r6 0x180
+         shl b32 $r8 8
+         iowr I[$r8] $r7
+         sub b32 $r6 1
+         bra nc chsw_load_ctx_dma
+,)
+
+   chsw_finish_load:
+   mov $r3 2
+   iowr I[$r2 + 0x200] $r3
+   ret
+
+dispatch:
+   // read incoming fifo command
+   mov $r3 0x1900
+   iord $r2 I[$r3 + 0x100]
+   iord $r3 I[$r3 + 0x000]
+   and $r4 $r2 0x7ff
+   // $r2 will be used to store exception data
+   shl b32 $r2 0x10
+
+   // lookup method in the dispatch table, ILLEGAL_MTHD if not found
+   mov $r5 dispatch_table
+   clear b32 $r6
+   clear b32 $r7
+   dispatch_loop:
+      ld b16 $r6 D[$r5 + 0]
+      ld b16 $r7 D[$r5 + 2]
+      add b32 $r5 4
+      cmpu b32 $r4 $r6
+      bra c dispatch_illegal_mthd
+      add b32 $r7 $r6
+      cmpu b32 $r4 $r7
+      bra c dispatch_valid_mthd
+      sub b32 $r7 $r6
+      shl b32 $r7 3
+      add b32 $r5 $r7
+      bra dispatch_loop
+
+   // ensure no bits set in reserved fields, INVALID_BITFIELD
+   dispatch_valid_mthd:
+   sub b32 $r4 $r6
+   shl b32 $r4 3
+   add b32 $r4 $r5
+   ld b32 $r5 D[$r4 + 4]
+   and $r5 $r3
+   cmpu b32 $r5 0
+   bra ne dispatch_invalid_bitfield
+
+   // depending on dispatch flags: execute method, or save data as state
+   ld b16 $r5 D[$r4 + 0]
+   ld b16 $r6 D[$r4 + 2]
+   cmpu b32 $r6 0
+   bra ne dispatch_cmd
+      st b32 D[$r5] $r3
+      bra dispatch_done
+   dispatch_cmd:
+      bclr $flags $p1
+      call $r5
+      bra $p1 dispatch_error
+      bra dispatch_done
+
+   dispatch_invalid_bitfield:
+   or $r2 2
+   dispatch_illegal_mthd:
+   or $r2 1
+
+   // store exception data in SCRATCH0/SCRATCH1, signal hostirq
+   dispatch_error:
+   mov $r4 0x1000
+   iowr I[$r4 + 0x000] $r2
+   iowr I[$r4 + 0x100] $r3
+   mov $r2 0x40
+   iowr I[$r0] $r2
+   hostirq_wait:
+      iord $r2 I[$r0 + 0x200]
+      and $r2 0x40
+      cmpu b32 $r2 0
+      bra ne hostirq_wait
+
+   dispatch_done:
+   mov $r2 0x1d00
+   mov $r3 1
+   iowr I[$r2] $r3
+   ret
+
+// No-operation
+//
+// Inputs:
+//    $r1: irqh state
+//    $r2: hostirq state
+//    $r3: data
+//    $r4: dispatch table entry
+// Outputs:
+//    $r1: irqh state
+//    $p1: set on error
+//       $r2: hostirq state
+//       $r3: data
+cmd_nop:
+   ret
+
+// PM_TRIGGER
+//
+// Inputs:
+//    $r1: irqh state
+//    $r2: hostirq state
+//    $r3: data
+//    $r4: dispatch table entry
+// Outputs:
+//    $r1: irqh state
+//    $p1: set on error
+//       $r2: hostirq state
+//       $r3: data
+cmd_pm_trigger:
+   mov $r2 0x2200
+   clear b32 $r3
+   sethi $r3 0x20000
+   iowr I[$r2] $r3
+   ret
+
+ifdef(`NVA3',
+// SET_DMA_* method handler
+//
+// Inputs:
+//    $r1: irqh state
+//    $r2: hostirq state
+//    $r3: data
+//    $r4: dispatch table entry
+// Outputs:
+//    $r1: irqh state
+//    $p1: set on error
+//       $r2: hostirq state
+//       $r3: data
+cmd_dma:
+   sub b32 $r4 dispatch_dma
+   shr b32 $r4 1
+   bset $r3 0x1e
+   st b32 D[$r4 + ctx_dma] $r3
+   add b32 $r4 0x600
+   shl b32 $r4 6
+   iowr I[$r4] $r3
+   ret
+,)
+
+// Calculates the hw swizzle mask and adjusts the surface's xcnt to match
+//
+cmd_exec_set_format:
+   // zero out a chunk of the stack to store the swizzle into
+   add $sp -0x10
+   st b32 D[$sp + 0x00] $r0
+   st b32 D[$sp + 0x04] $r0
+   st b32 D[$sp + 0x08] $r0
+   st b32 D[$sp + 0x0c] $r0
+
+   // extract cpp, src_ncomp and dst_ncomp from FORMAT
+   ld b32 $r4 D[$r0 + ctx_format]
+   extr $r5 $r4 16:17
+   add b32 $r5 1
+   extr $r6 $r4 20:21
+   add b32 $r6 1
+   extr $r7 $r4 24:25
+   add b32 $r7 1
+
+   // convert FORMAT swizzle mask to hw swizzle mask
+   bclr $flags $p2
+   clear b32 $r8
+   clear b32 $r9
+   ncomp_loop:
+      and $r10 $r4 0xf
+      shr b32 $r4 4
+      clear b32 $r11
+      bpc_loop:
+         cmpu b8 $r10 4
+         bra nc cmp_c0
+            mulu $r12 $r10 $r5
+            add b32 $r12 $r11
+            bset $flags $p2
+            bra bpc_next
+         cmp_c0:
+         bra ne cmp_c1
+            mov $r12 0x10
+            add b32 $r12 $r11
+            bra bpc_next
+         cmp_c1:
+         cmpu b8 $r10 6
+         bra nc cmp_zero
+            mov $r12 0x14
+            add b32 $r12 $r11
+            bra bpc_next
+         cmp_zero:
+            mov $r12 0x80
+         bpc_next:
+         st b8 D[$sp + $r8] $r12
+         add b32 $r8 1
+         add b32 $r11 1
+         cmpu b32 $r11 $r5
+         bra c bpc_loop
+      add b32 $r9 1
+      cmpu b32 $r9 $r7
+      bra c ncomp_loop
+
+   // SRC_XCNT = (xcnt * src_cpp), or 0 if no src ref in swz (hw will hang)
+   mulu $r6 $r5
+   st b32 D[$r0 + ctx_src_cpp] $r6
+   ld b32 $r8 D[$r0 + ctx_xcnt]
+   mulu $r6 $r8
+   bra $p2 dst_xcnt
+   clear b32 $r6
+
+   dst_xcnt:
+   mulu $r7 $r5
+   st b32 D[$r0 + ctx_dst_cpp] $r7
+   mulu $r7 $r8
+
+   mov $r5 0x810
+   shl b32 $r5 6
+   iowr I[$r5 + 0x000] $r6
+   iowr I[$r5 + 0x100] $r7
+   add b32 $r5 0x800
+   ld b32 $r6 D[$r0 + ctx_dst_cpp]
+   sub b32 $r6 1
+   shl b32 $r6 8
+   ld b32 $r7 D[$r0 + ctx_src_cpp]
+   sub b32 $r7 1
+   or $r6 $r7
+   iowr I[$r5 + 0x000] $r6
+   add b32 $r5 0x100
+   ld b32 $r6 D[$sp + 0x00]
+   iowr I[$r5 + 0x000] $r6
+   ld b32 $r6 D[$sp + 0x04]
+   iowr I[$r5 + 0x100] $r6
+   ld b32 $r6 D[$sp + 0x08]
+   iowr I[$r5 + 0x200] $r6
+   ld b32 $r6 D[$sp + 0x0c]
+   iowr I[$r5 + 0x300] $r6
+   add b32 $r5 0x400
+   ld b32 $r6 D[$r0 + ctx_swz_const0]
+   iowr I[$r5 + 0x000] $r6
+   ld b32 $r6 D[$r0 + ctx_swz_const1]
+   iowr I[$r5 + 0x100] $r6
+   add $sp 0x10
+   ret
+
+// Setup to handle a tiled surface
+//
+// Calculates a number of parameters the hardware requires in order
+// to correctly handle tiling.
+//
+// Offset calculation is performed as follows (Tp/Th/Td from TILE_MODE):
+//    nTx = round_up(w * cpp, 1 << Tp) >> Tp
+//    nTy = round_up(h, 1 << Th) >> Th
+//    Txo = (x * cpp) & ((1 << Tp) - 1)
+//     Tx = (x * cpp) >> Tp
+//    Tyo = y & ((1 << Th) - 1)
+//     Ty = y >> Th
+//    Tzo = z & ((1 << Td) - 1)
+//     Tz = z >> Td
+//
+//    off  = (Tzo << Tp << Th) + (Tyo << Tp) + Txo
+//    off += ((Tz * nTy * nTx)) + (Ty * nTx) + Tx) << Td << Th << Tp;
+//
+// Inputs:
+//    $r4: hw command (0x104800)
+//    $r5: ctx offset adjustment for src/dst selection
+//    $p2: set if dst surface
+//
+cmd_exec_set_surface_tiled:
+   // translate TILE_MODE into Tp, Th, Td shift values
+   ld b32 $r7 D[$r5 + ctx_src_tile_mode]
+   extr $r9 $r7 8:11
+   extr $r8 $r7 4:7
+ifdef(`NVA3',
+   add b32 $r8 2
+,
+   add b32 $r8 3
+)
+   extr $r7 $r7 0:3
+   cmp b32 $r7 0xe
+   bra ne xtile64
+   mov $r7 4
+   bra xtileok
+   xtile64:
+   xbit $r7 $flags $p2
+   add b32 $r7 17
+   bset $r4 $r7
+   mov $r7 6
+   xtileok:
+
+   // Op = (x * cpp) & ((1 << Tp) - 1)
+   // Tx = (x * cpp) >> Tp
+   ld b32 $r10 D[$r5 + ctx_src_xoff]
+   ld b32 $r11 D[$r5 + ctx_src_cpp]
+   mulu $r10 $r11
+   mov $r11 1
+   shl b32 $r11 $r7
+   sub b32 $r11 1
+   and $r12 $r10 $r11
+   shr b32 $r10 $r7
+
+   // Tyo = y & ((1 << Th) - 1)
+   // Ty  = y >> Th
+   ld b32 $r13 D[$r5 + ctx_src_yoff]
+   mov $r14 1
+   shl b32 $r14 $r8
+   sub b32 $r14 1
+   and $r11 $r13 $r14
+   shr b32 $r13 $r8
+
+   // YTILE = ((1 << Th) << 12) | ((1 << Th) - Tyo)
+   add b32 $r14 1
+   shl b32 $r15 $r14 12
+   sub b32 $r14 $r11
+   or $r15 $r14
+   xbit $r6 $flags $p2
+   add b32 $r6 0x208
+   shl b32 $r6 8
+   iowr I[$r6 + 0x000] $r15
+
+   // Op += Tyo << Tp
+   shl b32 $r11 $r7
+   add b32 $r12 $r11
+
+   // nTx = ((w * cpp) + ((1 << Tp) - 1) >> Tp)
+   ld b32 $r15 D[$r5 + ctx_src_xsize]
+   ld b32 $r11 D[$r5 + ctx_src_cpp]
+   mulu $r15 $r11
+   mov $r11 1
+   shl b32 $r11 $r7
+   sub b32 $r11 1
+   add b32 $r15 $r11
+   shr b32 $r15 $r7
+   push $r15
+
+   // nTy = (h + ((1 << Th) - 1)) >> Th
+   ld b32 $r15 D[$r5 + ctx_src_ysize]
+   mov $r11 1
+   shl b32 $r11 $r8
+   sub b32 $r11 1
+   add b32 $r15 $r11
+   shr b32 $r15 $r8
+   push $r15
+
+   // Tys = Tp + Th
+   // CFG_YZ_TILE_SIZE = ((1 << Th) >> 2) << Td
+   add b32 $r7 $r8
+   sub b32 $r8 2
+   mov $r11 1
+   shl b32 $r11 $r8
+   shl b32 $r11 $r9
+
+   // Tzo = z & ((1 << Td) - 1)
+   // Tz  = z >> Td
+   // Op += Tzo << Tys
+   // Ts  = Tys + Td
+   ld b32 $r8 D[$r5 + ctx_src_zoff]
+   mov $r14 1
+   shl b32 $r14 $r9
+   sub b32 $r14 1
+   and $r15 $r8 $r14
+   shl b32 $r15 $r7
+   add b32 $r12 $r15
+   add b32 $r7 $r9
+   shr b32 $r8 $r9
+
+   // Ot = ((Tz * nTy * nTx) + (Ty * nTx) + Tx) << Ts
+   pop $r15
+   pop $r9
+   mulu $r13 $r9
+   add b32 $r10 $r13
+   mulu $r8 $r9
+   mulu $r8 $r15
+   add b32 $r10 $r8
+   shl b32 $r10 $r7
+
+   // PITCH = (nTx - 1) << Ts
+   sub b32 $r9 1
+   shl b32 $r9 $r7
+   iowr I[$r6 + 0x200] $r9
+
+   // SRC_ADDRESS_LOW   = (Ot + Op) & 0xffffffff
+   // CFG_ADDRESS_HIGH |= ((Ot + Op) >> 32) << 16
+   ld b32 $r7 D[$r5 + ctx_src_address_low]
+   ld b32 $r8 D[$r5 + ctx_src_address_high]
+   add b32 $r10 $r12
+   add b32 $r7 $r10
+   adc b32 $r8 0
+   shl b32 $r8 16
+   or $r8 $r11
+   sub b32 $r6 0x600
+   iowr I[$r6 + 0x000] $r7
+   add b32 $r6 0x400
+   iowr I[$r6 + 0x000] $r8
+   ret
+
+// Setup to handle a linear surface
+//
+// Nothing to see here.. Sets ADDRESS and PITCH, pretty non-exciting
+//
+cmd_exec_set_surface_linear:
+   xbit $r6 $flags $p2
+   add b32 $r6 0x202
+   shl b32 $r6 8
+   ld b32 $r7 D[$r5 + ctx_src_address_low]
+   iowr I[$r6 + 0x000] $r7
+   add b32 $r6 0x400
+   ld b32 $r7 D[$r5 + ctx_src_address_high]
+   shl b32 $r7 16
+   iowr I[$r6 + 0x000] $r7
+   add b32 $r6 0x400
+   ld b32 $r7 D[$r5 + ctx_src_pitch]
+   iowr I[$r6 + 0x000] $r7
+   ret
+
+// wait for regs to be available for use
+cmd_exec_wait:
+   push $r0
+   push $r1
+   mov $r0 0x800
+   shl b32 $r0 6
+   loop:
+      iord $r1 I[$r0]
+      and $r1 1
+      bra ne loop
+   pop $r1
+   pop $r0
+   ret
+
+cmd_exec_query:
+   // if QUERY_SHORT not set, write out { -, 0, TIME_LO, TIME_HI }
+   xbit $r4 $r3 13
+   bra ne query_counter
+      call cmd_exec_wait
+      mov $r4 0x80c
+      shl b32 $r4 6
+      ld b32 $r5 D[$r0 + ctx_query_address_low]
+      add b32 $r5 4
+      iowr I[$r4 + 0x000] $r5
+      iowr I[$r4 + 0x100] $r0
+      mov $r5 0xc
+      iowr I[$r4 + 0x200] $r5
+      add b32 $r4 0x400
+      ld b32 $r5 D[$r0 + ctx_query_address_high]
+      shl b32 $r5 16
+      iowr I[$r4 + 0x000] $r5
+      add b32 $r4 0x500
+      mov $r5 0x00000b00
+      sethi $r5 0x00010000
+      iowr I[$r4 + 0x000] $r5
+      mov $r5 0x00004040
+      shl b32 $r5 1
+      sethi $r5 0x80800000
+      iowr I[$r4 + 0x100] $r5
+      mov $r5 0x00001110
+      sethi $r5 0x13120000
+      iowr I[$r4 + 0x200] $r5
+      mov $r5 0x00001514
+      sethi $r5 0x17160000
+      iowr I[$r4 + 0x300] $r5
+      mov $r5 0x00002601
+      sethi $r5 0x00010000
+      mov $r4 0x800
+      shl b32 $r4 6
+      iowr I[$r4 + 0x000] $r5
+
+   // write COUNTER
+   query_counter:
+   call cmd_exec_wait
+   mov $r4 0x80c
+   shl b32 $r4 6
+   ld b32 $r5 D[$r0 + ctx_query_address_low]
+   iowr I[$r4 + 0x000] $r5
+   iowr I[$r4 + 0x100] $r0
+   mov $r5 0x4
+   iowr I[$r4 + 0x200] $r5
+   add b32 $r4 0x400
+   ld b32 $r5 D[$r0 + ctx_query_address_high]
+   shl b32 $r5 16
+   iowr I[$r4 + 0x000] $r5
+   add b32 $r4 0x500
+   mov $r5 0x00000300
+   iowr I[$r4 + 0x000] $r5
+   mov $r5 0x00001110
+   sethi $r5 0x13120000
+   iowr I[$r4 + 0x100] $r5
+   ld b32 $r5 D[$r0 + ctx_query_counter]
+   add b32 $r4 0x500
+   iowr I[$r4 + 0x000] $r5
+   mov $r5 0x00002601
+   sethi $r5 0x00010000
+   mov $r4 0x800
+   shl b32 $r4 6
+   iowr I[$r4 + 0x000] $r5
+   ret
+
+// Execute a copy operation
+//
+// Inputs:
+//    $r1: irqh state
+//    $r2: hostirq state
+//    $r3: data
+//       000002000 QUERY_SHORT
+//       000001000 QUERY
+//       000000100 DST_LINEAR
+//       000000010 SRC_LINEAR
+//       000000001 FORMAT
+//    $r4: dispatch table entry
+// Outputs:
+//    $r1: irqh state
+//    $p1: set on error
+//       $r2: hostirq state
+//       $r3: data
+cmd_exec:
+   call cmd_exec_wait
+
+   // if format requested, call function to calculate it, otherwise
+   // fill in cpp/xcnt for both surfaces as if (cpp == 1)
+   xbit $r15 $r3 0
+   bra e cmd_exec_no_format
+      call cmd_exec_set_format
+      mov $r4 0x200
+      bra cmd_exec_init_src_surface
+   cmd_exec_no_format:
+      mov $r6 0x810
+      shl b32 $r6 6
+      mov $r7 1
+      st b32 D[$r0 + ctx_src_cpp] $r7
+      st b32 D[$r0 + ctx_dst_cpp] $r7
+      ld b32 $r7 D[$r0 + ctx_xcnt]
+      iowr I[$r6 + 0x000] $r7
+      iowr I[$r6 + 0x100] $r7
+      clear b32 $r4
+
+   cmd_exec_init_src_surface:
+   bclr $flags $p2
+   clear b32 $r5
+   xbit $r15 $r3 4
+   bra e src_tiled
+      call cmd_exec_set_surface_linear
+      bra cmd_exec_init_dst_surface
+   src_tiled:
+      call cmd_exec_set_surface_tiled
+      bset $r4 7
+
+   cmd_exec_init_dst_surface:
+   bset $flags $p2
+   mov $r5 ctx_dst_address_high - ctx_src_address_high
+   xbit $r15 $r3 8
+   bra e dst_tiled
+      call cmd_exec_set_surface_linear
+      bra cmd_exec_kick
+   dst_tiled:
+      call cmd_exec_set_surface_tiled
+      bset $r4 8
+
+   cmd_exec_kick:
+   mov $r5 0x800
+   shl b32 $r5 6
+   ld b32 $r6 D[$r0 + ctx_ycnt]
+   iowr I[$r5 + 0x100] $r6
+   mov $r6 0x0041
+   // SRC_TARGET = 1, DST_TARGET = 2
+   sethi $r6 0x44000000
+   or $r4 $r6
+   iowr I[$r5] $r4
+
+   // if requested, queue up a QUERY write after the copy has completed
+   xbit $r15 $r3 12
+   bra e cmd_exec_done
+      call cmd_exec_query
+
+   cmd_exec_done:
+   ret
+
+// Flush write cache
+//
+// Inputs:
+//    $r1: irqh state
+//    $r2: hostirq state
+//    $r3: data
+//    $r4: dispatch table entry
+// Outputs:
+//    $r1: irqh state
+//    $p1: set on error
+//       $r2: hostirq state
+//       $r3: data
+cmd_wrcache_flush:
+   mov $r2 0x2200
+   clear b32 $r3
+   sethi $r3 0x10000
+   iowr I[$r2] $r3
+   ret
+
+.align 0x100
diff --git a/drivers/gpu/drm/nouveau/nva3_copy.fuc.h b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
new file mode 100644
index 000000000000..2731de22ebe9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nva3_copy.fuc.h
@@ -0,0 +1,534 @@
+uint32_t nva3_pcopy_data[] = {
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00010000,
+	0x00000000,
+	0x00000000,
+	0x00010040,
+	0x00010160,
+	0x00000000,
+	0x00010050,
+	0x00010162,
+	0x00000000,
+	0x00030060,
+	0x00010170,
+	0x00000000,
+	0x00010170,
+	0x00000000,
+	0x00010170,
+	0x00000000,
+	0x00070080,
+	0x00000028,
+	0xfffff000,
+	0x0000002c,
+	0xfff80000,
+	0x00000030,
+	0xffffe000,
+	0x00000034,
+	0xfffff800,
+	0x00000038,
+	0xfffff000,
+	0x0000003c,
+	0xfff80000,
+	0x00000040,
+	0xffffe000,
+	0x00070088,
+	0x00000054,
+	0xfffff000,
+	0x00000058,
+	0xfff80000,
+	0x0000005c,
+	0xffffe000,
+	0x00000060,
+	0xfffff800,
+	0x00000064,
+	0xfffff000,
+	0x00000068,
+	0xfff80000,
+	0x0000006c,
+	0xffffe000,
+	0x000200c0,
+	0x00010492,
+	0x00000000,
+	0x0001051b,
+	0x00000000,
+	0x000e00c3,
+	0x0000001c,
+	0xffffff00,
+	0x00000020,
+	0x0000000f,
+	0x00000048,
+	0xffffff00,
+	0x0000004c,
+	0x0000000f,
+	0x00000024,
+	0xfff80000,
+	0x00000050,
+	0xfff80000,
+	0x00000080,
+	0xffff0000,
+	0x00000084,
+	0xffffe000,
+	0x00000074,
+	0xfccc0000,
+	0x00000078,
+	0x00000000,
+	0x0000007c,
+	0x00000000,
+	0x00000010,
+	0xffffff00,
+	0x00000014,
+	0x00000000,
+	0x00000018,
+	0x00000000,
+	0x00000800,
+};
+
+uint32_t nva3_pcopy_code[] = {
+	0x04fe04bd,
+	0x3517f000,
+	0xf10010fe,
+	0xf1040017,
+	0xf0fff327,
+	0x22d00023,
+	0x0c25f0c0,
+	0xf40012d0,
+	0x17f11031,
+	0x27f01200,
+	0x0012d003,
+	0xf40031f4,
+	0x0ef40028,
+	0x8001cffd,
+	0xf40812c4,
+	0x21f4060b,
+	0x0412c472,
+	0xf4060bf4,
+	0x11c4c321,
+	0x4001d00c,
+	0x47f101f8,
+	0x4bfe7700,
+	0x0007fe00,
+	0xf00204b9,
+	0x01f40643,
+	0x0604fa09,
+	0xfa060ef4,
+	0x03f80504,
+	0x27f100f8,
+	0x23cf1400,
+	0x1e3fc800,
+	0xf4170bf4,
+	0x21f40132,
+	0x1e3af052,
+	0xf00023d0,
+	0x24d00147,
+	0xcf00f880,
+	0x3dc84023,
+	0x220bf41e,
+	0xf40131f4,
+	0x57f05221,
+	0x0367f004,
+	0xa07856bc,
+	0xb6018068,
+	0x87d00884,
+	0x0162b600,
+	0xf0f018f4,
+	0x23d00237,
+	0xf100f880,
+	0xcf190037,
+	0x33cf4032,
+	0xff24e400,
+	0x1024b607,
+	0x010057f1,
+	0x74bd64bd,
+	0x58005658,
+	0x50b60157,
+	0x0446b804,
+	0xbb4d08f4,
+	0x47b80076,
+	0x0f08f404,
+	0xb60276bb,
+	0x57bb0374,
+	0xdf0ef400,
+	0xb60246bb,
+	0x45bb0344,
+	0x01459800,
+	0xb00453fd,
+	0x1bf40054,
+	0x00455820,
+	0xb0014658,
+	0x1bf40064,
+	0x00538009,
+	0xf4300ef4,
+	0x55f90132,
+	0xf40c01f4,
+	0x25f0250e,
+	0x0125f002,
+	0x100047f1,
+	0xd00042d0,
+	0x27f04043,
+	0x0002d040,
+	0xf08002cf,
+	0x24b04024,
+	0xf71bf400,
+	0x1d0027f1,
+	0xd00137f0,
+	0x00f80023,
+	0x27f100f8,
+	0x34bd2200,
+	0xd00233f0,
+	0x00f80023,
+	0x012842b7,
+	0xf00145b6,
+	0x43801e39,
+	0x0040b701,
+	0x0644b606,
+	0xf80043d0,
+	0xf030f400,
+	0xb00001b0,
+	0x01b00101,
+	0x0301b002,
+	0xc71d0498,
+	0x50b63045,
+	0x3446c701,
+	0xc70160b6,
+	0x70b63847,
+	0x0232f401,
+	0x94bd84bd,
+	0xb60f4ac4,
+	0xb4bd0445,
+	0xf404a430,
+	0xa5ff0f18,
+	0x00cbbbc0,
+	0xf40231f4,
+	0x1bf4220e,
+	0x10c7f00c,
+	0xf400cbbb,
+	0xa430160e,
+	0x0c18f406,
+	0xbb14c7f0,
+	0x0ef400cb,
+	0x80c7f107,
+	0x01c83800,
+	0xb60180b6,
+	0xb5b801b0,
+	0xc308f404,
+	0xb80190b6,
+	0x08f40497,
+	0x0065fdb2,
+	0x98110680,
+	0x68fd2008,
+	0x0502f400,
+	0x75fd64bd,
+	0x1c078000,
+	0xf10078fd,
+	0xb6081057,
+	0x56d00654,
+	0x4057d000,
+	0x080050b7,
+	0xb61c0698,
+	0x64b60162,
+	0x11079808,
+	0xfd0172b6,
+	0x56d00567,
+	0x0050b700,
+	0x0060b401,
+	0xb40056d0,
+	0x56d00160,
+	0x0260b440,
+	0xb48056d0,
+	0x56d00360,
+	0x0050b7c0,
+	0x1e069804,
+	0x980056d0,
+	0x56d01f06,
+	0x1030f440,
+	0x579800f8,
+	0x6879c70a,
+	0xb66478c7,
+	0x77c70280,
+	0x0e76b060,
+	0xf0091bf4,
+	0x0ef40477,
+	0x027cf00f,
+	0xfd1170b6,
+	0x77f00947,
+	0x0f5a9806,
+	0xfd115b98,
+	0xb7f000ab,
+	0x04b7bb01,
+	0xff01b2b6,
+	0xa7bbc4ab,
+	0x105d9805,
+	0xbb01e7f0,
+	0xe2b604e8,
+	0xb4deff01,
+	0xb605d8bb,
+	0xef9401e0,
+	0x02ebbb0c,
+	0xf005fefd,
+	0x60b7026c,
+	0x64b60208,
+	0x006fd008,
+	0xbb04b7bb,
+	0x5f9800cb,
+	0x115b980b,
+	0xf000fbfd,
+	0xb7bb01b7,
+	0x01b2b604,
+	0xbb00fbbb,
+	0xf0f905f7,
+	0xf00c5f98,
+	0xb8bb01b7,
+	0x01b2b604,
+	0xbb00fbbb,
+	0xf0f905f8,
+	0xb60078bb,
+	0xb7f00282,
+	0x04b8bb01,
+	0x9804b9bb,
+	0xe7f00e58,
+	0x04e9bb01,
+	0xff01e2b6,
+	0xf7bbf48e,
+	0x00cfbb04,
+	0xbb0079bb,
+	0xf0fc0589,
+	0xd9fd90fc,
+	0x00adbb00,
+	0xfd0089fd,
+	0xa8bb008f,
+	0x04a7bb00,
+	0xbb0192b6,
+	0x69d00497,
+	0x08579880,
+	0xbb075898,
+	0x7abb00ac,
+	0x0081b600,
+	0xfd1084b6,
+	0x62b7058b,
+	0x67d00600,
+	0x0060b700,
+	0x0068d004,
+	0x6cf000f8,
+	0x0260b702,
+	0x0864b602,
+	0xd0085798,
+	0x60b70067,
+	0x57980400,
+	0x1074b607,
+	0xb70067d0,
+	0x98040060,
+	0x67d00957,
+	0xf900f800,
+	0xf110f900,
+	0xb6080007,
+	0x01cf0604,
+	0x0114f000,
+	0xfcfa1bf4,
+	0xf800fc10,
+	0x0d34c800,
+	0xf5701bf4,
+	0xf103ab21,
+	0xb6080c47,
+	0x05980644,
+	0x0450b605,
+	0xd00045d0,
+	0x57f04040,
+	0x8045d00c,
+	0x040040b7,
+	0xb6040598,
+	0x45d01054,
+	0x0040b700,
+	0x0057f105,
+	0x0153f00b,
+	0xf10045d0,
+	0xb6404057,
+	0x53f10154,
+	0x45d08080,
+	0x1057f140,
+	0x1253f111,
+	0x8045d013,
+	0x151457f1,
+	0x171653f1,
+	0xf1c045d0,
+	0xf0260157,
+	0x47f10153,
+	0x44b60800,
+	0x0045d006,
+	0x03ab21f5,
+	0x080c47f1,
+	0x980644b6,
+	0x45d00505,
+	0x4040d000,
+	0xd00457f0,
+	0x40b78045,
+	0x05980400,
+	0x1054b604,
+	0xb70045d0,
+	0xf1050040,
+	0xd0030057,
+	0x57f10045,
+	0x53f11110,
+	0x45d01312,
+	0x06059840,
+	0x050040b7,
+	0xf10045d0,
+	0xf0260157,
+	0x47f10153,
+	0x44b60800,
+	0x0045d006,
+	0x21f500f8,
+	0x3fc803ab,
+	0x0e0bf400,
+	0x018921f5,
+	0x020047f1,
+	0xf11e0ef4,
+	0xb6081067,
+	0x77f00664,
+	0x11078001,
+	0x981c0780,
+	0x67d02007,
+	0x4067d000,
+	0x32f444bd,
+	0xc854bd02,
+	0x0bf4043f,
+	0x8221f50a,
+	0x0a0ef403,
+	0x027621f5,
+	0xf40749f0,
+	0x57f00231,
+	0x083fc82c,
+	0xf50a0bf4,
+	0xf4038221,
+	0x21f50a0e,
+	0x49f00276,
+	0x0057f108,
+	0x0654b608,
+	0xd0210698,
+	0x67f04056,
+	0x0063f141,
+	0x0546fd44,
+	0xc80054d0,
+	0x0bf40c3f,
+	0xc521f507,
+	0xf100f803,
+	0xbd220027,
+	0x0133f034,
+	0xf80023d0,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/nva3_pm.c b/drivers/gpu/drm/nouveau/nva3_pm.c
index dbbafed36406..e4b2b9e934b2 100644
--- a/drivers/gpu/drm/nouveau/nva3_pm.c
+++ b/drivers/gpu/drm/nouveau/nva3_pm.c
@@ -27,32 +27,74 @@
 #include "nouveau_bios.h"
 #include "nouveau_pm.h"
 
-/*XXX: boards using limits 0x40 need fixing, the register layout
- *     is correct here, but, there's some other funny magic
- *     that modifies things, so it's not likely we'll set/read
- *     the correct timings yet..  working on it...
+/* This is actually a lot more complex than it appears here, but hopefully
+ * this should be able to deal with what the VBIOS leaves for us..
+ *
+ * If not, well, I'll jump off that bridge when I come to it.
  */
 
 struct nva3_pm_state {
-	struct pll_lims pll;
-	int N, M, P;
+	enum pll_types type;
+	u32 src0;
+	u32 src1;
+	u32 ctrl;
+	u32 coef;
+	u32 old_pnm;
+	u32 new_pnm;
+	u32 new_div;
 };
 
+static int
+nva3_pm_pll_offset(u32 id)
+{
+	static const u32 pll_map[] = {
+		0x00, PLL_CORE,
+		0x01, PLL_SHADER,
+		0x02, PLL_MEMORY,
+		0x00, 0x00
+	};
+	const u32 *map = pll_map;
+
+	while (map[1]) {
+		if (id == map[1])
+			return map[0];
+		map += 2;
+	}
+
+	return -ENOENT;
+}
+
 int
 nva3_pm_clock_get(struct drm_device *dev, u32 id)
 {
+	u32 src0, src1, ctrl, coef;
 	struct pll_lims pll;
-	int P, N, M, ret;
-	u32 reg;
+	int ret, off;
+	int P, N, M;
 
 	ret = get_pll_limits(dev, id, &pll);
 	if (ret)
 		return ret;
 
-	reg = nv_rd32(dev, pll.reg + 4);
-	P = (reg & 0x003f0000) >> 16;
-	N = (reg & 0x0000ff00) >> 8;
-	M = (reg & 0x000000ff);
+	off = nva3_pm_pll_offset(id);
+	if (off < 0)
+		return off;
+
+	src0 = nv_rd32(dev, 0x4120 + (off * 4));
+	src1 = nv_rd32(dev, 0x4160 + (off * 4));
+	ctrl = nv_rd32(dev, pll.reg + 0);
+	coef = nv_rd32(dev, pll.reg + 4);
+	NV_DEBUG(dev, "PLL %02x: 0x%08x 0x%08x 0x%08x 0x%08x\n",
+		      id, src0, src1, ctrl, coef);
+
+	if (ctrl & 0x00000008) {
+		u32 div = ((src1 & 0x003c0000) >> 18) + 1;
+		return (pll.refclk * 2) / div;
+	}
+
+	P = (coef & 0x003f0000) >> 16;
+	N = (coef & 0x0000ff00) >> 8;
+	M = (coef & 0x000000ff);
 	return pll.refclk * N / M / P;
 }
 
@@ -60,36 +102,103 @@ void *
 nva3_pm_clock_pre(struct drm_device *dev, struct nouveau_pm_level *perflvl,
 		  u32 id, int khz)
 {
-	struct nva3_pm_state *state;
-	int dummy, ret;
+	struct nva3_pm_state *pll;
+	struct pll_lims limits;
+	int N, M, P, diff;
+	int ret, off;
+
+	ret = get_pll_limits(dev, id, &limits);
+	if (ret < 0)
+		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
+
+	off = nva3_pm_pll_offset(id);
+	if (id < 0)
+		return ERR_PTR(-EINVAL);
 
-	state = kzalloc(sizeof(*state), GFP_KERNEL);
-	if (!state)
+
+	pll = kzalloc(sizeof(*pll), GFP_KERNEL);
+	if (!pll)
 		return ERR_PTR(-ENOMEM);
+	pll->type = id;
+	pll->src0 = 0x004120 + (off * 4);
+	pll->src1 = 0x004160 + (off * 4);
+	pll->ctrl = limits.reg + 0;
+	pll->coef = limits.reg + 4;
 
-	ret = get_pll_limits(dev, id, &state->pll);
-	if (ret < 0) {
-		kfree(state);
-		return (ret == -ENOENT) ? NULL : ERR_PTR(ret);
+	/* If target clock is within [-2, 3) MHz of a divisor, we'll
+	 * use that instead of calculating MNP values
+	 */
+	pll->new_div = min((limits.refclk * 2) / (khz - 2999), 16);
+	if (pll->new_div) {
+		diff = khz - ((limits.refclk * 2) / pll->new_div);
+		if (diff < -2000 || diff >= 3000)
+			pll->new_div = 0;
 	}
 
-	ret = nv50_calc_pll2(dev, &state->pll, khz, &state->N, &dummy,
-			     &state->M, &state->P);
-	if (ret < 0) {
-		kfree(state);
-		return ERR_PTR(ret);
+	if (!pll->new_div) {
+		ret = nva3_calc_pll(dev, &limits, khz, &N, NULL, &M, &P);
+		if (ret < 0)
+			return ERR_PTR(ret);
+
+		pll->new_pnm = (P << 16) | (N << 8) | M;
+		pll->new_div = 2 - 1;
+	} else {
+		pll->new_pnm = 0;
+		pll->new_div--;
 	}
 
-	return state;
+	if ((nv_rd32(dev, pll->src1) & 0x00000101) != 0x00000101)
+		pll->old_pnm = nv_rd32(dev, pll->coef);
+	return pll;
 }
 
 void
 nva3_pm_clock_set(struct drm_device *dev, void *pre_state)
 {
-	struct nva3_pm_state *state = pre_state;
-	u32 reg = state->pll.reg;
+	struct nva3_pm_state *pll = pre_state;
+	u32 ctrl = 0;
+
+	/* For the memory clock, NVIDIA will build a "script" describing
+	 * the reclocking process and ask PDAEMON to execute it.
+	 */
+	if (pll->type == PLL_MEMORY) {
+		nv_wr32(dev, 0x100210, 0);
+		nv_wr32(dev, 0x1002dc, 1);
+		nv_wr32(dev, 0x004018, 0x00001000);
+		ctrl = 0x18000100;
+	}
+
+	if (pll->old_pnm || !pll->new_pnm) {
+		nv_mask(dev, pll->src1, 0x003c0101, 0x00000101 |
+						    (pll->new_div << 18));
+		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
+		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
+	}
+
+	if (pll->new_pnm) {
+		nv_mask(dev, pll->src0, 0x00000101, 0x00000101);
+		nv_wr32(dev, pll->coef, pll->new_pnm);
+		nv_wr32(dev, pll->ctrl, 0x0001001d | ctrl);
+		nv_mask(dev, pll->ctrl, 0x00000010, 0x00000000);
+		nv_mask(dev, pll->ctrl, 0x00020010, 0x00020010);
+		nv_wr32(dev, pll->ctrl, 0x00010015 | ctrl);
+		nv_mask(dev, pll->src1, 0x00000100, 0x00000000);
+		nv_mask(dev, pll->src1, 0x00000001, 0x00000000);
+		if (pll->type == PLL_MEMORY)
+			nv_wr32(dev, 0x4018, 0x10005000);
+	} else {
+		nv_mask(dev, pll->ctrl, 0x00000001, 0x00000000);
+		nv_mask(dev, pll->src0, 0x00000100, 0x00000000);
+		nv_mask(dev, pll->src0, 0x00000001, 0x00000000);
+		if (pll->type == PLL_MEMORY)
+			nv_wr32(dev, 0x4018, 0x1000d000);
+	}
+
+	if (pll->type == PLL_MEMORY) {
+		nv_wr32(dev, 0x1002dc, 0);
+		nv_wr32(dev, 0x100210, 0x80000000);
+	}
 
-	nv_wr32(dev, reg + 4, (state->P << 16) | (state->N << 8) | state->M);
-	kfree(state);
+	kfree(pll);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.c b/drivers/gpu/drm/nouveau/nvc0_copy.c
new file mode 100644
index 000000000000..208fa7ab3f42
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_copy.c
@@ -0,0 +1,243 @@
+/*
+ * Copyright 2011 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+
+#include <linux/firmware.h>
+#include "drmP.h"
+#include "nouveau_drv.h"
+#include "nouveau_util.h"
+#include "nouveau_vm.h"
+#include "nouveau_ramht.h"
+#include "nvc0_copy.fuc.h"
+
+struct nvc0_copy_engine {
+	struct nouveau_exec_engine base;
+	u32 irq;
+	u32 pmc;
+	u32 fuc;
+	u32 ctx;
+};
+
+static int
+nvc0_copy_context_new(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(chan->dev, engine);
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_gpuobj *ramin = chan->ramin;
+	struct nouveau_gpuobj *ctx = NULL;
+	int ret;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 256, 256,
+				 NVOBJ_FLAG_VM | NVOBJ_FLAG_VM_USER |
+				 NVOBJ_FLAG_ZERO_ALLOC, &ctx);
+	if (ret)
+		return ret;
+
+	nv_wo32(ramin, pcopy->ctx + 0, lower_32_bits(ctx->vinst));
+	nv_wo32(ramin, pcopy->ctx + 4, upper_32_bits(ctx->vinst));
+	dev_priv->engine.instmem.flush(dev);
+
+	chan->engctx[engine] = ctx;
+	return 0;
+}
+
+static int
+nvc0_copy_object_new(struct nouveau_channel *chan, int engine,
+		     u32 handle, u16 class)
+{
+	return 0;
+}
+
+static void
+nvc0_copy_context_del(struct nouveau_channel *chan, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(chan->dev, engine);
+	struct nouveau_gpuobj *ctx = chan->engctx[engine];
+	struct drm_device *dev = chan->dev;
+	u32 inst;
+
+	inst  = (chan->ramin->vinst >> 12);
+	inst |= 0x40000000;
+
+	/* disable fifo access */
+	nv_wr32(dev, pcopy->fuc + 0x048, 0x00000000);
+	/* mark channel as unloaded if it's currently active */
+	if (nv_rd32(dev, pcopy->fuc + 0x050) == inst)
+		nv_mask(dev, pcopy->fuc + 0x050, 0x40000000, 0x00000000);
+	/* mark next channel as invalid if it's about to be loaded */
+	if (nv_rd32(dev, pcopy->fuc + 0x054) == inst)
+		nv_mask(dev, pcopy->fuc + 0x054, 0x40000000, 0x00000000);
+	/* restore fifo access */
+	nv_wr32(dev, pcopy->fuc + 0x048, 0x00000003);
+
+	nv_wo32(chan->ramin, pcopy->ctx + 0, 0x00000000);
+	nv_wo32(chan->ramin, pcopy->ctx + 4, 0x00000000);
+	nouveau_gpuobj_ref(NULL, &ctx);
+
+	chan->engctx[engine] = ctx;
+}
+
+static int
+nvc0_copy_init(struct drm_device *dev, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(dev, engine);
+	int i;
+
+	nv_mask(dev, 0x000200, pcopy->pmc, 0x00000000);
+	nv_mask(dev, 0x000200, pcopy->pmc, pcopy->pmc);
+	nv_wr32(dev, pcopy->fuc + 0x014, 0xffffffff);
+
+	nv_wr32(dev, pcopy->fuc + 0x1c0, 0x01000000);
+	for (i = 0; i < sizeof(nvc0_pcopy_data) / 4; i++)
+		nv_wr32(dev, pcopy->fuc + 0x1c4, nvc0_pcopy_data[i]);
+
+	nv_wr32(dev, pcopy->fuc + 0x180, 0x01000000);
+	for (i = 0; i < sizeof(nvc0_pcopy_code) / 4; i++) {
+		if ((i & 0x3f) == 0)
+			nv_wr32(dev, pcopy->fuc + 0x188, i >> 6);
+		nv_wr32(dev, pcopy->fuc + 0x184, nvc0_pcopy_code[i]);
+	}
+
+	nv_wr32(dev, pcopy->fuc + 0x084, engine - NVOBJ_ENGINE_COPY0);
+	nv_wr32(dev, pcopy->fuc + 0x10c, 0x00000000);
+	nv_wr32(dev, pcopy->fuc + 0x104, 0x00000000); /* ENTRY */
+	nv_wr32(dev, pcopy->fuc + 0x100, 0x00000002); /* TRIGGER */
+	return 0;
+}
+
+static int
+nvc0_copy_fini(struct drm_device *dev, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(dev, engine);
+
+	nv_mask(dev, pcopy->fuc + 0x048, 0x00000003, 0x00000000);
+
+	/* trigger fuc context unload */
+	nv_wait(dev, pcopy->fuc + 0x008, 0x0000000c, 0x00000000);
+	nv_mask(dev, pcopy->fuc + 0x054, 0x40000000, 0x00000000);
+	nv_wr32(dev, pcopy->fuc + 0x000, 0x00000008);
+	nv_wait(dev, pcopy->fuc + 0x008, 0x00000008, 0x00000000);
+
+	nv_wr32(dev, pcopy->fuc + 0x014, 0xffffffff);
+	return 0;
+}
+
+static struct nouveau_enum nvc0_copy_isr_error_name[] = {
+	{ 0x0001, "ILLEGAL_MTHD" },
+	{ 0x0002, "INVALID_ENUM" },
+	{ 0x0003, "INVALID_BITFIELD" },
+	{}
+};
+
+static void
+nvc0_copy_isr(struct drm_device *dev, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(dev, engine);
+	u32 disp = nv_rd32(dev, pcopy->fuc + 0x01c);
+	u32 stat = nv_rd32(dev, pcopy->fuc + 0x008) & disp & ~(disp >> 16);
+	u64 inst = (u64)(nv_rd32(dev, pcopy->fuc + 0x050) & 0x0fffffff) << 12;
+	u32 chid = nvc0_graph_isr_chid(dev, inst);
+	u32 ssta = nv_rd32(dev, pcopy->fuc + 0x040) & 0x0000ffff;
+	u32 addr = nv_rd32(dev, pcopy->fuc + 0x040) >> 16;
+	u32 mthd = (addr & 0x07ff) << 2;
+	u32 subc = (addr & 0x3800) >> 11;
+	u32 data = nv_rd32(dev, pcopy->fuc + 0x044);
+
+	if (stat & 0x00000040) {
+		NV_INFO(dev, "PCOPY: DISPATCH_ERROR [");
+		nouveau_enum_print(nvc0_copy_isr_error_name, ssta);
+		printk("] ch %d [0x%010llx] subc %d mthd 0x%04x data 0x%08x\n",
+			chid, inst, subc, mthd, data);
+		nv_wr32(dev, pcopy->fuc + 0x004, 0x00000040);
+		stat &= ~0x00000040;
+	}
+
+	if (stat) {
+		NV_INFO(dev, "PCOPY: unhandled intr 0x%08x\n", stat);
+		nv_wr32(dev, pcopy->fuc + 0x004, stat);
+	}
+}
+
+static void
+nvc0_copy_isr_0(struct drm_device *dev)
+{
+	nvc0_copy_isr(dev, NVOBJ_ENGINE_COPY0);
+}
+
+static void
+nvc0_copy_isr_1(struct drm_device *dev)
+{
+	nvc0_copy_isr(dev, NVOBJ_ENGINE_COPY1);
+}
+
+static void
+nvc0_copy_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_copy_engine *pcopy = nv_engine(dev, engine);
+
+	nouveau_irq_unregister(dev, pcopy->irq);
+
+	if (engine == NVOBJ_ENGINE_COPY0)
+		NVOBJ_ENGINE_DEL(dev, COPY0);
+	else
+		NVOBJ_ENGINE_DEL(dev, COPY1);
+	kfree(pcopy);
+}
+
+int
+nvc0_copy_create(struct drm_device *dev, int engine)
+{
+	struct nvc0_copy_engine *pcopy;
+
+	pcopy = kzalloc(sizeof(*pcopy), GFP_KERNEL);
+	if (!pcopy)
+		return -ENOMEM;
+
+	pcopy->base.destroy = nvc0_copy_destroy;
+	pcopy->base.init = nvc0_copy_init;
+	pcopy->base.fini = nvc0_copy_fini;
+	pcopy->base.context_new = nvc0_copy_context_new;
+	pcopy->base.context_del = nvc0_copy_context_del;
+	pcopy->base.object_new = nvc0_copy_object_new;
+
+	if (engine == 0) {
+		pcopy->irq = 5;
+		pcopy->pmc = 0x00000040;
+		pcopy->fuc = 0x104000;
+		pcopy->ctx = 0x0230;
+		nouveau_irq_register(dev, pcopy->irq, nvc0_copy_isr_0);
+		NVOBJ_ENGINE_ADD(dev, COPY0, &pcopy->base);
+		NVOBJ_CLASS(dev, 0x90b5, COPY0);
+	} else {
+		pcopy->irq = 6;
+		pcopy->pmc = 0x00000080;
+		pcopy->fuc = 0x105000;
+		pcopy->ctx = 0x0240;
+		nouveau_irq_register(dev, pcopy->irq, nvc0_copy_isr_1);
+		NVOBJ_ENGINE_ADD(dev, COPY1, &pcopy->base);
+		NVOBJ_CLASS(dev, 0x90b8, COPY1);
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
new file mode 100644
index 000000000000..419903880e9d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvc0_copy.fuc.h
@@ -0,0 +1,527 @@
+uint32_t nvc0_pcopy_data[] = {
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00010000,
+	0x00000000,
+	0x00000000,
+	0x00010040,
+	0x0001019f,
+	0x00000000,
+	0x00010050,
+	0x000101a1,
+	0x00000000,
+	0x00070080,
+	0x0000001c,
+	0xfffff000,
+	0x00000020,
+	0xfff80000,
+	0x00000024,
+	0xffffe000,
+	0x00000028,
+	0xfffff800,
+	0x0000002c,
+	0xfffff000,
+	0x00000030,
+	0xfff80000,
+	0x00000034,
+	0xffffe000,
+	0x00070088,
+	0x00000048,
+	0xfffff000,
+	0x0000004c,
+	0xfff80000,
+	0x00000050,
+	0xffffe000,
+	0x00000054,
+	0xfffff800,
+	0x00000058,
+	0xfffff000,
+	0x0000005c,
+	0xfff80000,
+	0x00000060,
+	0xffffe000,
+	0x000200c0,
+	0x000104b8,
+	0x00000000,
+	0x00010541,
+	0x00000000,
+	0x000e00c3,
+	0x00000010,
+	0xffffff00,
+	0x00000014,
+	0x0000000f,
+	0x0000003c,
+	0xffffff00,
+	0x00000040,
+	0x0000000f,
+	0x00000018,
+	0xfff80000,
+	0x00000044,
+	0xfff80000,
+	0x00000074,
+	0xffff0000,
+	0x00000078,
+	0xffffe000,
+	0x00000068,
+	0xfccc0000,
+	0x0000006c,
+	0x00000000,
+	0x00000070,
+	0x00000000,
+	0x00000004,
+	0xffffff00,
+	0x00000008,
+	0x00000000,
+	0x0000000c,
+	0x00000000,
+	0x00000800,
+};
+
+uint32_t nvc0_pcopy_code[] = {
+	0x04fe04bd,
+	0x3517f000,
+	0xf10010fe,
+	0xf1040017,
+	0xf0fff327,
+	0x22d00023,
+	0x0c25f0c0,
+	0xf40012d0,
+	0x17f11031,
+	0x27f01200,
+	0x0012d003,
+	0xf40031f4,
+	0x0ef40028,
+	0x8001cffd,
+	0xf40812c4,
+	0x21f4060b,
+	0x0412c4ca,
+	0xf5070bf4,
+	0xc4010221,
+	0x01d00c11,
+	0xf101f840,
+	0xfe770047,
+	0x47f1004b,
+	0x44cf2100,
+	0x0144f000,
+	0xb60444b6,
+	0xf7f13040,
+	0xf4b6061c,
+	0x1457f106,
+	0x00f5d101,
+	0xb6043594,
+	0x57fe0250,
+	0x0145fe00,
+	0x010052b7,
+	0x00ff67f1,
+	0x56fd60bd,
+	0x0253f004,
+	0xf80545fa,
+	0x0053f003,
+	0xd100e7f0,
+	0x549800fe,
+	0x0845b600,
+	0xb6015698,
+	0x46fd1864,
+	0x0047fe05,
+	0xf00204b9,
+	0x01f40643,
+	0x0604fa09,
+	0xfa060ef4,
+	0x03f80504,
+	0x27f100f8,
+	0x23cf1400,
+	0x1e3fc800,
+	0xf4170bf4,
+	0x21f40132,
+	0x1e3af053,
+	0xf00023d0,
+	0x24d00147,
+	0xcf00f880,
+	0x3dc84023,
+	0x090bf41e,
+	0xf40131f4,
+	0x37f05321,
+	0x8023d002,
+	0x37f100f8,
+	0x32cf1900,
+	0x0033cf40,
+	0x07ff24e4,
+	0xf11024b6,
+	0xbd010057,
+	0x5874bd64,
+	0x57580056,
+	0x0450b601,
+	0xf40446b8,
+	0x76bb4d08,
+	0x0447b800,
+	0xbb0f08f4,
+	0x74b60276,
+	0x0057bb03,
+	0xbbdf0ef4,
+	0x44b60246,
+	0x0045bb03,
+	0xfd014598,
+	0x54b00453,
+	0x201bf400,
+	0x58004558,
+	0x64b00146,
+	0x091bf400,
+	0xf4005380,
+	0x32f4300e,
+	0xf455f901,
+	0x0ef40c01,
+	0x0225f025,
+	0xf10125f0,
+	0xd0100047,
+	0x43d00042,
+	0x4027f040,
+	0xcf0002d0,
+	0x24f08002,
+	0x0024b040,
+	0xf1f71bf4,
+	0xf01d0027,
+	0x23d00137,
+	0xf800f800,
+	0x0027f100,
+	0xf034bd22,
+	0x23d00233,
+	0xf400f800,
+	0x01b0f030,
+	0x0101b000,
+	0xb00201b0,
+	0x04980301,
+	0x3045c71a,
+	0xc70150b6,
+	0x60b63446,
+	0x3847c701,
+	0xf40170b6,
+	0x84bd0232,
+	0x4ac494bd,
+	0x0445b60f,
+	0xa430b4bd,
+	0x0f18f404,
+	0xbbc0a5ff,
+	0x31f400cb,
+	0x220ef402,
+	0xf00c1bf4,
+	0xcbbb10c7,
+	0x160ef400,
+	0xf406a430,
+	0xc7f00c18,
+	0x00cbbb14,
+	0xf1070ef4,
+	0x380080c7,
+	0x80b601c8,
+	0x01b0b601,
+	0xf404b5b8,
+	0x90b6c308,
+	0x0497b801,
+	0xfdb208f4,
+	0x06800065,
+	0x1d08980e,
+	0xf40068fd,
+	0x64bd0502,
+	0x800075fd,
+	0x78fd1907,
+	0x1057f100,
+	0x0654b608,
+	0xd00056d0,
+	0x50b74057,
+	0x06980800,
+	0x0162b619,
+	0x980864b6,
+	0x72b60e07,
+	0x0567fd01,
+	0xb70056d0,
+	0xb4010050,
+	0x56d00060,
+	0x0160b400,
+	0xb44056d0,
+	0x56d00260,
+	0x0360b480,
+	0xb7c056d0,
+	0x98040050,
+	0x56d01b06,
+	0x1c069800,
+	0xf44056d0,
+	0x00f81030,
+	0xc7075798,
+	0x78c76879,
+	0x0380b664,
+	0xb06077c7,
+	0x1bf40e76,
+	0x0477f009,
+	0xf00f0ef4,
+	0x70b6027c,
+	0x0947fd11,
+	0x980677f0,
+	0x5b980c5a,
+	0x00abfd0e,
+	0xbb01b7f0,
+	0xb2b604b7,
+	0xc4abff01,
+	0x9805a7bb,
+	0xe7f00d5d,
+	0x04e8bb01,
+	0xff01e2b6,
+	0xd8bbb4de,
+	0x01e0b605,
+	0xbb0cef94,
+	0xfefd02eb,
+	0x026cf005,
+	0x020860b7,
+	0xd00864b6,
+	0xb7bb006f,
+	0x00cbbb04,
+	0x98085f98,
+	0xfbfd0e5b,
+	0x01b7f000,
+	0xb604b7bb,
+	0xfbbb01b2,
+	0x05f7bb00,
+	0x5f98f0f9,
+	0x01b7f009,
+	0xb604b8bb,
+	0xfbbb01b2,
+	0x05f8bb00,
+	0x78bbf0f9,
+	0x0282b600,
+	0xbb01b7f0,
+	0xb9bb04b8,
+	0x0b589804,
+	0xbb01e7f0,
+	0xe2b604e9,
+	0xf48eff01,
+	0xbb04f7bb,
+	0x79bb00cf,
+	0x0589bb00,
+	0x90fcf0fc,
+	0xbb00d9fd,
+	0x89fd00ad,
+	0x008ffd00,
+	0xbb00a8bb,
+	0x92b604a7,
+	0x0497bb01,
+	0x988069d0,
+	0x58980557,
+	0x00acbb04,
+	0xb6007abb,
+	0x84b60081,
+	0x058bfd10,
+	0x060062b7,
+	0xb70067d0,
+	0xd0040060,
+	0x00f80068,
+	0xb7026cf0,
+	0xb6020260,
+	0x57980864,
+	0x0067d005,
+	0x040060b7,
+	0xb6045798,
+	0x67d01074,
+	0x0060b700,
+	0x06579804,
+	0xf80067d0,
+	0xf900f900,
+	0x0007f110,
+	0x0604b608,
+	0xf00001cf,
+	0x1bf40114,
+	0xfc10fcfa,
+	0xc800f800,
+	0x1bf40d34,
+	0xd121f570,
+	0x0c47f103,
+	0x0644b608,
+	0xb6020598,
+	0x45d00450,
+	0x4040d000,
+	0xd00c57f0,
+	0x40b78045,
+	0x05980400,
+	0x1054b601,
+	0xb70045d0,
+	0xf1050040,
+	0xf00b0057,
+	0x45d00153,
+	0x4057f100,
+	0x0154b640,
+	0x808053f1,
+	0xf14045d0,
+	0xf1111057,
+	0xd0131253,
+	0x57f18045,
+	0x53f11514,
+	0x45d01716,
+	0x0157f1c0,
+	0x0153f026,
+	0x080047f1,
+	0xd00644b6,
+	0x21f50045,
+	0x47f103d1,
+	0x44b6080c,
+	0x02059806,
+	0xd00045d0,
+	0x57f04040,
+	0x8045d004,
+	0x040040b7,
+	0xb6010598,
+	0x45d01054,
+	0x0040b700,
+	0x0057f105,
+	0x0045d003,
+	0x111057f1,
+	0x131253f1,
+	0x984045d0,
+	0x40b70305,
+	0x45d00500,
+	0x0157f100,
+	0x0153f026,
+	0x080047f1,
+	0xd00644b6,
+	0x00f80045,
+	0x03d121f5,
+	0xf4003fc8,
+	0x21f50e0b,
+	0x47f101af,
+	0x0ef40200,
+	0x1067f11e,
+	0x0664b608,
+	0x800177f0,
+	0x07800e07,
+	0x1d079819,
+	0xd00067d0,
+	0x44bd4067,
+	0xbd0232f4,
+	0x043fc854,
+	0xf50a0bf4,
+	0xf403a821,
+	0x21f50a0e,
+	0x49f0029c,
+	0x0231f407,
+	0xc82c57f0,
+	0x0bf4083f,
+	0xa821f50a,
+	0x0a0ef403,
+	0x029c21f5,
+	0xf10849f0,
+	0xb6080057,
+	0x06980654,
+	0x4056d01e,
+	0xf14167f0,
+	0xfd440063,
+	0x54d00546,
+	0x0c3fc800,
+	0xf5070bf4,
+	0xf803eb21,
+	0x0027f100,
+	0xf034bd22,
+	0x23d00133,
+	0x0000f800,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+	0x00000000,
+};
diff --git a/drivers/gpu/drm/nouveau/nvc0_fifo.c b/drivers/gpu/drm/nouveau/nvc0_fifo.c
index 2886f2726a9e..fb4f5943e01b 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fifo.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fifo.c
@@ -37,7 +37,7 @@ struct nvc0_fifo_priv {
 };
 
 struct nvc0_fifo_chan {
-	struct nouveau_bo *user;
+	struct nouveau_gpuobj *user;
 	struct nouveau_gpuobj *ramfc;
 };
 
@@ -106,7 +106,7 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
 	struct nvc0_fifo_priv *priv = pfifo->priv;
 	struct nvc0_fifo_chan *fifoch;
-	u64 ib_virt, user_vinst;
+	u64 ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
 	int ret;
 
 	chan->fifo_priv = kzalloc(sizeof(*fifoch), GFP_KERNEL);
@@ -115,28 +115,13 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 	fifoch = chan->fifo_priv;
 
 	/* allocate vram for control regs, map into polling area */
-	ret = nouveau_bo_new(dev, NULL, 0x1000, 0, TTM_PL_FLAG_VRAM,
-			     0, 0, &fifoch->user);
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 0x1000,
+				 NVOBJ_FLAG_ZERO_ALLOC, &fifoch->user);
 	if (ret)
 		goto error;
 
-	ret = nouveau_bo_pin(fifoch->user, TTM_PL_FLAG_VRAM);
-	if (ret) {
-		nouveau_bo_ref(NULL, &fifoch->user);
-		goto error;
-	}
-
-	user_vinst = fifoch->user->bo.mem.start << PAGE_SHIFT;
-
-	ret = nouveau_bo_map(fifoch->user);
-	if (ret) {
-		nouveau_bo_unpin(fifoch->user);
-		nouveau_bo_ref(NULL, &fifoch->user);
-		goto error;
-	}
-
 	nouveau_vm_map_at(&priv->user_vma, chan->id * 0x1000,
-			  fifoch->user->bo.mem.mm_node);
+			  *(struct nouveau_mem **)fifoch->user->node);
 
 	chan->user = ioremap_wc(pci_resource_start(dev->pdev, 1) +
 				priv->user_vma.offset + (chan->id * 0x1000),
@@ -146,20 +131,6 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 		goto error;
 	}
 
-	ib_virt = chan->pushbuf_base + chan->dma.ib_base * 4;
-
-	/* zero channel regs */
-	nouveau_bo_wr32(fifoch->user, 0x0040/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0044/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0048/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x004c/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0050/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0058/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x005c/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0060/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x0088/4, 0);
-	nouveau_bo_wr32(fifoch->user, 0x008c/4, 0);
-
 	/* ramfc */
 	ret = nouveau_gpuobj_new_fake(dev, chan->ramin->pinst,
 				      chan->ramin->vinst, 0x100,
@@ -167,8 +138,8 @@ nvc0_fifo_create_context(struct nouveau_channel *chan)
 	if (ret)
 		goto error;
 
-	nv_wo32(fifoch->ramfc, 0x08, lower_32_bits(user_vinst));
-	nv_wo32(fifoch->ramfc, 0x0c, upper_32_bits(user_vinst));
+	nv_wo32(fifoch->ramfc, 0x08, lower_32_bits(fifoch->user->vinst));
+	nv_wo32(fifoch->ramfc, 0x0c, upper_32_bits(fifoch->user->vinst));
 	nv_wo32(fifoch->ramfc, 0x10, 0x0000face);
 	nv_wo32(fifoch->ramfc, 0x30, 0xfffff902);
 	nv_wo32(fifoch->ramfc, 0x48, lower_32_bits(ib_virt));
@@ -223,11 +194,7 @@ nvc0_fifo_destroy_context(struct nouveau_channel *chan)
 		return;
 
 	nouveau_gpuobj_ref(NULL, &fifoch->ramfc);
-	if (fifoch->user) {
-		nouveau_bo_unmap(fifoch->user);
-		nouveau_bo_unpin(fifoch->user);
-		nouveau_bo_ref(NULL, &fifoch->user);
-	}
+	nouveau_gpuobj_ref(NULL, &fifoch->user);
 	kfree(fifoch);
 }
 
@@ -240,6 +207,21 @@ nvc0_fifo_load_context(struct nouveau_channel *chan)
 int
 nvc0_fifo_unload_context(struct drm_device *dev)
 {
+	int i;
+
+	for (i = 0; i < 128; i++) {
+		if (!(nv_rd32(dev, 0x003004 + (i * 4)) & 1))
+			continue;
+
+		nv_mask(dev, 0x003004 + (i * 4), 0x00000001, 0x00000000);
+		nv_wr32(dev, 0x002634, i);
+		if (!nv_wait(dev, 0x002634, 0xffffffff, i)) {
+			NV_INFO(dev, "PFIFO: kick ch %d failed: 0x%08x\n",
+				i, nv_rd32(dev, 0x002634));
+			return -EBUSY;
+		}
+	}
+
 	return 0;
 }
 
@@ -309,6 +291,7 @@ nvc0_fifo_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nouveau_channel *chan;
 	struct nvc0_fifo_priv *priv;
 	int ret, i;
 
@@ -351,23 +334,74 @@ nvc0_fifo_init(struct drm_device *dev)
 	nv_wr32(dev, 0x002a00, 0xffffffff); /* clears PFIFO.INTR bit 30 */
 	nv_wr32(dev, 0x002100, 0xffffffff);
 	nv_wr32(dev, 0x002140, 0xbfffffff);
+
+	/* restore PFIFO context table */
+	for (i = 0; i < 128; i++) {
+		chan = dev_priv->channels.ptr[i];
+		if (!chan || !chan->fifo_priv)
+			continue;
+
+		nv_wr32(dev, 0x003000 + (i * 8), 0xc0000000 |
+						 (chan->ramin->vinst >> 12));
+		nv_wr32(dev, 0x003004 + (i * 8), 0x001f0001);
+	}
+	nvc0_fifo_playlist_update(dev);
+
 	return 0;
 }
 
 struct nouveau_enum nvc0_fifo_fault_unit[] = {
-	{ 0, "PGRAPH" },
-	{ 3, "PEEPHOLE" },
-	{ 4, "BAR1" },
-	{ 5, "BAR3" },
-	{ 7, "PFIFO" },
+	{ 0x00, "PGRAPH" },
+	{ 0x03, "PEEPHOLE" },
+	{ 0x04, "BAR1" },
+	{ 0x05, "BAR3" },
+	{ 0x07, "PFIFO" },
+	{ 0x10, "PBSP" },
+	{ 0x11, "PPPP" },
+	{ 0x13, "PCOUNTER" },
+	{ 0x14, "PVP" },
+	{ 0x15, "PCOPY0" },
+	{ 0x16, "PCOPY1" },
+	{ 0x17, "PDAEMON" },
 	{}
 };
 
 struct nouveau_enum nvc0_fifo_fault_reason[] = {
-	{ 0, "PT_NOT_PRESENT" },
-	{ 1, "PT_TOO_SHORT" },
-	{ 2, "PAGE_NOT_PRESENT" },
-	{ 3, "VM_LIMIT_EXCEEDED" },
+	{ 0x00, "PT_NOT_PRESENT" },
+	{ 0x01, "PT_TOO_SHORT" },
+	{ 0x02, "PAGE_NOT_PRESENT" },
+	{ 0x03, "VM_LIMIT_EXCEEDED" },
+	{ 0x04, "NO_CHANNEL" },
+	{ 0x05, "PAGE_SYSTEM_ONLY" },
+	{ 0x06, "PAGE_READ_ONLY" },
+	{ 0x0a, "COMPRESSED_SYSRAM" },
+	{ 0x0c, "INVALID_STORAGE_TYPE" },
+	{}
+};
+
+struct nouveau_enum nvc0_fifo_fault_hubclient[] = {
+	{ 0x01, "PCOPY0" },
+	{ 0x02, "PCOPY1" },
+	{ 0x04, "DISPATCH" },
+	{ 0x05, "CTXCTL" },
+	{ 0x06, "PFIFO" },
+	{ 0x07, "BAR_READ" },
+	{ 0x08, "BAR_WRITE" },
+	{ 0x0b, "PVP" },
+	{ 0x0c, "PPPP" },
+	{ 0x0d, "PBSP" },
+	{ 0x11, "PCOUNTER" },
+	{ 0x12, "PDAEMON" },
+	{ 0x14, "CCACHE" },
+	{ 0x15, "CCACHE_POST" },
+	{}
+};
+
+struct nouveau_enum nvc0_fifo_fault_gpcclient[] = {
+	{ 0x01, "TEX" },
+	{ 0x0c, "ESETUP" },
+	{ 0x0e, "CTXCTL" },
+	{ 0x0f, "PROP" },
 	{}
 };
 
@@ -385,12 +419,20 @@ nvc0_fifo_isr_vm_fault(struct drm_device *dev, int unit)
 	u32 valo = nv_rd32(dev, 0x2804 + (unit * 0x10));
 	u32 vahi = nv_rd32(dev, 0x2808 + (unit * 0x10));
 	u32 stat = nv_rd32(dev, 0x280c + (unit * 0x10));
+	u32 client = (stat & 0x00001f00) >> 8;
 
 	NV_INFO(dev, "PFIFO: %s fault at 0x%010llx [",
 		(stat & 0x00000080) ? "write" : "read", (u64)vahi << 32 | valo);
 	nouveau_enum_print(nvc0_fifo_fault_reason, stat & 0x0000000f);
 	printk("] from ");
 	nouveau_enum_print(nvc0_fifo_fault_unit, unit);
+	if (stat & 0x00000040) {
+		printk("/");
+		nouveau_enum_print(nvc0_fifo_fault_hubclient, client);
+	} else {
+		printk("/GPC%d/", (stat & 0x1f000000) >> 24);
+		nouveau_enum_print(nvc0_fifo_fault_gpcclient, client);
+	}
 	printk(" on channel 0x%010llx\n", (u64)inst << 12);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.c b/drivers/gpu/drm/nouveau/nvc0_graph.c
index 3de9b721d8db..ca6db204d644 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.c
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.c
@@ -30,27 +30,40 @@
 #include "nouveau_mm.h"
 #include "nvc0_graph.h"
 
-static void nvc0_graph_isr(struct drm_device *);
-static void nvc0_runk140_isr(struct drm_device *);
-static int  nvc0_graph_unload_context_to(struct drm_device *dev, u64 chan);
-
-void
-nvc0_graph_fifo_access(struct drm_device *dev, bool enabled)
+static int
+nvc0_graph_load_context(struct nouveau_channel *chan)
 {
+	struct drm_device *dev = chan->dev;
+
+	nv_wr32(dev, 0x409840, 0x00000030);
+	nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->vinst >> 12);
+	nv_wr32(dev, 0x409504, 0x00000003);
+	if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
+		NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");
+
+	return 0;
 }
 
-struct nouveau_channel *
-nvc0_graph_channel(struct drm_device *dev)
+static int
+nvc0_graph_unload_context_to(struct drm_device *dev, u64 chan)
 {
-	return NULL;
+	nv_wr32(dev, 0x409840, 0x00000003);
+	nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
+	nv_wr32(dev, 0x409504, 0x00000009);
+	if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
+		NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
+		return -EBUSY;
+	}
+
+	return 0;
 }
 
 static int
 nvc0_graph_construct_context(struct nouveau_channel *chan)
 {
 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
-	struct nvc0_graph_chan *grch = chan->pgraph_ctx;
+	struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
 	struct drm_device *dev = chan->dev;
 	int ret, i;
 	u32 *ctx;
@@ -89,9 +102,8 @@ nvc0_graph_construct_context(struct nouveau_channel *chan)
 static int
 nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
-	struct nvc0_graph_chan *grch = chan->pgraph_ctx;
+	struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
 	struct drm_device *dev = chan->dev;
 	int i = 0, gpc, tp, ret;
 	u32 magic;
@@ -158,29 +170,27 @@ nvc0_graph_create_context_mmio_list(struct nouveau_channel *chan)
 	return 0;
 }
 
-int
-nvc0_graph_create_context(struct nouveau_channel *chan)
+static int
+nvc0_graph_context_new(struct nouveau_channel *chan, int engine)
 {
-	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
+	struct drm_device *dev = chan->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_instmem_engine *pinstmem = &dev_priv->engine.instmem;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nvc0_graph_priv *priv = pgraph->priv;
+	struct nvc0_graph_priv *priv = nv_engine(dev, engine);
 	struct nvc0_graph_chan *grch;
-	struct drm_device *dev = chan->dev;
 	struct nouveau_gpuobj *grctx;
 	int ret, i;
 
-	chan->pgraph_ctx = kzalloc(sizeof(*grch), GFP_KERNEL);
-	if (!chan->pgraph_ctx)
+	grch = kzalloc(sizeof(*grch), GFP_KERNEL);
+	if (!grch)
 		return -ENOMEM;
-	grch = chan->pgraph_ctx;
+	chan->engctx[NVOBJ_ENGINE_GR] = grch;
 
 	ret = nouveau_gpuobj_new(dev, NULL, priv->grctx_size, 256,
 				 NVOBJ_FLAG_VM | NVOBJ_FLAG_ZERO_ALLOC,
 				 &grch->grctx);
 	if (ret)
 		goto error;
-	chan->ramin_grctx = grch->grctx;
 	grctx = grch->grctx;
 
 	ret = nvc0_graph_create_context_mmio_list(chan);
@@ -200,104 +210,49 @@ nvc0_graph_create_context(struct nouveau_channel *chan)
 	for (i = 0; i < priv->grctx_size; i += 4)
 		nv_wo32(grctx, i, priv->grctx_vals[i / 4]);
 
-        nv_wo32(grctx, 0xf4, 0);
-        nv_wo32(grctx, 0xf8, 0);
-        nv_wo32(grctx, 0x10, grch->mmio_nr);
-        nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->vinst));
-        nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->vinst));
-        nv_wo32(grctx, 0x1c, 1);
-        nv_wo32(grctx, 0x20, 0);
-        nv_wo32(grctx, 0x28, 0);
-        nv_wo32(grctx, 0x2c, 0);
+	nv_wo32(grctx, 0xf4, 0);
+	nv_wo32(grctx, 0xf8, 0);
+	nv_wo32(grctx, 0x10, grch->mmio_nr);
+	nv_wo32(grctx, 0x14, lower_32_bits(grch->mmio->vinst));
+	nv_wo32(grctx, 0x18, upper_32_bits(grch->mmio->vinst));
+	nv_wo32(grctx, 0x1c, 1);
+	nv_wo32(grctx, 0x20, 0);
+	nv_wo32(grctx, 0x28, 0);
+	nv_wo32(grctx, 0x2c, 0);
 	pinstmem->flush(dev);
 	return 0;
 
 error:
-	pgraph->destroy_context(chan);
+	priv->base.context_del(chan, engine);
 	return ret;
 }
 
-void
-nvc0_graph_destroy_context(struct nouveau_channel *chan)
+static void
+nvc0_graph_context_del(struct nouveau_channel *chan, int engine)
 {
-	struct nvc0_graph_chan *grch;
-
-	grch = chan->pgraph_ctx;
-	chan->pgraph_ctx = NULL;
-	if (!grch)
-		return;
+	struct nvc0_graph_chan *grch = chan->engctx[engine];
 
 	nouveau_gpuobj_ref(NULL, &grch->mmio);
 	nouveau_gpuobj_ref(NULL, &grch->unk418810);
 	nouveau_gpuobj_ref(NULL, &grch->unk40800c);
 	nouveau_gpuobj_ref(NULL, &grch->unk408004);
 	nouveau_gpuobj_ref(NULL, &grch->grctx);
-	chan->ramin_grctx = NULL;
+	chan->engctx[engine] = NULL;
 }
 
-int
-nvc0_graph_load_context(struct nouveau_channel *chan)
+static int
+nvc0_graph_object_new(struct nouveau_channel *chan, int engine,
+		      u32 handle, u16 class)
 {
-	struct drm_device *dev = chan->dev;
-
-	nv_wr32(dev, 0x409840, 0x00000030);
-	nv_wr32(dev, 0x409500, 0x80000000 | chan->ramin->vinst >> 12);
-	nv_wr32(dev, 0x409504, 0x00000003);
-	if (!nv_wait(dev, 0x409800, 0x00000010, 0x00000010))
-		NV_ERROR(dev, "PGRAPH: load_ctx timeout\n");
-
 	return 0;
 }
 
 static int
-nvc0_graph_unload_context_to(struct drm_device *dev, u64 chan)
+nvc0_graph_fini(struct drm_device *dev, int engine)
 {
-	nv_wr32(dev, 0x409840, 0x00000003);
-	nv_wr32(dev, 0x409500, 0x80000000 | chan >> 12);
-	nv_wr32(dev, 0x409504, 0x00000009);
-	if (!nv_wait(dev, 0x409800, 0x00000001, 0x00000000)) {
-		NV_ERROR(dev, "PGRAPH: unload_ctx timeout\n");
-		return -EBUSY;
-	}
-
 	return 0;
 }
 
-int
-nvc0_graph_unload_context(struct drm_device *dev)
-{
-	u64 inst = (u64)(nv_rd32(dev, 0x409b00) & 0x0fffffff) << 12;
-	return nvc0_graph_unload_context_to(dev, inst);
-}
-
-static void
-nvc0_graph_destroy(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nvc0_graph_priv *priv;
-
-	priv = pgraph->priv;
-	if (!priv)
-		return;
-
-	nouveau_irq_unregister(dev, 12);
-	nouveau_irq_unregister(dev, 25);
-
-	nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
-	nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
-
-	if (priv->grctx_vals)
-		kfree(priv->grctx_vals);
-	kfree(priv);
-}
-
-void
-nvc0_graph_takedown(struct drm_device *dev)
-{
-	nvc0_graph_destroy(dev);
-}
-
 static int
 nvc0_graph_mthd_page_flip(struct nouveau_channel *chan,
 			  u32 class, u32 mthd, u32 data)
@@ -306,119 +261,10 @@ nvc0_graph_mthd_page_flip(struct nouveau_channel *chan,
 	return 0;
 }
 
-static int
-nvc0_graph_create(struct drm_device *dev)
-{
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nvc0_graph_priv *priv;
-	int ret, gpc, i;
-
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-	pgraph->priv = priv;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
-	if (ret)
-		goto error;
-
-	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b8);
-	if (ret)
-		goto error;
-
-	for (i = 0; i < 0x1000; i += 4) {
-		nv_wo32(priv->unk4188b4, i, 0x00000010);
-		nv_wo32(priv->unk4188b8, i, 0x00000010);
-	}
-
-	priv->gpc_nr  =  nv_rd32(dev, 0x409604) & 0x0000001f;
-	priv->rop_nr = (nv_rd32(dev, 0x409604) & 0x001f0000) >> 16;
-	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
-		priv->tp_nr[gpc] = nv_rd32(dev, GPC_UNIT(gpc, 0x2608));
-		priv->tp_total += priv->tp_nr[gpc];
-	}
-
-	/*XXX: these need figuring out... */
-	switch (dev_priv->chipset) {
-	case 0xc0:
-		if (priv->tp_total == 11) { /* 465, 3/4/4/0, 4 */
-			priv->magic_not_rop_nr = 0x07;
-			/* filled values up to tp_total, the rest 0 */
-			priv->magicgpc980[0]   = 0x22111000;
-			priv->magicgpc980[1]   = 0x00000233;
-			priv->magicgpc980[2]   = 0x00000000;
-			priv->magicgpc980[3]   = 0x00000000;
-			priv->magicgpc918      = 0x000ba2e9;
-		} else
-		if (priv->tp_total == 14) { /* 470, 3/3/4/4, 5 */
-			priv->magic_not_rop_nr = 0x05;
-			priv->magicgpc980[0]   = 0x11110000;
-			priv->magicgpc980[1]   = 0x00233222;
-			priv->magicgpc980[2]   = 0x00000000;
-			priv->magicgpc980[3]   = 0x00000000;
-			priv->magicgpc918      = 0x00092493;
-		} else
-		if (priv->tp_total == 15) { /* 480, 3/4/4/4, 6 */
-			priv->magic_not_rop_nr = 0x06;
-			priv->magicgpc980[0]   = 0x11110000;
-			priv->magicgpc980[1]   = 0x03332222;
-			priv->magicgpc980[2]   = 0x00000000;
-			priv->magicgpc980[3]   = 0x00000000;
-			priv->magicgpc918      = 0x00088889;
-		}
-		break;
-	case 0xc3: /* 450, 4/0/0/0, 2 */
-		priv->magic_not_rop_nr = 0x03;
-		priv->magicgpc980[0]   = 0x00003210;
-		priv->magicgpc980[1]   = 0x00000000;
-		priv->magicgpc980[2]   = 0x00000000;
-		priv->magicgpc980[3]   = 0x00000000;
-		priv->magicgpc918      = 0x00200000;
-		break;
-	case 0xc4: /* 460, 3/4/0/0, 4 */
-		priv->magic_not_rop_nr = 0x01;
-		priv->magicgpc980[0]   = 0x02321100;
-		priv->magicgpc980[1]   = 0x00000000;
-		priv->magicgpc980[2]   = 0x00000000;
-		priv->magicgpc980[3]   = 0x00000000;
-		priv->magicgpc918      = 0x00124925;
-		break;
-	}
-
-	if (!priv->magic_not_rop_nr) {
-		NV_ERROR(dev, "PGRAPH: unknown config: %d/%d/%d/%d, %d\n",
-			 priv->tp_nr[0], priv->tp_nr[1], priv->tp_nr[2],
-			 priv->tp_nr[3], priv->rop_nr);
-		/* use 0xc3's values... */
-		priv->magic_not_rop_nr = 0x03;
-		priv->magicgpc980[0]   = 0x00003210;
-		priv->magicgpc980[1]   = 0x00000000;
-		priv->magicgpc980[2]   = 0x00000000;
-		priv->magicgpc980[3]   = 0x00000000;
-		priv->magicgpc918      = 0x00200000;
-	}
-
-	nouveau_irq_register(dev, 12, nvc0_graph_isr);
-	nouveau_irq_register(dev, 25, nvc0_runk140_isr);
-	NVOBJ_CLASS(dev, 0x902d, GR); /* 2D */
-	NVOBJ_CLASS(dev, 0x9039, GR); /* M2MF */
-	NVOBJ_MTHD (dev, 0x9039, 0x0500, nvc0_graph_mthd_page_flip);
-	NVOBJ_CLASS(dev, 0x9097, GR); /* 3D */
-	NVOBJ_CLASS(dev, 0x90c0, GR); /* COMPUTE */
-	return 0;
-
-error:
-	nvc0_graph_destroy(dev);
-	return ret;
-}
-
 static void
 nvc0_graph_init_obj418880(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
-	struct nvc0_graph_priv *priv = pgraph->priv;
+	struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
 	int i;
 
 	nv_wr32(dev, GPC_BCAST(0x0880), 0x00000000);
@@ -449,35 +295,42 @@ nvc0_graph_init_regs(struct drm_device *dev)
 static void
 nvc0_graph_init_gpc_0(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
-	int gpc;
-	
-	//      TP      ROP UNKVAL(magic_not_rop_nr)
-	// 450: 4/0/0/0 2        3
-	// 460: 3/4/0/0 4        1
-	// 465: 3/4/4/0 4        7
-	// 470: 3/3/4/4 5        5
-	// 480: 3/4/4/4 6        6
-
-	// magicgpc918
-	// 450: 00200000 00000000001000000000000000000000
-	// 460: 00124925 00000000000100100100100100100101
-	// 465: 000ba2e9 00000000000010111010001011101001
-	// 470: 00092493 00000000000010010010010010010011
-	// 480: 00088889 00000000000010001000100010001001
-
-	/* filled values up to tp_total, remainder 0 */
-	// 450: 00003210 00000000 00000000 00000000
-	// 460: 02321100 00000000 00000000 00000000
-	// 465: 22111000 00000233 00000000 00000000
-	// 470: 11110000 00233222 00000000 00000000
-	// 480: 11110000 03332222 00000000 00000000
-	
-	nv_wr32(dev, GPC_BCAST(0x0980), priv->magicgpc980[0]);
-	nv_wr32(dev, GPC_BCAST(0x0984), priv->magicgpc980[1]);
-	nv_wr32(dev, GPC_BCAST(0x0988), priv->magicgpc980[2]);
-	nv_wr32(dev, GPC_BCAST(0x098c), priv->magicgpc980[3]);
+	struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
+	u32 data[TP_MAX / 8];
+	u8  tpnr[GPC_MAX];
+	int i, gpc, tpc;
+
+	/*
+	 *      TP      ROP UNKVAL(magic_not_rop_nr)
+	 * 450: 4/0/0/0 2        3
+	 * 460: 3/4/0/0 4        1
+	 * 465: 3/4/4/0 4        7
+	 * 470: 3/3/4/4 5        5
+	 * 480: 3/4/4/4 6        6
+	 *
+	 * magicgpc918
+	 * 450: 00200000 00000000001000000000000000000000
+	 * 460: 00124925 00000000000100100100100100100101
+	 * 465: 000ba2e9 00000000000010111010001011101001
+	 * 470: 00092493 00000000000010010010010010010011
+	 * 480: 00088889 00000000000010001000100010001001
+	 */
+
+	memset(data, 0x00, sizeof(data));
+	memcpy(tpnr, priv->tp_nr, sizeof(priv->tp_nr));
+	for (i = 0, gpc = -1; i < priv->tp_total; i++) {
+		do {
+			gpc = (gpc + 1) % priv->gpc_nr;
+		} while (!tpnr[gpc]);
+		tpc = priv->tp_nr[gpc] - tpnr[gpc]--;
+
+		data[i / 8] |= tpc << ((i % 8) * 4);
+	}
+
+	nv_wr32(dev, GPC_BCAST(0x0980), data[0]);
+	nv_wr32(dev, GPC_BCAST(0x0984), data[1]);
+	nv_wr32(dev, GPC_BCAST(0x0988), data[2]);
+	nv_wr32(dev, GPC_BCAST(0x098c), data[3]);
 
 	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
 		nv_wr32(dev, GPC_UNIT(gpc, 0x0914), priv->magic_not_rop_nr << 8 |
@@ -509,8 +362,7 @@ nvc0_graph_init_units(struct drm_device *dev)
 static void
 nvc0_graph_init_gpc_1(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
+	struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
 	int gpc, tp;
 
 	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
@@ -535,8 +387,7 @@ nvc0_graph_init_gpc_1(struct drm_device *dev)
 static void
 nvc0_graph_init_rop(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
+	struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
 	int rop;
 
 	for (rop = 0; rop < priv->rop_nr; rop++) {
@@ -547,62 +398,36 @@ nvc0_graph_init_rop(struct drm_device *dev)
 	}
 }
 
-static int
-nvc0_fuc_load_fw(struct drm_device *dev, u32 fuc_base,
-		 const char *code_fw, const char *data_fw)
+static void
+nvc0_graph_init_fuc(struct drm_device *dev, u32 fuc_base,
+		    struct nvc0_graph_fuc *code, struct nvc0_graph_fuc *data)
 {
-	const struct firmware *fw;
-	char name[32];
-	int ret, i;
-
-	snprintf(name, sizeof(name), "nouveau/%s", data_fw);
-	ret = request_firmware(&fw, name, &dev->pdev->dev);
-	if (ret) {
-		NV_ERROR(dev, "failed to load %s\n", data_fw);
-		return ret;
-	}
+	int i;
 
 	nv_wr32(dev, fuc_base + 0x01c0, 0x01000000);
-	for (i = 0; i < fw->size / 4; i++)
-		nv_wr32(dev, fuc_base + 0x01c4, ((u32 *)fw->data)[i]);
-	release_firmware(fw);
-
-	snprintf(name, sizeof(name), "nouveau/%s", code_fw);
-	ret = request_firmware(&fw, name, &dev->pdev->dev);
-	if (ret) {
-		NV_ERROR(dev, "failed to load %s\n", code_fw);
-		return ret;
-	}
+	for (i = 0; i < data->size / 4; i++)
+		nv_wr32(dev, fuc_base + 0x01c4, data->data[i]);
 
 	nv_wr32(dev, fuc_base + 0x0180, 0x01000000);
-	for (i = 0; i < fw->size / 4; i++) {
+	for (i = 0; i < code->size / 4; i++) {
 		if ((i & 0x3f) == 0)
 			nv_wr32(dev, fuc_base + 0x0188, i >> 6);
-		nv_wr32(dev, fuc_base + 0x0184, ((u32 *)fw->data)[i]);
+		nv_wr32(dev, fuc_base + 0x0184, code->data[i]);
 	}
-	release_firmware(fw);
-
-	return 0;
 }
 
 static int
 nvc0_graph_init_ctxctl(struct drm_device *dev)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
+	struct nvc0_graph_priv *priv = nv_engine(dev, NVOBJ_ENGINE_GR);
 	u32 r000260;
-	int ret;
 
 	/* load fuc microcode */
 	r000260 = nv_mask(dev, 0x000260, 0x00000001, 0x00000000);
-	ret = nvc0_fuc_load_fw(dev, 0x409000, "fuc409c", "fuc409d");
-	if (ret == 0)
-		ret = nvc0_fuc_load_fw(dev, 0x41a000, "fuc41ac", "fuc41ad");
+	nvc0_graph_init_fuc(dev, 0x409000, &priv->fuc409c, &priv->fuc409d);
+	nvc0_graph_init_fuc(dev, 0x41a000, &priv->fuc41ac, &priv->fuc41ad);
 	nv_wr32(dev, 0x000260, r000260);
 
-	if (ret)
-		return ret;
-
 	/* start both of them running */
 	nv_wr32(dev, 0x409840, 0xffffffff);
 	nv_wr32(dev, 0x41a10c, 0x00000000);
@@ -644,41 +469,19 @@ nvc0_graph_init_ctxctl(struct drm_device *dev)
 	return 0;
 }
 
-int
-nvc0_graph_init(struct drm_device *dev)
+static int
+nvc0_graph_init(struct drm_device *dev, int engine)
 {
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
 	int ret;
 
-	dev_priv->engine.graph.accel_blocked = true;
-
-	switch (dev_priv->chipset) {
-	case 0xc0:
-	case 0xc3:
-	case 0xc4:
-		break;
-	default:
-		NV_ERROR(dev, "PGRAPH: unsupported chipset, please report!\n");
-		if (nouveau_noaccel != 0)
-			return 0;
-		break;
-	}
-
 	nv_mask(dev, 0x000200, 0x18001000, 0x00000000);
 	nv_mask(dev, 0x000200, 0x18001000, 0x18001000);
 
-	if (!pgraph->priv) {
-		ret = nvc0_graph_create(dev);
-		if (ret)
-			return ret;
-	}
-
 	nvc0_graph_init_obj418880(dev);
 	nvc0_graph_init_regs(dev);
-	//nvc0_graph_init_unitplemented_magics(dev);
+	/*nvc0_graph_init_unitplemented_magics(dev);*/
 	nvc0_graph_init_gpc_0(dev);
-	//nvc0_graph_init_unitplemented_c242(dev);
+	/*nvc0_graph_init_unitplemented_c242(dev);*/
 
 	nv_wr32(dev, 0x400500, 0x00010001);
 	nv_wr32(dev, 0x400100, 0xffffffff);
@@ -697,12 +500,13 @@ nvc0_graph_init(struct drm_device *dev)
 	nv_wr32(dev, 0x400054, 0x34ce3464);
 
 	ret = nvc0_graph_init_ctxctl(dev);
-	if (ret == 0)
-		dev_priv->engine.graph.accel_blocked = false;
+	if (ret)
+		return ret;
+
 	return 0;
 }
 
-static int
+int
 nvc0_graph_isr_chid(struct drm_device *dev, u64 inst)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -806,3 +610,187 @@ nvc0_runk140_isr(struct drm_device *dev)
 		units &= ~(1 << unit);
 	}
 }
+
+static int
+nvc0_graph_create_fw(struct drm_device *dev, const char *fwname,
+		     struct nvc0_graph_fuc *fuc)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	const struct firmware *fw;
+	char f[32];
+	int ret;
+
+	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", dev_priv->chipset, fwname);
+	ret = request_firmware(&fw, f, &dev->pdev->dev);
+	if (ret) {
+		snprintf(f, sizeof(f), "nouveau/%s", fwname);
+		ret = request_firmware(&fw, f, &dev->pdev->dev);
+		if (ret) {
+			NV_ERROR(dev, "failed to load %s\n", fwname);
+			return ret;
+		}
+	}
+
+	fuc->size = fw->size;
+	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
+	release_firmware(fw);
+	return (fuc->data != NULL) ? 0 : -ENOMEM;
+}
+
+static void
+nvc0_graph_destroy_fw(struct nvc0_graph_fuc *fuc)
+{
+	if (fuc->data) {
+		kfree(fuc->data);
+		fuc->data = NULL;
+	}
+}
+
+static void
+nvc0_graph_destroy(struct drm_device *dev, int engine)
+{
+	struct nvc0_graph_priv *priv = nv_engine(dev, engine);
+
+	nvc0_graph_destroy_fw(&priv->fuc409c);
+	nvc0_graph_destroy_fw(&priv->fuc409d);
+	nvc0_graph_destroy_fw(&priv->fuc41ac);
+	nvc0_graph_destroy_fw(&priv->fuc41ad);
+
+	nouveau_irq_unregister(dev, 12);
+	nouveau_irq_unregister(dev, 25);
+
+	nouveau_gpuobj_ref(NULL, &priv->unk4188b8);
+	nouveau_gpuobj_ref(NULL, &priv->unk4188b4);
+
+	if (priv->grctx_vals)
+		kfree(priv->grctx_vals);
+
+	NVOBJ_ENGINE_DEL(dev, GR);
+	kfree(priv);
+}
+
+int
+nvc0_graph_create(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nvc0_graph_priv *priv;
+	int ret, gpc, i;
+
+	switch (dev_priv->chipset) {
+	case 0xc0:
+	case 0xc3:
+	case 0xc4:
+		break;
+	default:
+		NV_ERROR(dev, "PGRAPH: unsupported chipset, please report!\n");
+		return 0;
+	}
+
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->base.destroy = nvc0_graph_destroy;
+	priv->base.init = nvc0_graph_init;
+	priv->base.fini = nvc0_graph_fini;
+	priv->base.context_new = nvc0_graph_context_new;
+	priv->base.context_del = nvc0_graph_context_del;
+	priv->base.object_new = nvc0_graph_object_new;
+
+	NVOBJ_ENGINE_ADD(dev, GR, &priv->base);
+	nouveau_irq_register(dev, 12, nvc0_graph_isr);
+	nouveau_irq_register(dev, 25, nvc0_runk140_isr);
+
+	if (nvc0_graph_create_fw(dev, "fuc409c", &priv->fuc409c) ||
+	    nvc0_graph_create_fw(dev, "fuc409d", &priv->fuc409d) ||
+	    nvc0_graph_create_fw(dev, "fuc41ac", &priv->fuc41ac) ||
+	    nvc0_graph_create_fw(dev, "fuc41ad", &priv->fuc41ad)) {
+		ret = 0;
+		goto error;
+	}
+
+
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b4);
+	if (ret)
+		goto error;
+
+	ret = nouveau_gpuobj_new(dev, NULL, 0x1000, 256, 0, &priv->unk4188b8);
+	if (ret)
+		goto error;
+
+	for (i = 0; i < 0x1000; i += 4) {
+		nv_wo32(priv->unk4188b4, i, 0x00000010);
+		nv_wo32(priv->unk4188b8, i, 0x00000010);
+	}
+
+	priv->gpc_nr  =  nv_rd32(dev, 0x409604) & 0x0000001f;
+	priv->rop_nr = (nv_rd32(dev, 0x409604) & 0x001f0000) >> 16;
+	for (gpc = 0; gpc < priv->gpc_nr; gpc++) {
+		priv->tp_nr[gpc] = nv_rd32(dev, GPC_UNIT(gpc, 0x2608));
+		priv->tp_total += priv->tp_nr[gpc];
+	}
+
+	/*XXX: these need figuring out... */
+	switch (dev_priv->chipset) {
+	case 0xc0:
+		if (priv->tp_total == 11) { /* 465, 3/4/4/0, 4 */
+			priv->magic_not_rop_nr = 0x07;
+			/* filled values up to tp_total, the rest 0 */
+			priv->magicgpc918      = 0x000ba2e9;
+		} else
+		if (priv->tp_total == 14) { /* 470, 3/3/4/4, 5 */
+			priv->magic_not_rop_nr = 0x05;
+			priv->magicgpc918      = 0x00092493;
+		} else
+		if (priv->tp_total == 15) { /* 480, 3/4/4/4, 6 */
+			priv->magic_not_rop_nr = 0x06;
+			priv->magicgpc918      = 0x00088889;
+		}
+		break;
+	case 0xc3: /* 450, 4/0/0/0, 2 */
+		priv->magic_not_rop_nr = 0x03;
+		priv->magicgpc918      = 0x00200000;
+		break;
+	case 0xc4: /* 460, 3/4/0/0, 4 */
+		priv->magic_not_rop_nr = 0x01;
+		priv->magicgpc918      = 0x00124925;
+		break;
+	}
+
+	if (!priv->magic_not_rop_nr) {
+		NV_ERROR(dev, "PGRAPH: unknown config: %d/%d/%d/%d, %d\n",
+			 priv->tp_nr[0], priv->tp_nr[1], priv->tp_nr[2],
+			 priv->tp_nr[3], priv->rop_nr);
+		/* use 0xc3's values... */
+		priv->magic_not_rop_nr = 0x03;
+		priv->magicgpc918      = 0x00200000;
+	}
+
+	NVOBJ_CLASS(dev, 0x902d, GR); /* 2D */
+	NVOBJ_CLASS(dev, 0x9039, GR); /* M2MF */
+	NVOBJ_MTHD (dev, 0x9039, 0x0500, nvc0_graph_mthd_page_flip);
+	NVOBJ_CLASS(dev, 0x9097, GR); /* 3D */
+	NVOBJ_CLASS(dev, 0x90c0, GR); /* COMPUTE */
+	return 0;
+
+error:
+	nvc0_graph_destroy(dev, NVOBJ_ENGINE_GR);
+	return ret;
+}
+
+MODULE_FIRMWARE("nouveau/nvc0_fuc409c");
+MODULE_FIRMWARE("nouveau/nvc0_fuc409d");
+MODULE_FIRMWARE("nouveau/nvc0_fuc41ac");
+MODULE_FIRMWARE("nouveau/nvc0_fuc41ad");
+MODULE_FIRMWARE("nouveau/nvc3_fuc409c");
+MODULE_FIRMWARE("nouveau/nvc3_fuc409d");
+MODULE_FIRMWARE("nouveau/nvc3_fuc41ac");
+MODULE_FIRMWARE("nouveau/nvc3_fuc41ad");
+MODULE_FIRMWARE("nouveau/nvc4_fuc409c");
+MODULE_FIRMWARE("nouveau/nvc4_fuc409d");
+MODULE_FIRMWARE("nouveau/nvc4_fuc41ac");
+MODULE_FIRMWARE("nouveau/nvc4_fuc41ad");
+MODULE_FIRMWARE("nouveau/fuc409c");
+MODULE_FIRMWARE("nouveau/fuc409d");
+MODULE_FIRMWARE("nouveau/fuc41ac");
+MODULE_FIRMWARE("nouveau/fuc41ad");
diff --git a/drivers/gpu/drm/nouveau/nvc0_graph.h b/drivers/gpu/drm/nouveau/nvc0_graph.h
index 40e26f9c56c4..f5d184e0689d 100644
--- a/drivers/gpu/drm/nouveau/nvc0_graph.h
+++ b/drivers/gpu/drm/nouveau/nvc0_graph.h
@@ -28,13 +28,25 @@
 #define GPC_MAX 4
 #define TP_MAX 32
 
-#define ROP_BCAST(r)   (0x408800 + (r))
-#define ROP_UNIT(u,r)  (0x410000 + (u) * 0x400 + (r))
-#define GPC_BCAST(r)   (0x418000 + (r))
-#define GPC_UNIT(t,r)  (0x500000 + (t) * 0x8000 + (r))
-#define TP_UNIT(t,m,r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
+#define ROP_BCAST(r)     (0x408800 + (r))
+#define ROP_UNIT(u, r)   (0x410000 + (u) * 0x400 + (r))
+#define GPC_BCAST(r)     (0x418000 + (r))
+#define GPC_UNIT(t, r)   (0x500000 + (t) * 0x8000 + (r))
+#define TP_UNIT(t, m, r) (0x504000 + (t) * 0x8000 + (m) * 0x800 + (r))
+
+struct nvc0_graph_fuc {
+	u32 *data;
+	u32  size;
+};
 
 struct nvc0_graph_priv {
+	struct nouveau_exec_engine base;
+
+	struct nvc0_graph_fuc fuc409c;
+	struct nvc0_graph_fuc fuc409d;
+	struct nvc0_graph_fuc fuc41ac;
+	struct nvc0_graph_fuc fuc41ad;
+
 	u8 gpc_nr;
 	u8 rop_nr;
 	u8 tp_nr[GPC_MAX];
@@ -46,15 +58,14 @@ struct nvc0_graph_priv {
 	struct nouveau_gpuobj *unk4188b8;
 
 	u8  magic_not_rop_nr;
-	u32 magicgpc980[4];
 	u32 magicgpc918;
 };
 
 struct nvc0_graph_chan {
 	struct nouveau_gpuobj *grctx;
-	struct nouveau_gpuobj *unk408004; // 0x418810 too
-	struct nouveau_gpuobj *unk40800c; // 0x419004 too
-	struct nouveau_gpuobj *unk418810; // 0x419848 too
+	struct nouveau_gpuobj *unk408004; /* 0x418810 too */
+	struct nouveau_gpuobj *unk40800c; /* 0x419004 too */
+	struct nouveau_gpuobj *unk418810; /* 0x419848 too */
 	struct nouveau_gpuobj *mmio;
 	int mmio_nr;
 };
diff --git a/drivers/gpu/drm/nouveau/nvc0_grctx.c b/drivers/gpu/drm/nouveau/nvc0_grctx.c
index f880ff776db8..6df066114133 100644
--- a/drivers/gpu/drm/nouveau/nvc0_grctx.c
+++ b/drivers/gpu/drm/nouveau/nvc0_grctx.c
@@ -1623,7 +1623,7 @@ nvc0_grctx_generate_rop(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	// ROPC_BROADCAST
+	/* ROPC_BROADCAST */
 	nv_wr32(dev, 0x408800, 0x02802a3c);
 	nv_wr32(dev, 0x408804, 0x00000040);
 	nv_wr32(dev, 0x408808, 0x0003e00d);
@@ -1647,7 +1647,7 @@ nvc0_grctx_generate_gpc(struct drm_device *dev)
 {
 	int i;
 
-	// GPC_BROADCAST
+	/* GPC_BROADCAST */
 	nv_wr32(dev, 0x418380, 0x00000016);
 	nv_wr32(dev, 0x418400, 0x38004e00);
 	nv_wr32(dev, 0x418404, 0x71e0ffff);
@@ -1728,7 +1728,7 @@ nvc0_grctx_generate_tp(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	// GPC_BROADCAST.TP_BROADCAST
+	/* GPC_BROADCAST.TP_BROADCAST */
 	nv_wr32(dev, 0x419848, 0x00000000);
 	nv_wr32(dev, 0x419864, 0x0000012a);
 	nv_wr32(dev, 0x419888, 0x00000000);
@@ -1741,7 +1741,7 @@ nvc0_grctx_generate_tp(struct drm_device *dev)
 	nv_wr32(dev, 0x419a1c, 0x00000000);
 	nv_wr32(dev, 0x419a20, 0x00000800);
 	if (dev_priv->chipset != 0xc0)
-		nv_wr32(dev, 0x00419ac4, 0x0007f440); // 0xc3
+		nv_wr32(dev, 0x00419ac4, 0x0007f440); /* 0xc3 */
 	nv_wr32(dev, 0x419b00, 0x0a418820);
 	nv_wr32(dev, 0x419b04, 0x062080e6);
 	nv_wr32(dev, 0x419b08, 0x020398a4);
@@ -1797,8 +1797,8 @@ int
 nvc0_grctx_generate(struct nouveau_channel *chan)
 {
 	struct drm_nouveau_private *dev_priv = chan->dev->dev_private;
-	struct nvc0_graph_priv *priv = dev_priv->engine.graph.priv;
-	struct nvc0_graph_chan *grch = chan->pgraph_ctx;
+	struct nvc0_graph_priv *priv = nv_engine(chan->dev, NVOBJ_ENGINE_GR);
+	struct nvc0_graph_chan *grch = chan->engctx[NVOBJ_ENGINE_GR];
 	struct drm_device *dev = chan->dev;
 	int i, gpc, tp, id;
 	u32 r000260, tmp;
@@ -1912,13 +1912,13 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		for (i = 1; i < 7; i++)
 			data2[1] |= ((1 << (i + 5)) % ntpcv) << ((i - 1) * 5);
 
-		// GPC_BROADCAST
+		/* GPC_BROADCAST */
 		nv_wr32(dev, 0x418bb8, (priv->tp_total << 8) |
 					priv->magic_not_rop_nr);
 		for (i = 0; i < 6; i++)
 			nv_wr32(dev, 0x418b08 + (i * 4), data[i]);
 
-		// GPC_BROADCAST.TP_BROADCAST
+		/* GPC_BROADCAST.TP_BROADCAST */
 		nv_wr32(dev, 0x419bd0, (priv->tp_total << 8) |
 				       priv->magic_not_rop_nr |
 				       data2[0]);
@@ -1926,7 +1926,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		for (i = 0; i < 6; i++)
 			nv_wr32(dev, 0x419b00 + (i * 4), data[i]);
 
-		// UNK78xx
+		/* UNK78xx */
 		nv_wr32(dev, 0x4078bc, (priv->tp_total << 8) |
 					priv->magic_not_rop_nr);
 		for (i = 0; i < 6; i++)
@@ -1944,7 +1944,7 @@ nvc0_grctx_generate(struct nouveau_channel *chan)
 		gpc = -1;
 		for (i = 0, gpc = -1; i < 32; i++) {
 			int ltp = i * (priv->tp_total - 1) / 32;
-			
+
 			do {
 				gpc = (gpc + 1) % priv->gpc_nr;
 			} while (!tpnr[gpc]);
diff --git a/drivers/gpu/drm/radeon/atom.c b/drivers/gpu/drm/radeon/atom.c
index 7bd745689097..ebdb0fdb8348 100644
--- a/drivers/gpu/drm/radeon/atom.c
+++ b/drivers/gpu/drm/radeon/atom.c
@@ -652,12 +652,12 @@ static void atom_op_compare(atom_exec_context *ctx, int *ptr, int arg)
 
 static void atom_op_delay(atom_exec_context *ctx, int *ptr, int arg)
 {
-	uint8_t count = U8((*ptr)++);
+	unsigned count = U8((*ptr)++);
 	SDEBUG("   count: %d\n", count);
 	if (arg == ATOM_UNIT_MICROSEC)
 		udelay(count);
 	else
-		schedule_timeout_uninterruptible(msecs_to_jiffies(count));
+		msleep(count);
 }
 
 static void atom_op_div(atom_exec_context *ctx, int *ptr, int arg)
diff --git a/drivers/gpu/drm/radeon/atombios.h b/drivers/gpu/drm/radeon/atombios.h
index 7fd88497b930..49611e2365d9 100644
--- a/drivers/gpu/drm/radeon/atombios.h
+++ b/drivers/gpu/drm/radeon/atombios.h
@@ -726,6 +726,7 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V2
 #define ATOM_ENCODER_CMD_DP_VIDEO_ON                  0x0d
 #define ATOM_ENCODER_CMD_QUERY_DP_LINK_TRAINING_STATUS    0x0e
 #define ATOM_ENCODER_CMD_SETUP                        0x0f
+#define ATOM_ENCODER_CMD_SETUP_PANEL_MODE             0x10
 
 // ucStatus
 #define ATOM_ENCODER_STATUS_LINK_TRAINING_COMPLETE    0x10
@@ -765,13 +766,19 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V3
   USHORT usPixelClock;      // in 10KHz; for bios convenient
   ATOM_DIG_ENCODER_CONFIG_V3 acConfig;
   UCHAR ucAction;                              
-  UCHAR ucEncoderMode;
+  union {
+    UCHAR ucEncoderMode;
                             // =0: DP   encoder      
                             // =1: LVDS encoder          
                             // =2: DVI  encoder  
                             // =3: HDMI encoder
                             // =4: SDVO encoder
                             // =5: DP audio
+    UCHAR ucPanelMode;      // only valid when ucAction == ATOM_ENCODER_CMD_SETUP_PANEL_MODE
+	                    // =0:     external DP
+	                    // =1:     internal DP2
+	                    // =0x11:  internal DP1 for NutMeg/Travis DP translator
+  };
   UCHAR ucLaneNum;          // how many lanes to enable
   UCHAR ucBitPerColor;      // only valid for DP mode when ucAction = ATOM_ENCODER_CMD_SETUP
   UCHAR ucReserved;
@@ -816,13 +823,19 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4
   UCHAR ucConfig;
   };
   UCHAR ucAction;                              
-  UCHAR ucEncoderMode;
+  union {
+    UCHAR ucEncoderMode;
                             // =0: DP   encoder      
                             // =1: LVDS encoder          
                             // =2: DVI  encoder  
                             // =3: HDMI encoder
                             // =4: SDVO encoder
                             // =5: DP audio
+    UCHAR ucPanelMode;      // only valid when ucAction == ATOM_ENCODER_CMD_SETUP_PANEL_MODE
+	                    // =0:     external DP
+	                    // =1:     internal DP2
+	                    // =0x11:  internal DP1 for NutMeg/Travis DP translator
+  };
   UCHAR ucLaneNum;          // how many lanes to enable
   UCHAR ucBitPerColor;      // only valid for DP mode when ucAction = ATOM_ENCODER_CMD_SETUP
   UCHAR ucHPD_ID;           // HPD ID (1-6). =0 means to skip HDP programming. New comparing to previous version
@@ -836,6 +849,11 @@ typedef struct _DIG_ENCODER_CONTROL_PARAMETERS_V4
 #define PANEL_12BIT_PER_COLOR                            0x04
 #define PANEL_16BIT_PER_COLOR                            0x05
 
+//define ucPanelMode
+#define DP_PANEL_MODE_EXTERNAL_DP_MODE                   0x00
+#define DP_PANEL_MODE_INTERNAL_DP2_MODE                  0x01
+#define DP_PANEL_MODE_INTERNAL_DP1_MODE                  0x11
+
 /****************************************************************************/	
 // Structures used by UNIPHYTransmitterControlTable
 //                    LVTMATransmitterControlTable
diff --git a/drivers/gpu/drm/radeon/atombios_crtc.c b/drivers/gpu/drm/radeon/atombios_crtc.c
index 529a3a704731..ec848787d7d9 100644
--- a/drivers/gpu/drm/radeon/atombios_crtc.c
+++ b/drivers/gpu/drm/radeon/atombios_crtc.c
@@ -420,7 +420,7 @@ static void atombios_crtc_program_ss(struct drm_crtc *crtc,
 
 	if (ASIC_IS_DCE5(rdev)) {
 		args.v3.usSpreadSpectrumAmountFrac = cpu_to_le16(0);
-		args.v3.ucSpreadSpectrumType = ss->type;
+		args.v3.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
 		switch (pll_id) {
 		case ATOM_PPLL1:
 			args.v3.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V3_P1PLL;
@@ -440,10 +440,12 @@ static void atombios_crtc_program_ss(struct drm_crtc *crtc,
 		case ATOM_PPLL_INVALID:
 			return;
 		}
-		args.v2.ucEnable = enable;
+		args.v3.ucEnable = enable;
+		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK))
+			args.v3.ucEnable = ATOM_DISABLE;
 	} else if (ASIC_IS_DCE4(rdev)) {
 		args.v2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage);
-		args.v2.ucSpreadSpectrumType = ss->type;
+		args.v2.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
 		switch (pll_id) {
 		case ATOM_PPLL1:
 			args.v2.ucSpreadSpectrumType |= ATOM_PPLL_SS_TYPE_V2_P1PLL;
@@ -464,32 +466,36 @@ static void atombios_crtc_program_ss(struct drm_crtc *crtc,
 			return;
 		}
 		args.v2.ucEnable = enable;
+		if ((ss->percentage == 0) || (ss->type & ATOM_EXTERNAL_SS_MASK))
+			args.v2.ucEnable = ATOM_DISABLE;
 	} else if (ASIC_IS_DCE3(rdev)) {
 		args.v1.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage);
-		args.v1.ucSpreadSpectrumType = ss->type;
+		args.v1.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
 		args.v1.ucSpreadSpectrumStep = ss->step;
 		args.v1.ucSpreadSpectrumDelay = ss->delay;
 		args.v1.ucSpreadSpectrumRange = ss->range;
 		args.v1.ucPpll = pll_id;
 		args.v1.ucEnable = enable;
 	} else if (ASIC_IS_AVIVO(rdev)) {
-		if (enable == ATOM_DISABLE) {
+		if ((enable == ATOM_DISABLE) || (ss->percentage == 0) ||
+		    (ss->type & ATOM_EXTERNAL_SS_MASK)) {
 			atombios_disable_ss(crtc);
 			return;
 		}
 		args.lvds_ss_2.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage);
-		args.lvds_ss_2.ucSpreadSpectrumType = ss->type;
+		args.lvds_ss_2.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
 		args.lvds_ss_2.ucSpreadSpectrumStep = ss->step;
 		args.lvds_ss_2.ucSpreadSpectrumDelay = ss->delay;
 		args.lvds_ss_2.ucSpreadSpectrumRange = ss->range;
 		args.lvds_ss_2.ucEnable = enable;
 	} else {
-		if (enable == ATOM_DISABLE) {
+		if ((enable == ATOM_DISABLE) || (ss->percentage == 0) ||
+		    (ss->type & ATOM_EXTERNAL_SS_MASK)) {
 			atombios_disable_ss(crtc);
 			return;
 		}
 		args.lvds_ss.usSpreadSpectrumPercentage = cpu_to_le16(ss->percentage);
-		args.lvds_ss.ucSpreadSpectrumType = ss->type;
+		args.lvds_ss.ucSpreadSpectrumType = ss->type & ATOM_SS_CENTRE_SPREAD_MODE_MASK;
 		args.lvds_ss.ucSpreadSpectrumStepSize_Delay = (ss->step & 3) << 2;
 		args.lvds_ss.ucSpreadSpectrumStepSize_Delay |= (ss->delay & 7) << 4;
 		args.lvds_ss.ucEnable = enable;
@@ -512,6 +518,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 	struct radeon_device *rdev = dev->dev_private;
 	struct drm_encoder *encoder = NULL;
 	struct radeon_encoder *radeon_encoder = NULL;
+	struct drm_connector *connector = NULL;
 	u32 adjusted_clock = mode->clock;
 	int encoder_mode = 0;
 	u32 dp_clock = mode->clock;
@@ -546,9 +553,12 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc) {
 			radeon_encoder = to_radeon_encoder(encoder);
+			connector = radeon_get_connector_for_encoder(encoder);
+			if (connector)
+				bpc = connector->display_info.bpc;
 			encoder_mode = atombios_get_encoder_mode(encoder);
-			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) {
-				struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
+			if ((radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT | ATOM_DEVICE_DFP_SUPPORT)) ||
+			    radeon_encoder_is_dp_bridge(encoder)) {
 				if (connector) {
 					struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 					struct radeon_connector_atom_dig *dig_connector =
@@ -612,7 +622,7 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 				args.v1.usPixelClock = cpu_to_le16(mode->clock / 10);
 				args.v1.ucTransmitterID = radeon_encoder->encoder_id;
 				args.v1.ucEncodeMode = encoder_mode;
-				if (ss_enabled)
+				if (ss_enabled && ss->percentage)
 					args.v1.ucConfig |=
 						ADJUST_DISPLAY_CONFIG_SS_ENABLE;
 
@@ -625,10 +635,11 @@ static u32 atombios_adjust_pll(struct drm_crtc *crtc,
 				args.v3.sInput.ucTransmitterID = radeon_encoder->encoder_id;
 				args.v3.sInput.ucEncodeMode = encoder_mode;
 				args.v3.sInput.ucDispPllConfig = 0;
-				if (ss_enabled)
+				if (ss_enabled && ss->percentage)
 					args.v3.sInput.ucDispPllConfig |=
 						DISPPLL_CONFIG_SS_ENABLE;
-				if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT)) {
+				if (radeon_encoder->devices & (ATOM_DEVICE_DFP_SUPPORT) ||
+				    radeon_encoder_is_dp_bridge(encoder)) {
 					struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 					if (encoder_mode == ATOM_ENCODER_MODE_DP) {
 						args.v3.sInput.ucDispPllConfig |=
@@ -754,7 +765,10 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc,
 				      u32 ref_div,
 				      u32 fb_div,
 				      u32 frac_fb_div,
-				      u32 post_div)
+				      u32 post_div,
+				      int bpc,
+				      bool ss_enabled,
+				      struct radeon_atom_ss *ss)
 {
 	struct drm_device *dev = crtc->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -801,6 +815,8 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc,
 			args.v3.ucPostDiv = post_div;
 			args.v3.ucPpll = pll_id;
 			args.v3.ucMiscInfo = (pll_id << 2);
+			if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
+				args.v3.ucMiscInfo |= PIXEL_CLOCK_MISC_REF_DIV_SRC;
 			args.v3.ucTransmitterId = encoder_id;
 			args.v3.ucEncoderMode = encoder_mode;
 			break;
@@ -812,6 +828,17 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc,
 			args.v5.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
 			args.v5.ucPostDiv = post_div;
 			args.v5.ucMiscInfo = 0; /* HDMI depth, etc. */
+			if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
+				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_REF_DIV_SRC;
+			switch (bpc) {
+			case 8:
+			default:
+				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_24BPP;
+				break;
+			case 10:
+				args.v5.ucMiscInfo |= PIXEL_CLOCK_V5_MISC_HDMI_30BPP;
+				break;
+			}
 			args.v5.ucTransmitterID = encoder_id;
 			args.v5.ucEncoderMode = encoder_mode;
 			args.v5.ucPpll = pll_id;
@@ -824,6 +851,23 @@ static void atombios_crtc_program_pll(struct drm_crtc *crtc,
 			args.v6.ulFbDivDecFrac = cpu_to_le32(frac_fb_div * 100000);
 			args.v6.ucPostDiv = post_div;
 			args.v6.ucMiscInfo = 0; /* HDMI depth, etc. */
+			if (ss_enabled && (ss->type & ATOM_EXTERNAL_SS_MASK))
+				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_REF_DIV_SRC;
+			switch (bpc) {
+			case 8:
+			default:
+				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_24BPP;
+				break;
+			case 10:
+				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_30BPP;
+				break;
+			case 12:
+				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_36BPP;
+				break;
+			case 16:
+				args.v6.ucMiscInfo |= PIXEL_CLOCK_V6_MISC_HDMI_48BPP;
+				break;
+			}
 			args.v6.ucTransmitterID = encoder_id;
 			args.v6.ucEncoderMode = encoder_mode;
 			args.v6.ucPpll = pll_id;
@@ -855,6 +899,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
 	int encoder_mode = 0;
 	struct radeon_atom_ss ss;
 	bool ss_enabled = false;
+	int bpc = 8;
 
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		if (encoder->crtc == crtc) {
@@ -891,41 +936,30 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
 		struct radeon_connector_atom_dig *dig_connector =
 			radeon_connector->con_priv;
 		int dp_clock;
+		bpc = connector->display_info.bpc;
 
 		switch (encoder_mode) {
 		case ATOM_ENCODER_MODE_DP:
 			/* DP/eDP */
 			dp_clock = dig_connector->dp_clock / 10;
-			if (radeon_encoder->active_device & (ATOM_DEVICE_LCD_SUPPORT)) {
-				if (ASIC_IS_DCE4(rdev))
-					ss_enabled =
-						radeon_atombios_get_asic_ss_info(rdev, &ss,
-										 dig->lcd_ss_id,
-										 dp_clock);
-				else
+			if (ASIC_IS_DCE4(rdev))
+				ss_enabled =
+					radeon_atombios_get_asic_ss_info(rdev, &ss,
+									 ASIC_INTERNAL_SS_ON_DP,
+									 dp_clock);
+			else {
+				if (dp_clock == 16200) {
 					ss_enabled =
 						radeon_atombios_get_ppll_ss_info(rdev, &ss,
-										 dig->lcd_ss_id);
-			} else {
-				if (ASIC_IS_DCE4(rdev))
-					ss_enabled =
-						radeon_atombios_get_asic_ss_info(rdev, &ss,
-										 ASIC_INTERNAL_SS_ON_DP,
-										 dp_clock);
-				else {
-					if (dp_clock == 16200) {
-						ss_enabled =
-							radeon_atombios_get_ppll_ss_info(rdev, &ss,
-											 ATOM_DP_SS_ID2);
-						if (!ss_enabled)
-							ss_enabled =
-								radeon_atombios_get_ppll_ss_info(rdev, &ss,
-												 ATOM_DP_SS_ID1);
-					} else
+										 ATOM_DP_SS_ID2);
+					if (!ss_enabled)
 						ss_enabled =
 							radeon_atombios_get_ppll_ss_info(rdev, &ss,
 											 ATOM_DP_SS_ID1);
-				}
+				} else
+					ss_enabled =
+						radeon_atombios_get_ppll_ss_info(rdev, &ss,
+										 ATOM_DP_SS_ID1);
 			}
 			break;
 		case ATOM_ENCODER_MODE_LVDS:
@@ -974,7 +1008,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
 
 	atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
 				  encoder_mode, radeon_encoder->encoder_id, mode->clock,
-				  ref_div, fb_div, frac_fb_div, post_div);
+				  ref_div, fb_div, frac_fb_div, post_div, bpc, ss_enabled, &ss);
 
 	if (ss_enabled) {
 		/* calculate ss amount and step size */
@@ -982,7 +1016,7 @@ static void atombios_crtc_set_pll(struct drm_crtc *crtc, struct drm_display_mode
 			u32 step_size;
 			u32 amount = (((fb_div * 10) + frac_fb_div) * ss.percentage) / 10000;
 			ss.amount = (amount / 10) & ATOM_PPLL_SS_AMOUNT_V2_FBDIV_MASK;
-			ss.amount |= ((amount - (ss.amount * 10)) << ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT) &
+			ss.amount |= ((amount - (amount / 10)) << ATOM_PPLL_SS_AMOUNT_V2_NFRAC_SHIFT) &
 				ATOM_PPLL_SS_AMOUNT_V2_NFRAC_MASK;
 			if (ss.type & ATOM_PPLL_SS_TYPE_V2_CENTRE_SPREAD)
 				step_size = (4 * amount * ref_div * (ss.rate * 2048)) /
@@ -1395,11 +1429,19 @@ static int radeon_atom_pick_pll(struct drm_crtc *crtc)
 	uint32_t pll_in_use = 0;
 
 	if (ASIC_IS_DCE4(rdev)) {
-		/* if crtc is driving DP and we have an ext clock, use that */
 		list_for_each_entry(test_encoder, &dev->mode_config.encoder_list, head) {
 			if (test_encoder->crtc && (test_encoder->crtc == crtc)) {
+				/* in DP mode, the DP ref clock can come from PPLL, DCPLL, or ext clock,
+				 * depending on the asic:
+				 * DCE4: PPLL or ext clock
+				 * DCE5: DCPLL or ext clock
+				 *
+				 * Setting ATOM_PPLL_INVALID will cause SetPixelClock to skip
+				 * PPLL/DCPLL programming and only program the DP DTO for the
+				 * crtc virtual pixel clock.
+				 */
 				if (atombios_get_encoder_mode(test_encoder) == ATOM_ENCODER_MODE_DP) {
-					if (rdev->clock.dp_extclk)
+					if (ASIC_IS_DCE5(rdev) || rdev->clock.dp_extclk)
 						return ATOM_PPLL_INVALID;
 				}
 			}
@@ -1515,6 +1557,8 @@ static void atombios_crtc_commit(struct drm_crtc *crtc)
 static void atombios_crtc_disable(struct drm_crtc *crtc)
 {
 	struct radeon_crtc *radeon_crtc = to_radeon_crtc(crtc);
+	struct radeon_atom_ss ss;
+
 	atombios_crtc_dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	switch (radeon_crtc->pll_id) {
@@ -1522,7 +1566,7 @@ static void atombios_crtc_disable(struct drm_crtc *crtc)
 	case ATOM_PPLL2:
 		/* disable the ppll */
 		atombios_crtc_program_pll(crtc, radeon_crtc->crtc_id, radeon_crtc->pll_id,
-					  0, 0, ATOM_DISABLE, 0, 0, 0, 0);
+					  0, 0, ATOM_DISABLE, 0, 0, 0, 0, 0, false, &ss);
 		break;
 	default:
 		break;
diff --git a/drivers/gpu/drm/radeon/atombios_dp.c b/drivers/gpu/drm/radeon/atombios_dp.c
index 695de9a38506..8c0f9e36ff8e 100644
--- a/drivers/gpu/drm/radeon/atombios_dp.c
+++ b/drivers/gpu/drm/radeon/atombios_dp.c
@@ -43,158 +43,242 @@ static char *pre_emph_names[] = {
         "0dB", "3.5dB", "6dB", "9.5dB"
 };
 
-static const int dp_clocks[] = {
-	54000,  /* 1 lane, 1.62 Ghz */
-	90000,  /* 1 lane, 2.70 Ghz */
-	108000, /* 2 lane, 1.62 Ghz */
-	180000, /* 2 lane, 2.70 Ghz */
-	216000, /* 4 lane, 1.62 Ghz */
-	360000, /* 4 lane, 2.70 Ghz */
+/***** radeon AUX functions *****/
+union aux_channel_transaction {
+	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
+	PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
 };
 
-static const int num_dp_clocks = sizeof(dp_clocks) / sizeof(int);
+static int radeon_process_aux_ch(struct radeon_i2c_chan *chan,
+				 u8 *send, int send_bytes,
+				 u8 *recv, int recv_size,
+				 u8 delay, u8 *ack)
+{
+	struct drm_device *dev = chan->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	union aux_channel_transaction args;
+	int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
+	unsigned char *base;
+	int recv_bytes;
+
+	memset(&args, 0, sizeof(args));
 
-/* common helper functions */
-static int dp_lanes_for_mode_clock(u8 dpcd[DP_DPCD_SIZE], int mode_clock)
+	base = (unsigned char *)rdev->mode_info.atom_context->scratch;
+
+	memcpy(base, send, send_bytes);
+
+	args.v1.lpAuxRequest = 0;
+	args.v1.lpDataOut = 16;
+	args.v1.ucDataOutLen = 0;
+	args.v1.ucChannelID = chan->rec.i2c_id;
+	args.v1.ucDelay = delay / 10;
+	if (ASIC_IS_DCE4(rdev))
+		args.v2.ucHPD_ID = chan->rec.hpd;
+
+	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+	*ack = args.v1.ucReplyStatus;
+
+	/* timeout */
+	if (args.v1.ucReplyStatus == 1) {
+		DRM_DEBUG_KMS("dp_aux_ch timeout\n");
+		return -ETIMEDOUT;
+	}
+
+	/* flags not zero */
+	if (args.v1.ucReplyStatus == 2) {
+		DRM_DEBUG_KMS("dp_aux_ch flags not zero\n");
+		return -EBUSY;
+	}
+
+	/* error */
+	if (args.v1.ucReplyStatus == 3) {
+		DRM_DEBUG_KMS("dp_aux_ch error\n");
+		return -EIO;
+	}
+
+	recv_bytes = args.v1.ucDataOutLen;
+	if (recv_bytes > recv_size)
+		recv_bytes = recv_size;
+
+	if (recv && recv_size)
+		memcpy(recv, base + 16, recv_bytes);
+
+	return recv_bytes;
+}
+
+static int radeon_dp_aux_native_write(struct radeon_connector *radeon_connector,
+				      u16 address, u8 *send, u8 send_bytes, u8 delay)
 {
-	int i;
-	u8 max_link_bw;
-	u8 max_lane_count;
+	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+	int ret;
+	u8 msg[20];
+	int msg_bytes = send_bytes + 4;
+	u8 ack;
 
-	if (!dpcd)
-		return 0;
+	if (send_bytes > 16)
+		return -1;
 
-	max_link_bw = dpcd[DP_MAX_LINK_RATE];
-	max_lane_count = dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
+	msg[0] = address;
+	msg[1] = address >> 8;
+	msg[2] = AUX_NATIVE_WRITE << 4;
+	msg[3] = (msg_bytes << 4) | (send_bytes - 1);
+	memcpy(&msg[4], send, send_bytes);
 
-	switch (max_link_bw) {
-	case DP_LINK_BW_1_62:
-	default:
-		for (i = 0; i < num_dp_clocks; i++) {
-			if (i % 2)
-				continue;
-			switch (max_lane_count) {
-			case 1:
-				if (i > 1)
-					return 0;
-				break;
-			case 2:
-				if (i > 3)
-					return 0;
-				break;
-			case 4:
-			default:
-				break;
-			}
-			if (dp_clocks[i] > mode_clock) {
-				if (i < 2)
-					return 1;
-				else if (i < 4)
-					return 2;
-				else
-					return 4;
-			}
-		}
-		break;
-	case DP_LINK_BW_2_7:
-		for (i = 0; i < num_dp_clocks; i++) {
-			switch (max_lane_count) {
-			case 1:
-				if (i > 1)
-					return 0;
-				break;
-			case 2:
-				if (i > 3)
-					return 0;
-				break;
-			case 4:
-			default:
-				break;
-			}
-			if (dp_clocks[i] > mode_clock) {
-				if (i < 2)
-					return 1;
-				else if (i < 4)
-					return 2;
-				else
-					return 4;
-			}
-		}
-		break;
+	while (1) {
+		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
+					    msg, msg_bytes, NULL, 0, delay, &ack);
+		if (ret < 0)
+			return ret;
+		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
+			break;
+		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
+			udelay(400);
+		else
+			return -EIO;
 	}
 
-	return 0;
+	return send_bytes;
 }
 
-static int dp_link_clock_for_mode_clock(u8 dpcd[DP_DPCD_SIZE], int mode_clock)
+static int radeon_dp_aux_native_read(struct radeon_connector *radeon_connector,
+				     u16 address, u8 *recv, int recv_bytes, u8 delay)
 {
-	int i;
-	u8 max_link_bw;
-	u8 max_lane_count;
+	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+	u8 msg[4];
+	int msg_bytes = 4;
+	u8 ack;
+	int ret;
 
-	if (!dpcd)
-		return 0;
+	msg[0] = address;
+	msg[1] = address >> 8;
+	msg[2] = AUX_NATIVE_READ << 4;
+	msg[3] = (msg_bytes << 4) | (recv_bytes - 1);
+
+	while (1) {
+		ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus,
+					    msg, msg_bytes, recv, recv_bytes, delay, &ack);
+		if (ret == 0)
+			return -EPROTO;
+		if (ret < 0)
+			return ret;
+		if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_ACK)
+			return ret;
+		else if ((ack & AUX_NATIVE_REPLY_MASK) == AUX_NATIVE_REPLY_DEFER)
+			udelay(400);
+		else
+			return -EIO;
+	}
+}
 
-	max_link_bw = dpcd[DP_MAX_LINK_RATE];
-	max_lane_count = dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
+static void radeon_write_dpcd_reg(struct radeon_connector *radeon_connector,
+				 u16 reg, u8 val)
+{
+	radeon_dp_aux_native_write(radeon_connector, reg, &val, 1, 0);
+}
 
-	switch (max_link_bw) {
-	case DP_LINK_BW_1_62:
+static u8 radeon_read_dpcd_reg(struct radeon_connector *radeon_connector,
+			       u16 reg)
+{
+	u8 val = 0;
+
+	radeon_dp_aux_native_read(radeon_connector, reg, &val, 1, 0);
+
+	return val;
+}
+
+int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
+			 u8 write_byte, u8 *read_byte)
+{
+	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
+	struct radeon_i2c_chan *auxch = (struct radeon_i2c_chan *)adapter;
+	u16 address = algo_data->address;
+	u8 msg[5];
+	u8 reply[2];
+	unsigned retry;
+	int msg_bytes;
+	int reply_bytes = 1;
+	int ret;
+	u8 ack;
+
+	/* Set up the command byte */
+	if (mode & MODE_I2C_READ)
+		msg[2] = AUX_I2C_READ << 4;
+	else
+		msg[2] = AUX_I2C_WRITE << 4;
+
+	if (!(mode & MODE_I2C_STOP))
+		msg[2] |= AUX_I2C_MOT << 4;
+
+	msg[0] = address;
+	msg[1] = address >> 8;
+
+	switch (mode) {
+	case MODE_I2C_WRITE:
+		msg_bytes = 5;
+		msg[3] = msg_bytes << 4;
+		msg[4] = write_byte;
+		break;
+	case MODE_I2C_READ:
+		msg_bytes = 4;
+		msg[3] = msg_bytes << 4;
+		break;
 	default:
-		for (i = 0; i < num_dp_clocks; i++) {
-			if (i % 2)
-				continue;
-			switch (max_lane_count) {
-			case 1:
-				if (i > 1)
-					return 0;
-				break;
-			case 2:
-				if (i > 3)
-					return 0;
-				break;
-			case 4:
-			default:
-				break;
-			}
-			if (dp_clocks[i] > mode_clock)
-				return 162000;
-		}
+		msg_bytes = 4;
+		msg[3] = 3 << 4;
 		break;
-	case DP_LINK_BW_2_7:
-		for (i = 0; i < num_dp_clocks; i++) {
-			switch (max_lane_count) {
-			case 1:
-				if (i > 1)
-					return 0;
-				break;
-			case 2:
-				if (i > 3)
-					return 0;
-				break;
-			case 4:
-			default:
-				break;
-			}
-			if (dp_clocks[i] > mode_clock)
-				return (i % 2) ? 270000 : 162000;
-		}
 	}
 
-	return 0;
-}
+	for (retry = 0; retry < 4; retry++) {
+		ret = radeon_process_aux_ch(auxch,
+					    msg, msg_bytes, reply, reply_bytes, 0, &ack);
+		if (ret < 0) {
+			DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
+			return ret;
+		}
 
-int dp_mode_valid(u8 dpcd[DP_DPCD_SIZE], int mode_clock)
-{
-	int lanes = dp_lanes_for_mode_clock(dpcd, mode_clock);
-	int dp_clock = dp_link_clock_for_mode_clock(dpcd, mode_clock);
+		switch (ack & AUX_NATIVE_REPLY_MASK) {
+		case AUX_NATIVE_REPLY_ACK:
+			/* I2C-over-AUX Reply field is only valid
+			 * when paired with AUX ACK.
+			 */
+			break;
+		case AUX_NATIVE_REPLY_NACK:
+			DRM_DEBUG_KMS("aux_ch native nack\n");
+			return -EREMOTEIO;
+		case AUX_NATIVE_REPLY_DEFER:
+			DRM_DEBUG_KMS("aux_ch native defer\n");
+			udelay(400);
+			continue;
+		default:
+			DRM_ERROR("aux_ch invalid native reply 0x%02x\n", ack);
+			return -EREMOTEIO;
+		}
 
-	if ((lanes == 0) || (dp_clock == 0))
-		return MODE_CLOCK_HIGH;
+		switch (ack & AUX_I2C_REPLY_MASK) {
+		case AUX_I2C_REPLY_ACK:
+			if (mode == MODE_I2C_READ)
+				*read_byte = reply[0];
+			return ret;
+		case AUX_I2C_REPLY_NACK:
+			DRM_DEBUG_KMS("aux_i2c nack\n");
+			return -EREMOTEIO;
+		case AUX_I2C_REPLY_DEFER:
+			DRM_DEBUG_KMS("aux_i2c defer\n");
+			udelay(400);
+			break;
+		default:
+			DRM_ERROR("aux_i2c invalid reply 0x%02x\n", ack);
+			return -EREMOTEIO;
+		}
+	}
 
-	return MODE_OK;
+	DRM_ERROR("aux i2c too many retries, giving up\n");
+	return -EREMOTEIO;
 }
 
+/***** general DP utility functions *****/
+
 static u8 dp_link_status(u8 link_status[DP_LINK_STATUS_SIZE], int r)
 {
 	return link_status[r - DP_LANE0_1_STATUS];
@@ -242,7 +326,7 @@ static bool dp_channel_eq_ok(u8 link_status[DP_LINK_STATUS_SIZE],
 	return true;
 }
 
-static u8 dp_get_adjust_request_voltage(uint8_t link_status[DP_LINK_STATUS_SIZE],
+static u8 dp_get_adjust_request_voltage(u8 link_status[DP_LINK_STATUS_SIZE],
 					int lane)
 
 {
@@ -255,7 +339,7 @@ static u8 dp_get_adjust_request_voltage(uint8_t link_status[DP_LINK_STATUS_SIZE]
 	return ((l >> s) & 0x3) << DP_TRAIN_VOLTAGE_SWING_SHIFT;
 }
 
-static u8 dp_get_adjust_request_pre_emphasis(uint8_t link_status[DP_LINK_STATUS_SIZE],
+static u8 dp_get_adjust_request_pre_emphasis(u8 link_status[DP_LINK_STATUS_SIZE],
 					     int lane)
 {
 	int i = DP_ADJUST_REQUEST_LANE0_1 + (lane >> 1);
@@ -267,22 +351,8 @@ static u8 dp_get_adjust_request_pre_emphasis(uint8_t link_status[DP_LINK_STATUS_
 	return ((l >> s) & 0x3) << DP_TRAIN_PRE_EMPHASIS_SHIFT;
 }
 
-/* XXX fix me -- chip specific */
 #define DP_VOLTAGE_MAX         DP_TRAIN_VOLTAGE_SWING_1200
-static u8 dp_pre_emphasis_max(u8 voltage_swing)
-{
-	switch (voltage_swing & DP_TRAIN_VOLTAGE_SWING_MASK) {
-	case DP_TRAIN_VOLTAGE_SWING_400:
-		return DP_TRAIN_PRE_EMPHASIS_6;
-	case DP_TRAIN_VOLTAGE_SWING_600:
-		return DP_TRAIN_PRE_EMPHASIS_6;
-	case DP_TRAIN_VOLTAGE_SWING_800:
-		return DP_TRAIN_PRE_EMPHASIS_3_5;
-	case DP_TRAIN_VOLTAGE_SWING_1200:
-	default:
-		return DP_TRAIN_PRE_EMPHASIS_0;
-	}
-}
+#define DP_PRE_EMPHASIS_MAX    DP_TRAIN_PRE_EMPHASIS_9_5
 
 static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
 				int lane_count,
@@ -308,10 +378,10 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
 	}
 
 	if (v >= DP_VOLTAGE_MAX)
-		v = DP_VOLTAGE_MAX | DP_TRAIN_MAX_SWING_REACHED;
+		v |= DP_TRAIN_MAX_SWING_REACHED;
 
-	if (p >= dp_pre_emphasis_max(v))
-		p = dp_pre_emphasis_max(v) | DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
+	if (p >= DP_PRE_EMPHASIS_MAX)
+		p |= DP_TRAIN_MAX_PRE_EMPHASIS_REACHED;
 
 	DRM_DEBUG_KMS("using signal parameters: voltage %s pre_emph %s\n",
 		  voltage_names[(v & DP_TRAIN_VOLTAGE_SWING_MASK) >> DP_TRAIN_VOLTAGE_SWING_SHIFT],
@@ -321,110 +391,109 @@ static void dp_get_adjust_train(u8 link_status[DP_LINK_STATUS_SIZE],
 		train_set[lane] = v | p;
 }
 
-union aux_channel_transaction {
-	PROCESS_AUX_CHANNEL_TRANSACTION_PS_ALLOCATION v1;
-	PROCESS_AUX_CHANNEL_TRANSACTION_PARAMETERS_V2 v2;
-};
-
-/* radeon aux chan functions */
-bool radeon_process_aux_ch(struct radeon_i2c_chan *chan, u8 *req_bytes,
-			   int num_bytes, u8 *read_byte,
-			   u8 read_buf_len, u8 delay)
+/* convert bits per color to bits per pixel */
+/* get bpc from the EDID */
+static int convert_bpc_to_bpp(int bpc)
 {
-	struct drm_device *dev = chan->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	union aux_channel_transaction args;
-	int index = GetIndexIntoMasterTable(COMMAND, ProcessAuxChannelTransaction);
-	unsigned char *base;
-	int retry_count = 0;
-
-	memset(&args, 0, sizeof(args));
-
-	base = (unsigned char *)rdev->mode_info.atom_context->scratch;
-
-retry:
-	memcpy(base, req_bytes, num_bytes);
-
-	args.v1.lpAuxRequest = 0;
-	args.v1.lpDataOut = 16;
-	args.v1.ucDataOutLen = 0;
-	args.v1.ucChannelID = chan->rec.i2c_id;
-	args.v1.ucDelay = delay / 10;
-	if (ASIC_IS_DCE4(rdev))
-		args.v2.ucHPD_ID = chan->rec.hpd;
+	if (bpc == 0)
+		return 24;
+	else
+		return bpc * 3;
+}
 
-	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+/* get the max pix clock supported by the link rate and lane num */
+static int dp_get_max_dp_pix_clock(int link_rate,
+				   int lane_num,
+				   int bpp)
+{
+	return (link_rate * lane_num * 8) / bpp;
+}
 
-	if (args.v1.ucReplyStatus && !args.v1.ucDataOutLen) {
-		if (args.v1.ucReplyStatus == 0x20 && retry_count++ < 10)
-			goto retry;
-		DRM_DEBUG_KMS("failed to get auxch %02x%02x %02x %02x 0x%02x %02x after %d retries\n",
-			  req_bytes[1], req_bytes[0], req_bytes[2], req_bytes[3],
-			  chan->rec.i2c_id, args.v1.ucReplyStatus, retry_count);
-		return false;
+static int dp_get_max_link_rate(u8 dpcd[DP_DPCD_SIZE])
+{
+	switch (dpcd[DP_MAX_LINK_RATE]) {
+	case DP_LINK_BW_1_62:
+	default:
+		return 162000;
+	case DP_LINK_BW_2_7:
+		return 270000;
+	case DP_LINK_BW_5_4:
+		return 540000;
 	}
+}
 
-	if (args.v1.ucDataOutLen && read_byte && read_buf_len) {
-		if (read_buf_len < args.v1.ucDataOutLen) {
-			DRM_ERROR("Buffer to small for return answer %d %d\n",
-				  read_buf_len, args.v1.ucDataOutLen);
-			return false;
-		}
-		{
-			int len = min(read_buf_len, args.v1.ucDataOutLen);
-			memcpy(read_byte, base + 16, len);
-		}
-	}
-	return true;
+static u8 dp_get_max_lane_number(u8 dpcd[DP_DPCD_SIZE])
+{
+	return dpcd[DP_MAX_LANE_COUNT] & DP_MAX_LANE_COUNT_MASK;
 }
 
-bool radeon_dp_aux_native_write(struct radeon_connector *radeon_connector, uint16_t address,
-				uint8_t send_bytes, uint8_t *send)
+static u8 dp_get_dp_link_rate_coded(int link_rate)
 {
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
-	u8 msg[20];
-	u8 msg_len, dp_msg_len;
-	bool ret;
+	switch (link_rate) {
+	case 162000:
+	default:
+		return DP_LINK_BW_1_62;
+	case 270000:
+		return DP_LINK_BW_2_7;
+	case 540000:
+		return DP_LINK_BW_5_4;
+	}
+}
 
-	dp_msg_len = 4;
-	msg[0] = address;
-	msg[1] = address >> 8;
-	msg[2] = AUX_NATIVE_WRITE << 4;
-	dp_msg_len += send_bytes;
-	msg[3] = (dp_msg_len << 4) | (send_bytes - 1);
+/***** radeon specific DP functions *****/
 
-	if (send_bytes > 16)
-		return false;
+/* First get the min lane# when low rate is used according to pixel clock
+ * (prefer low rate), second check max lane# supported by DP panel,
+ * if the max lane# < low rate lane# then use max lane# instead.
+ */
+static int radeon_dp_get_dp_lane_number(struct drm_connector *connector,
+					u8 dpcd[DP_DPCD_SIZE],
+					int pix_clock)
+{
+	int bpp = convert_bpc_to_bpp(connector->display_info.bpc);
+	int max_link_rate = dp_get_max_link_rate(dpcd);
+	int max_lane_num = dp_get_max_lane_number(dpcd);
+	int lane_num;
+	int max_dp_pix_clock;
+
+	for (lane_num = 1; lane_num < max_lane_num; lane_num <<= 1) {
+		max_dp_pix_clock = dp_get_max_dp_pix_clock(max_link_rate, lane_num, bpp);
+		if (pix_clock <= max_dp_pix_clock)
+			break;
+	}
 
-	memcpy(&msg[4], send, send_bytes);
-	msg_len = 4 + send_bytes;
-	ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, msg, msg_len, NULL, 0, 0);
-	return ret;
+	return lane_num;
 }
 
-bool radeon_dp_aux_native_read(struct radeon_connector *radeon_connector, uint16_t address,
-			       uint8_t delay, uint8_t expected_bytes,
-			       uint8_t *read_p)
+static int radeon_dp_get_dp_link_clock(struct drm_connector *connector,
+				       u8 dpcd[DP_DPCD_SIZE],
+				       int pix_clock)
 {
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
-	u8 msg[20];
-	u8 msg_len, dp_msg_len;
-	bool ret = false;
-	msg_len = 4;
-	dp_msg_len = 4;
-	msg[0] = address;
-	msg[1] = address >> 8;
-	msg[2] = AUX_NATIVE_READ << 4;
-	msg[3] = (dp_msg_len) << 4;
-	msg[3] |= expected_bytes - 1;
+	int bpp = convert_bpc_to_bpp(connector->display_info.bpc);
+	int lane_num, max_pix_clock;
+
+	if (radeon_connector_encoder_is_dp_bridge(connector))
+		return 270000;
+
+	lane_num = radeon_dp_get_dp_lane_number(connector, dpcd, pix_clock);
+	max_pix_clock = dp_get_max_dp_pix_clock(162000, lane_num, bpp);
+	if (pix_clock <= max_pix_clock)
+		return 162000;
+	max_pix_clock = dp_get_max_dp_pix_clock(270000, lane_num, bpp);
+	if (pix_clock <= max_pix_clock)
+		return 270000;
+	if (radeon_connector_is_dp12_capable(connector)) {
+		max_pix_clock = dp_get_max_dp_pix_clock(540000, lane_num, bpp);
+		if (pix_clock <= max_pix_clock)
+			return 540000;
+	}
 
-	ret = radeon_process_aux_ch(dig_connector->dp_i2c_bus, msg, msg_len, read_p, expected_bytes, delay);
-	return ret;
+	return dp_get_max_link_rate(dpcd);
 }
 
-/* radeon dp functions */
-static u8 radeon_dp_encoder_service(struct radeon_device *rdev, int action, int dp_clock,
-				    uint8_t ucconfig, uint8_t lane_num)
+static u8 radeon_dp_encoder_service(struct radeon_device *rdev,
+				    int action, int dp_clock,
+				    u8 ucconfig, u8 lane_num)
 {
 	DP_ENCODER_SERVICE_PARAMETERS args;
 	int index = GetIndexIntoMasterTable(COMMAND, DPEncoderService);
@@ -454,60 +523,86 @@ bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector)
 {
 	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
 	u8 msg[25];
-	int ret;
+	int ret, i;
 
-	ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, 0, 8, msg);
-	if (ret) {
+	ret = radeon_dp_aux_native_read(radeon_connector, DP_DPCD_REV, msg, 8, 0);
+	if (ret > 0) {
 		memcpy(dig_connector->dpcd, msg, 8);
-		{
-			int i;
-			DRM_DEBUG_KMS("DPCD: ");
-			for (i = 0; i < 8; i++)
-				DRM_DEBUG_KMS("%02x ", msg[i]);
-			DRM_DEBUG_KMS("\n");
-		}
+		DRM_DEBUG_KMS("DPCD: ");
+		for (i = 0; i < 8; i++)
+			DRM_DEBUG_KMS("%02x ", msg[i]);
+		DRM_DEBUG_KMS("\n");
 		return true;
 	}
 	dig_connector->dpcd[0] = 0;
 	return false;
 }
 
+static void radeon_dp_set_panel_mode(struct drm_encoder *encoder,
+				     struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	int panel_mode = DP_PANEL_MODE_EXTERNAL_DP_MODE;
+
+	if (!ASIC_IS_DCE4(rdev))
+		return;
+
+	if (radeon_connector_encoder_is_dp_bridge(connector))
+		panel_mode = DP_PANEL_MODE_INTERNAL_DP1_MODE;
+
+	atombios_dig_encoder_setup(encoder,
+				   ATOM_ENCODER_CMD_SETUP_PANEL_MODE,
+				   panel_mode);
+}
+
 void radeon_dp_set_link_config(struct drm_connector *connector,
 			       struct drm_display_mode *mode)
 {
-	struct radeon_connector *radeon_connector;
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	struct radeon_connector_atom_dig *dig_connector;
 
-	if ((connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) &&
-	    (connector->connector_type != DRM_MODE_CONNECTOR_eDP))
-		return;
-
-	radeon_connector = to_radeon_connector(connector);
 	if (!radeon_connector->con_priv)
 		return;
 	dig_connector = radeon_connector->con_priv;
 
-	dig_connector->dp_clock =
-		dp_link_clock_for_mode_clock(dig_connector->dpcd, mode->clock);
-	dig_connector->dp_lane_count =
-		dp_lanes_for_mode_clock(dig_connector->dpcd, mode->clock);
+	if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+	    (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP)) {
+		dig_connector->dp_clock =
+			radeon_dp_get_dp_link_clock(connector, dig_connector->dpcd, mode->clock);
+		dig_connector->dp_lane_count =
+			radeon_dp_get_dp_lane_number(connector, dig_connector->dpcd, mode->clock);
+	}
 }
 
-int radeon_dp_mode_valid_helper(struct radeon_connector *radeon_connector,
+int radeon_dp_mode_valid_helper(struct drm_connector *connector,
 				struct drm_display_mode *mode)
 {
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+	struct radeon_connector_atom_dig *dig_connector;
+	int dp_clock;
+
+	if (!radeon_connector->con_priv)
+		return MODE_CLOCK_HIGH;
+	dig_connector = radeon_connector->con_priv;
+
+	dp_clock =
+		radeon_dp_get_dp_link_clock(connector, dig_connector->dpcd, mode->clock);
+
+	if ((dp_clock == 540000) &&
+	    (!radeon_connector_is_dp12_capable(connector)))
+		return MODE_CLOCK_HIGH;
 
-	return dp_mode_valid(dig_connector->dpcd, mode->clock);
+	return MODE_OK;
 }
 
-static bool atom_dp_get_link_status(struct radeon_connector *radeon_connector,
-				    u8 link_status[DP_LINK_STATUS_SIZE])
+static bool radeon_dp_get_link_status(struct radeon_connector *radeon_connector,
+				      u8 link_status[DP_LINK_STATUS_SIZE])
 {
 	int ret;
-	ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS, 100,
-					DP_LINK_STATUS_SIZE, link_status);
-	if (!ret) {
+	ret = radeon_dp_aux_native_read(radeon_connector, DP_LANE0_1_STATUS,
+					link_status, DP_LINK_STATUS_SIZE, 100);
+	if (ret <= 0) {
 		DRM_ERROR("displayport link status failed\n");
 		return false;
 	}
@@ -518,292 +613,309 @@ static bool atom_dp_get_link_status(struct radeon_connector *radeon_connector,
 	return true;
 }
 
-bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector)
-{
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+struct radeon_dp_link_train_info {
+	struct radeon_device *rdev;
+	struct drm_encoder *encoder;
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+	int enc_id;
+	int dp_clock;
+	int dp_lane_count;
+	int rd_interval;
+	bool tp3_supported;
+	u8 dpcd[8];
+	u8 train_set[4];
 	u8 link_status[DP_LINK_STATUS_SIZE];
+	u8 tries;
+};
 
-	if (!atom_dp_get_link_status(radeon_connector, link_status))
-		return false;
-	if (dp_channel_eq_ok(link_status, dig_connector->dp_lane_count))
-		return false;
-	return true;
+static void radeon_dp_update_vs_emph(struct radeon_dp_link_train_info *dp_info)
+{
+	/* set the initial vs/emph on the source */
+	atombios_dig_transmitter_setup(dp_info->encoder,
+				       ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH,
+				       0, dp_info->train_set[0]); /* sets all lanes at once */
+
+	/* set the vs/emph on the sink */
+	radeon_dp_aux_native_write(dp_info->radeon_connector, DP_TRAINING_LANE0_SET,
+				   dp_info->train_set, dp_info->dp_lane_count, 0);
 }
 
-static void dp_set_power(struct radeon_connector *radeon_connector, u8 power_state)
+static void radeon_dp_set_tp(struct radeon_dp_link_train_info *dp_info, int tp)
 {
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
+	int rtp = 0;
 
-	if (dig_connector->dpcd[0] >= 0x11) {
-		radeon_dp_aux_native_write(radeon_connector, DP_SET_POWER, 1,
-					   &power_state);
+	/* set training pattern on the source */
+	if (ASIC_IS_DCE4(dp_info->rdev)) {
+		switch (tp) {
+		case DP_TRAINING_PATTERN_1:
+			rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1;
+			break;
+		case DP_TRAINING_PATTERN_2:
+			rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2;
+			break;
+		case DP_TRAINING_PATTERN_3:
+			rtp = ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN3;
+			break;
+		}
+		atombios_dig_encoder_setup(dp_info->encoder, rtp, 0);
+	} else {
+		switch (tp) {
+		case DP_TRAINING_PATTERN_1:
+			rtp = 0;
+			break;
+		case DP_TRAINING_PATTERN_2:
+			rtp = 1;
+			break;
+		}
+		radeon_dp_encoder_service(dp_info->rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
+					  dp_info->dp_clock, dp_info->enc_id, rtp);
 	}
-}
 
-static void dp_set_downspread(struct radeon_connector *radeon_connector, u8 downspread)
-{
-	radeon_dp_aux_native_write(radeon_connector, DP_DOWNSPREAD_CTRL, 1,
-				   &downspread);
+	/* enable training pattern on the sink */
+	radeon_write_dpcd_reg(dp_info->radeon_connector, DP_TRAINING_PATTERN_SET, tp);
 }
 
-static void dp_set_link_bw_lanes(struct radeon_connector *radeon_connector,
-				 u8 link_configuration[DP_LINK_CONFIGURATION_SIZE])
+static int radeon_dp_link_train_init(struct radeon_dp_link_train_info *dp_info)
 {
-	radeon_dp_aux_native_write(radeon_connector, DP_LINK_BW_SET, 2,
-				   link_configuration);
-}
+	u8 tmp;
 
-static void dp_update_dpvs_emph(struct radeon_connector *radeon_connector,
-				struct drm_encoder *encoder,
-				u8 train_set[4])
-{
-	struct radeon_connector_atom_dig *dig_connector = radeon_connector->con_priv;
-	int i;
+	/* power up the sink */
+	if (dp_info->dpcd[0] >= 0x11)
+		radeon_write_dpcd_reg(dp_info->radeon_connector,
+				      DP_SET_POWER, DP_SET_POWER_D0);
+
+	/* possibly enable downspread on the sink */
+	if (dp_info->dpcd[3] & 0x1)
+		radeon_write_dpcd_reg(dp_info->radeon_connector,
+				      DP_DOWNSPREAD_CTRL, DP_SPREAD_AMP_0_5);
+	else
+		radeon_write_dpcd_reg(dp_info->radeon_connector,
+				      DP_DOWNSPREAD_CTRL, 0);
 
-	for (i = 0; i < dig_connector->dp_lane_count; i++)
-		atombios_dig_transmitter_setup(encoder,
-					       ATOM_TRANSMITTER_ACTION_SETUP_VSEMPH,
-					       i, train_set[i]);
+	radeon_dp_set_panel_mode(dp_info->encoder, dp_info->connector);
 
-	radeon_dp_aux_native_write(radeon_connector, DP_TRAINING_LANE0_SET,
-				   dig_connector->dp_lane_count, train_set);
-}
+	/* set the lane count on the sink */
+	tmp = dp_info->dp_lane_count;
+	if (dp_info->dpcd[0] >= 0x11)
+		tmp |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+	radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LANE_COUNT_SET, tmp);
 
-static void dp_set_training(struct radeon_connector *radeon_connector,
-			    u8 training)
-{
-	radeon_dp_aux_native_write(radeon_connector, DP_TRAINING_PATTERN_SET,
-				   1, &training);
-}
+	/* set the link rate on the sink */
+	tmp = dp_get_dp_link_rate_coded(dp_info->dp_clock);
+	radeon_write_dpcd_reg(dp_info->radeon_connector, DP_LINK_BW_SET, tmp);
 
-void dp_link_train(struct drm_encoder *encoder,
-		   struct drm_connector *connector)
-{
-	struct drm_device *dev = encoder->dev;
-	struct radeon_device *rdev = dev->dev_private;
-	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
-	struct radeon_encoder_atom_dig *dig;
-	struct radeon_connector *radeon_connector;
-	struct radeon_connector_atom_dig *dig_connector;
-	int enc_id = 0;
-	bool clock_recovery, channel_eq;
-	u8 link_status[DP_LINK_STATUS_SIZE];
-	u8 link_configuration[DP_LINK_CONFIGURATION_SIZE];
-	u8 tries, voltage;
-	u8 train_set[4];
-	int i;
+	/* start training on the source */
+	if (ASIC_IS_DCE4(dp_info->rdev))
+		atombios_dig_encoder_setup(dp_info->encoder,
+					   ATOM_ENCODER_CMD_DP_LINK_TRAINING_START, 0);
+	else
+		radeon_dp_encoder_service(dp_info->rdev, ATOM_DP_ACTION_TRAINING_START,
+					  dp_info->dp_clock, dp_info->enc_id, 0);
 
-	if ((connector->connector_type != DRM_MODE_CONNECTOR_DisplayPort) &&
-	    (connector->connector_type != DRM_MODE_CONNECTOR_eDP))
-		return;
+	/* disable the training pattern on the sink */
+	radeon_write_dpcd_reg(dp_info->radeon_connector,
+			      DP_TRAINING_PATTERN_SET,
+			      DP_TRAINING_PATTERN_DISABLE);
 
-	if (!radeon_encoder->enc_priv)
-		return;
-	dig = radeon_encoder->enc_priv;
+	return 0;
+}
 
-	radeon_connector = to_radeon_connector(connector);
-	if (!radeon_connector->con_priv)
-		return;
-	dig_connector = radeon_connector->con_priv;
+static int radeon_dp_link_train_finish(struct radeon_dp_link_train_info *dp_info)
+{
+	udelay(400);
 
-	if (dig->dig_encoder)
-		enc_id |= ATOM_DP_CONFIG_DIG2_ENCODER;
-	else
-		enc_id |= ATOM_DP_CONFIG_DIG1_ENCODER;
-	if (dig->linkb)
-		enc_id |= ATOM_DP_CONFIG_LINK_B;
-	else
-		enc_id |= ATOM_DP_CONFIG_LINK_A;
+	/* disable the training pattern on the sink */
+	radeon_write_dpcd_reg(dp_info->radeon_connector,
+			      DP_TRAINING_PATTERN_SET,
+			      DP_TRAINING_PATTERN_DISABLE);
 
-	memset(link_configuration, 0, DP_LINK_CONFIGURATION_SIZE);
-	if (dig_connector->dp_clock == 270000)
-		link_configuration[0] = DP_LINK_BW_2_7;
+	/* disable the training pattern on the source */
+	if (ASIC_IS_DCE4(dp_info->rdev))
+		atombios_dig_encoder_setup(dp_info->encoder,
+					   ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE, 0);
 	else
-		link_configuration[0] = DP_LINK_BW_1_62;
-	link_configuration[1] = dig_connector->dp_lane_count;
-	if (dig_connector->dpcd[0] >= 0x11)
-		link_configuration[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+		radeon_dp_encoder_service(dp_info->rdev, ATOM_DP_ACTION_TRAINING_COMPLETE,
+					  dp_info->dp_clock, dp_info->enc_id, 0);
 
-	/* power up the sink */
-	dp_set_power(radeon_connector, DP_SET_POWER_D0);
-	/* disable the training pattern on the sink */
-	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_DISABLE);
-	/* set link bw and lanes on the sink */
-	dp_set_link_bw_lanes(radeon_connector, link_configuration);
-	/* disable downspread on the sink */
-	dp_set_downspread(radeon_connector, 0);
-	if (ASIC_IS_DCE4(rdev)) {
-		/* start training on the source */
-		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_START);
-		/* set training pattern 1 on the source */
-		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN1);
-	} else {
-		/* start training on the source */
-		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_START,
-					  dig_connector->dp_clock, enc_id, 0);
-		/* set training pattern 1 on the source */
-		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
-					  dig_connector->dp_clock, enc_id, 0);
-	}
+	return 0;
+}
 
-	/* set initial vs/emph */
-	memset(train_set, 0, 4);
-	udelay(400);
-	/* set training pattern 1 on the sink */
-	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_1);
+static int radeon_dp_link_train_cr(struct radeon_dp_link_train_info *dp_info)
+{
+	bool clock_recovery;
+ 	u8 voltage;
+	int i;
 
-	dp_update_dpvs_emph(radeon_connector, encoder, train_set);
+	radeon_dp_set_tp(dp_info, DP_TRAINING_PATTERN_1);
+	memset(dp_info->train_set, 0, 4);
+	radeon_dp_update_vs_emph(dp_info);
+
+	udelay(400);
 
 	/* clock recovery loop */
 	clock_recovery = false;
-	tries = 0;
+	dp_info->tries = 0;
 	voltage = 0xff;
-	for (;;) {
-		udelay(100);
-		if (!atom_dp_get_link_status(radeon_connector, link_status))
+	while (1) {
+		if (dp_info->rd_interval == 0)
+			udelay(100);
+		else
+			mdelay(dp_info->rd_interval * 4);
+
+		if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status))
 			break;
 
-		if (dp_clock_recovery_ok(link_status, dig_connector->dp_lane_count)) {
+		if (dp_clock_recovery_ok(dp_info->link_status, dp_info->dp_lane_count)) {
 			clock_recovery = true;
 			break;
 		}
 
-		for (i = 0; i < dig_connector->dp_lane_count; i++) {
-			if ((train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
+		for (i = 0; i < dp_info->dp_lane_count; i++) {
+			if ((dp_info->train_set[i] & DP_TRAIN_MAX_SWING_REACHED) == 0)
 				break;
 		}
-		if (i == dig_connector->dp_lane_count) {
+		if (i == dp_info->dp_lane_count) {
 			DRM_ERROR("clock recovery reached max voltage\n");
 			break;
 		}
 
-		if ((train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
-			++tries;
-			if (tries == 5) {
+		if ((dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK) == voltage) {
+			++dp_info->tries;
+			if (dp_info->tries == 5) {
 				DRM_ERROR("clock recovery tried 5 times\n");
 				break;
 			}
 		} else
-			tries = 0;
+			dp_info->tries = 0;
 
-		voltage = train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
+		voltage = dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK;
 
 		/* Compute new train_set as requested by sink */
-		dp_get_adjust_train(link_status, dig_connector->dp_lane_count, train_set);
-		dp_update_dpvs_emph(radeon_connector, encoder, train_set);
+		dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count, dp_info->train_set);
+
+		radeon_dp_update_vs_emph(dp_info);
 	}
-	if (!clock_recovery)
+	if (!clock_recovery) {
 		DRM_ERROR("clock recovery failed\n");
-	else
+		return -1;
+	} else {
 		DRM_DEBUG_KMS("clock recovery at voltage %d pre-emphasis %d\n",
-			  train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
-			  (train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK) >>
+			  dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+			  (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK) >>
 			  DP_TRAIN_PRE_EMPHASIS_SHIFT);
+		return 0;
+	}
+}
 
+static int radeon_dp_link_train_ce(struct radeon_dp_link_train_info *dp_info)
+{
+	bool channel_eq;
 
-	/* set training pattern 2 on the sink */
-	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_2);
-	/* set training pattern 2 on the source */
-	if (ASIC_IS_DCE4(rdev))
-		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_PATTERN2);
+	if (dp_info->tp3_supported)
+		radeon_dp_set_tp(dp_info, DP_TRAINING_PATTERN_3);
 	else
-		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_PATTERN_SEL,
-					  dig_connector->dp_clock, enc_id, 1);
+		radeon_dp_set_tp(dp_info, DP_TRAINING_PATTERN_2);
 
 	/* channel equalization loop */
-	tries = 0;
+	dp_info->tries = 0;
 	channel_eq = false;
-	for (;;) {
-		udelay(400);
-		if (!atom_dp_get_link_status(radeon_connector, link_status))
+	while (1) {
+		if (dp_info->rd_interval == 0)
+			udelay(400);
+		else
+			mdelay(dp_info->rd_interval * 4);
+
+		if (!radeon_dp_get_link_status(dp_info->radeon_connector, dp_info->link_status))
 			break;
 
-		if (dp_channel_eq_ok(link_status, dig_connector->dp_lane_count)) {
+		if (dp_channel_eq_ok(dp_info->link_status, dp_info->dp_lane_count)) {
 			channel_eq = true;
 			break;
 		}
 
 		/* Try 5 times */
-		if (tries > 5) {
+		if (dp_info->tries > 5) {
 			DRM_ERROR("channel eq failed: 5 tries\n");
 			break;
 		}
 
 		/* Compute new train_set as requested by sink */
-		dp_get_adjust_train(link_status, dig_connector->dp_lane_count, train_set);
-		dp_update_dpvs_emph(radeon_connector, encoder, train_set);
+		dp_get_adjust_train(dp_info->link_status, dp_info->dp_lane_count, dp_info->train_set);
 
-		tries++;
+		radeon_dp_update_vs_emph(dp_info);
+		dp_info->tries++;
 	}
 
-	if (!channel_eq)
+	if (!channel_eq) {
 		DRM_ERROR("channel eq failed\n");
-	else
+		return -1;
+	} else {
 		DRM_DEBUG_KMS("channel eq at voltage %d pre-emphasis %d\n",
-			  train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
-			  (train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK)
+			  dp_info->train_set[0] & DP_TRAIN_VOLTAGE_SWING_MASK,
+			  (dp_info->train_set[0] & DP_TRAIN_PRE_EMPHASIS_MASK)
 			  >> DP_TRAIN_PRE_EMPHASIS_SHIFT);
-
-	/* disable the training pattern on the sink */
-	dp_set_training(radeon_connector, DP_TRAINING_PATTERN_DISABLE);
-
-	/* disable the training pattern on the source */
-	if (ASIC_IS_DCE4(rdev))
-		atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_LINK_TRAINING_COMPLETE);
-	else
-		radeon_dp_encoder_service(rdev, ATOM_DP_ACTION_TRAINING_COMPLETE,
-					  dig_connector->dp_clock, enc_id, 0);
+		return 0;
+	}
 }
 
-int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
-			 uint8_t write_byte, uint8_t *read_byte)
+void radeon_dp_link_train(struct drm_encoder *encoder,
+			  struct drm_connector *connector)
 {
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	struct radeon_i2c_chan *auxch = (struct radeon_i2c_chan *)adapter;
-	int ret = 0;
-	uint16_t address = algo_data->address;
-	uint8_t msg[5];
-	uint8_t reply[2];
-	int msg_len, dp_msg_len;
-	int reply_bytes;
-
-	/* Set up the command byte */
-	if (mode & MODE_I2C_READ)
-		msg[2] = AUX_I2C_READ << 4;
-	else
-		msg[2] = AUX_I2C_WRITE << 4;
-
-	if (!(mode & MODE_I2C_STOP))
-		msg[2] |= AUX_I2C_MOT << 4;
+	struct drm_device *dev = encoder->dev;
+	struct radeon_device *rdev = dev->dev_private;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct radeon_encoder_atom_dig *dig;
+	struct radeon_connector *radeon_connector;
+	struct radeon_connector_atom_dig *dig_connector;
+	struct radeon_dp_link_train_info dp_info;
+ 	u8 tmp;
 
-	msg[0] = address;
-	msg[1] = address >> 8;
+	if (!radeon_encoder->enc_priv)
+		return;
+	dig = radeon_encoder->enc_priv;
 
-	reply_bytes = 1;
+	radeon_connector = to_radeon_connector(connector);
+	if (!radeon_connector->con_priv)
+		return;
+	dig_connector = radeon_connector->con_priv;
 
-	msg_len = 4;
-	dp_msg_len = 3;
-	switch (mode) {
-	case MODE_I2C_WRITE:
-		msg[4] = write_byte;
-		msg_len++;
-		dp_msg_len += 2;
-		break;
-	case MODE_I2C_READ:
-		dp_msg_len += 1;
-		break;
-	default:
-		break;
-	}
+	if ((dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_DISPLAYPORT) &&
+	    (dig_connector->dp_sink_type != CONNECTOR_OBJECT_ID_eDP))
+		return;
 
-	msg[3] = (dp_msg_len) << 4;
-	ret = radeon_process_aux_ch(auxch, msg, msg_len, reply, reply_bytes, 0);
+	dp_info.enc_id = 0;
+	if (dig->dig_encoder)
+		dp_info.enc_id |= ATOM_DP_CONFIG_DIG2_ENCODER;
+	else
+		dp_info.enc_id |= ATOM_DP_CONFIG_DIG1_ENCODER;
+	if (dig->linkb)
+		dp_info.enc_id |= ATOM_DP_CONFIG_LINK_B;
+	else
+		dp_info.enc_id |= ATOM_DP_CONFIG_LINK_A;
 
-	if (ret) {
-		if (read_byte)
-			*read_byte = reply[0];
-		return reply_bytes;
-	}
-	return -EREMOTEIO;
+	dp_info.rd_interval = radeon_read_dpcd_reg(radeon_connector, DP_TRAINING_AUX_RD_INTERVAL);
+	tmp = radeon_read_dpcd_reg(radeon_connector, DP_MAX_LANE_COUNT);
+	if (ASIC_IS_DCE5(rdev) && (tmp & DP_TPS3_SUPPORTED))
+		dp_info.tp3_supported = true;
+	else
+		dp_info.tp3_supported = false;
+
+	memcpy(dp_info.dpcd, dig_connector->dpcd, 8);
+	dp_info.rdev = rdev;
+	dp_info.encoder = encoder;
+	dp_info.connector = connector;
+	dp_info.radeon_connector = radeon_connector;
+	dp_info.dp_lane_count = dig_connector->dp_lane_count;
+	dp_info.dp_clock = dig_connector->dp_clock;
+
+	if (radeon_dp_link_train_init(&dp_info))
+		goto done;
+	if (radeon_dp_link_train_cr(&dp_info))
+		goto done;
+	if (radeon_dp_link_train_ce(&dp_info))
+		goto done;
+done:
+	if (radeon_dp_link_train_finish(&dp_info))
+		return;
 }
-
diff --git a/drivers/gpu/drm/radeon/evergreen.c b/drivers/gpu/drm/radeon/evergreen.c
index 9073e3bfb08c..7c37638095f7 100644
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@@ -1578,7 +1578,7 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 	u32 sq_stack_resource_mgmt_2;
 	u32 sq_stack_resource_mgmt_3;
 	u32 vgt_cache_invalidation;
-	u32 hdp_host_path_cntl;
+	u32 hdp_host_path_cntl, tmp;
 	int i, j, num_shader_engines, ps_thread_count;
 
 	switch (rdev->family) {
@@ -1936,8 +1936,12 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 		rdev->config.evergreen.tile_config |= (3 << 0);
 		break;
 	}
-	rdev->config.evergreen.tile_config |=
-		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
+	/* num banks is 8 on all fusion asics */
+	if (rdev->flags & RADEON_IS_IGP)
+		rdev->config.evergreen.tile_config |= 8 << 4;
+	else
+		rdev->config.evergreen.tile_config |=
+			((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
 	rdev->config.evergreen.tile_config |=
 		((mc_arb_ramcfg & BURSTLENGTH_MASK) >> BURSTLENGTH_SHIFT) << 8;
 	rdev->config.evergreen.tile_config |=
@@ -2141,6 +2145,10 @@ static void evergreen_gpu_init(struct radeon_device *rdev)
 	for (i = SQ_ALU_CONST_BUFFER_SIZE_HS_0; i < 0x29000; i += 4)
 		WREG32(i, 0);
 
+	tmp = RREG32(HDP_MISC_CNTL);
+	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
+	WREG32(HDP_MISC_CNTL, tmp);
+
 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
 
diff --git a/drivers/gpu/drm/radeon/evergreend.h b/drivers/gpu/drm/radeon/evergreend.h
index fc40e0cc3451..f37e91ee8a11 100644
--- a/drivers/gpu/drm/radeon/evergreend.h
+++ b/drivers/gpu/drm/radeon/evergreend.h
@@ -64,6 +64,8 @@
 #define GB_BACKEND_MAP  				0x98FC
 #define DMIF_ADDR_CONFIG  				0xBD4
 #define HDP_ADDR_CONFIG  				0x2F48
+#define HDP_MISC_CNTL  					0x2F4C
+#define		HDP_FLUSH_INVALIDATE_CACHE      	(1 << 0)
 
 #define	CC_SYS_RB_BACKEND_DISABLE			0x3F88
 #define	GC_USER_RB_BACKEND_DISABLE			0x9B7C
diff --git a/drivers/gpu/drm/radeon/ni.c b/drivers/gpu/drm/radeon/ni.c
index 3d8a7634bbe9..b205ba1cdd8f 100644
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@@ -417,7 +417,7 @@ static u32 cayman_get_tile_pipe_to_backend_map(struct radeon_device *rdev,
 		num_shader_engines = 1;
 	if (num_shader_engines > rdev->config.cayman.max_shader_engines)
 		num_shader_engines = rdev->config.cayman.max_shader_engines;
-	if (num_backends_per_asic > num_shader_engines)
+	if (num_backends_per_asic < num_shader_engines)
 		num_backends_per_asic = num_shader_engines;
 	if (num_backends_per_asic > (rdev->config.cayman.max_backends_per_se * num_shader_engines))
 		num_backends_per_asic = rdev->config.cayman.max_backends_per_se * num_shader_engines;
@@ -829,7 +829,7 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	rdev->config.cayman.tile_config |=
 		((mc_arb_ramcfg & NOOFBANK_MASK) >> NOOFBANK_SHIFT) << 4;
 	rdev->config.cayman.tile_config |=
-		(gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT;
+		((gb_addr_config & PIPE_INTERLEAVE_SIZE_MASK) >> PIPE_INTERLEAVE_SIZE_SHIFT) << 8;
 	rdev->config.cayman.tile_config |=
 		((gb_addr_config & ROW_SIZE_MASK) >> ROW_SIZE_SHIFT) << 12;
 
@@ -931,6 +931,10 @@ static void cayman_gpu_init(struct radeon_device *rdev)
 	WREG32(CB_PERF_CTR3_SEL_0, 0);
 	WREG32(CB_PERF_CTR3_SEL_1, 0);
 
+	tmp = RREG32(HDP_MISC_CNTL);
+	tmp |= HDP_FLUSH_INVALIDATE_CACHE;
+	WREG32(HDP_MISC_CNTL, tmp);
+
 	hdp_host_path_cntl = RREG32(HDP_HOST_PATH_CNTL);
 	WREG32(HDP_HOST_PATH_CNTL, hdp_host_path_cntl);
 
diff --git a/drivers/gpu/drm/radeon/nid.h b/drivers/gpu/drm/radeon/nid.h
index 0f9a08b53fbd..9736746da2d6 100644
--- a/drivers/gpu/drm/radeon/nid.h
+++ b/drivers/gpu/drm/radeon/nid.h
@@ -136,6 +136,8 @@
 #define	HDP_NONSURFACE_INFO				0x2C08
 #define	HDP_NONSURFACE_SIZE				0x2C0C
 #define HDP_ADDR_CONFIG  				0x2F48
+#define HDP_MISC_CNTL					0x2F4C
+#define 	HDP_FLUSH_INVALIDATE_CACHE			(1 << 0)
 
 #define	CC_SYS_RB_BACKEND_DISABLE			0x3F88
 #define	GC_USER_SYS_RB_BACKEND_DISABLE			0x3F8C
@@ -351,7 +353,7 @@
 #define		MULTI_GPU_TILE_SIZE_MASK		0x03000000
 #define		MULTI_GPU_TILE_SIZE_SHIFT		24
 #define		ROW_SIZE(x)             		((x) << 28)
-#define		ROW_SIZE_MASK				0x30000007
+#define		ROW_SIZE_MASK				0x30000000
 #define		ROW_SIZE_SHIFT				28
 #define		NUM_LOWER_PIPES(x)			((x) << 30)
 #define		NUM_LOWER_PIPES_MASK			0x40000000
diff --git a/drivers/gpu/drm/radeon/radeon_asic.c b/drivers/gpu/drm/radeon/radeon_asic.c
index ca576191d058..d948265db87e 100644
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@@ -782,6 +782,7 @@ static struct radeon_asic evergreen_asic = {
 	.hpd_fini = &evergreen_hpd_fini,
 	.hpd_sense = &evergreen_hpd_sense,
 	.hpd_set_polarity = &evergreen_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
 	.pm_misc = &evergreen_pm_misc,
 	.pm_prepare = &evergreen_pm_prepare,
@@ -828,6 +829,7 @@ static struct radeon_asic sumo_asic = {
 	.hpd_fini = &evergreen_hpd_fini,
 	.hpd_sense = &evergreen_hpd_sense,
 	.hpd_set_polarity = &evergreen_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
 	.pm_misc = &evergreen_pm_misc,
 	.pm_prepare = &evergreen_pm_prepare,
@@ -874,6 +876,7 @@ static struct radeon_asic btc_asic = {
 	.hpd_fini = &evergreen_hpd_fini,
 	.hpd_sense = &evergreen_hpd_sense,
 	.hpd_set_polarity = &evergreen_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
 	.pm_misc = &evergreen_pm_misc,
 	.pm_prepare = &evergreen_pm_prepare,
@@ -920,6 +923,7 @@ static struct radeon_asic cayman_asic = {
 	.hpd_fini = &evergreen_hpd_fini,
 	.hpd_sense = &evergreen_hpd_sense,
 	.hpd_set_polarity = &evergreen_hpd_set_polarity,
+	.ioctl_wait_idle = r600_ioctl_wait_idle,
 	.gui_idle = &r600_gui_idle,
 	.pm_misc = &evergreen_pm_misc,
 	.pm_prepare = &evergreen_pm_prepare,
diff --git a/drivers/gpu/drm/radeon/radeon_combios.c b/drivers/gpu/drm/radeon/radeon_combios.c
index 8caf546c8e92..5b991f7c6e2a 100644
--- a/drivers/gpu/drm/radeon/radeon_combios.c
+++ b/drivers/gpu/drm/radeon/radeon_combios.c
@@ -505,12 +505,18 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
 	 * DDC_VGA           = RADEON_GPIO_VGA_DDC
 	 * DDC_LCD           = RADEON_GPIOPAD_MASK
 	 * DDC_GPIO          = RADEON_MDGPIO_MASK
-	 * r1xx/r2xx
+	 * r1xx
 	 * DDC_MONID         = RADEON_GPIO_MONID
 	 * DDC_CRT2          = RADEON_GPIO_CRT2_DDC
-	 * r3xx
+	 * r200
 	 * DDC_MONID         = RADEON_GPIO_MONID
 	 * DDC_CRT2          = RADEON_GPIO_DVI_DDC
+	 * r300/r350
+	 * DDC_MONID         = RADEON_GPIO_DVI_DDC
+	 * DDC_CRT2          = RADEON_GPIO_DVI_DDC
+	 * rv2xx/rv3xx
+	 * DDC_MONID         = RADEON_GPIO_MONID
+	 * DDC_CRT2          = RADEON_GPIO_MONID
 	 * rs3xx/rs4xx
 	 * DDC_MONID         = RADEON_GPIOPAD_MASK
 	 * DDC_CRT2          = RADEON_GPIO_MONID
@@ -537,17 +543,26 @@ static struct radeon_i2c_bus_rec combios_setup_i2c_bus(struct radeon_device *rde
 		    rdev->family == CHIP_RS400 ||
 		    rdev->family == CHIP_RS480)
 			ddc_line = RADEON_GPIOPAD_MASK;
-		else
+		else if (rdev->family == CHIP_R300 ||
+			 rdev->family == CHIP_R350) {
+			ddc_line = RADEON_GPIO_DVI_DDC;
+			ddc = DDC_DVI;
+		} else
 			ddc_line = RADEON_GPIO_MONID;
 		break;
 	case DDC_CRT2:
-		if (rdev->family == CHIP_RS300 ||
-		    rdev->family == CHIP_RS400 ||
-		    rdev->family == CHIP_RS480)
-			ddc_line = RADEON_GPIO_MONID;
-		else if (rdev->family >= CHIP_R300) {
+		if (rdev->family == CHIP_R200 ||
+		    rdev->family == CHIP_R300 ||
+		    rdev->family == CHIP_R350) {
 			ddc_line = RADEON_GPIO_DVI_DDC;
 			ddc = DDC_DVI;
+		} else if (rdev->family == CHIP_RS300 ||
+			   rdev->family == CHIP_RS400 ||
+			   rdev->family == CHIP_RS480)
+			ddc_line = RADEON_GPIO_MONID;
+		else if (rdev->family >= CHIP_RV350) {
+			ddc_line = RADEON_GPIO_MONID;
+			ddc = DDC_MONID;
 		} else
 			ddc_line = RADEON_GPIO_CRT2_DDC;
 		break;
@@ -709,26 +724,42 @@ void radeon_combios_i2c_init(struct radeon_device *rdev)
 	struct drm_device *dev = rdev->ddev;
 	struct radeon_i2c_bus_rec i2c;
 
+	/* actual hw pads
+	 * r1xx/rs2xx/rs3xx
+	 * 0x60, 0x64, 0x68, 0x6c, gpiopads, mm
+	 * r200
+	 * 0x60, 0x64, 0x68, mm
+	 * r300/r350
+	 * 0x60, 0x64, mm
+	 * rv2xx/rv3xx/rs4xx
+	 * 0x60, 0x64, 0x68, gpiopads, mm
+	 */
 
+	/* 0x60 */
 	i2c = combios_setup_i2c_bus(rdev, DDC_DVI, 0, 0);
 	rdev->i2c_bus[0] = radeon_i2c_create(dev, &i2c, "DVI_DDC");
-
+	/* 0x64 */
 	i2c = combios_setup_i2c_bus(rdev, DDC_VGA, 0, 0);
 	rdev->i2c_bus[1] = radeon_i2c_create(dev, &i2c, "VGA_DDC");
 
+	/* mm i2c */
 	i2c.valid = true;
 	i2c.hw_capable = true;
 	i2c.mm_i2c = true;
 	i2c.i2c_id = 0xa0;
 	rdev->i2c_bus[2] = radeon_i2c_create(dev, &i2c, "MM_I2C");
 
-	if (rdev->family == CHIP_RS300 ||
-	    rdev->family == CHIP_RS400 ||
-	    rdev->family == CHIP_RS480) {
+	if (rdev->family == CHIP_R300 ||
+	    rdev->family == CHIP_R350) {
+		/* only 2 sw i2c pads */
+	} else if (rdev->family == CHIP_RS300 ||
+		   rdev->family == CHIP_RS400 ||
+		   rdev->family == CHIP_RS480) {
 		u16 offset;
 		u8 id, blocks, clk, data;
 		int i;
 
+		/* 0x68 */
 		i2c = combios_setup_i2c_bus(rdev, DDC_CRT2, 0, 0);
 		rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID");
 
@@ -740,6 +771,7 @@ void radeon_combios_i2c_init(struct radeon_device *rdev)
 				if (id == 136) {
 					clk = RBIOS8(offset + 3 + (i * 5) + 3);
 					data = RBIOS8(offset + 3 + (i * 5) + 4);
+					/* gpiopad */
 					i2c = combios_setup_i2c_bus(rdev, DDC_MONID,
 								    (1 << clk), (1 << data));
 					rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "GPIOPAD_MASK");
@@ -747,14 +779,15 @@ void radeon_combios_i2c_init(struct radeon_device *rdev)
 				}
 			}
 		}
-
-	} else if (rdev->family >= CHIP_R300) {
+	} else if (rdev->family >= CHIP_R200) {
+		/* 0x68 */
 		i2c = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0);
 		rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID");
 	} else {
+		/* 0x68 */
 		i2c = combios_setup_i2c_bus(rdev, DDC_MONID, 0, 0);
 		rdev->i2c_bus[3] = radeon_i2c_create(dev, &i2c, "MONID");
-
+		/* 0x6c */
 		i2c = combios_setup_i2c_bus(rdev, DDC_CRT2, 0, 0);
 		rdev->i2c_bus[4] = radeon_i2c_create(dev, &i2c, "CRT2_DDC");
 	}
@@ -2504,6 +2537,12 @@ bool radeon_get_legacy_connector_info_from_bios(struct drm_device *dev)
 	return true;
 }
 
+static const char *thermal_controller_names[] = {
+	"NONE",
+	"lm63",
+	"adm1032",
+};
+
 void radeon_combios_get_power_modes(struct radeon_device *rdev)
 {
 	struct drm_device *dev = rdev->ddev;
@@ -2524,6 +2563,54 @@ void radeon_combios_get_power_modes(struct radeon_device *rdev)
 		return;
 	}
 
+	/* check for a thermal chip */
+	offset = combios_get_table_offset(dev, COMBIOS_OVERDRIVE_INFO_TABLE);
+	if (offset) {
+		u8 thermal_controller = 0, gpio = 0, i2c_addr = 0, clk_bit = 0, data_bit = 0;
+		struct radeon_i2c_bus_rec i2c_bus;
+
+		rev = RBIOS8(offset);
+
+		if (rev == 0) {
+			thermal_controller = RBIOS8(offset + 3);
+			gpio = RBIOS8(offset + 4) & 0x3f;
+			i2c_addr = RBIOS8(offset + 5);
+		} else if (rev == 1) {
+			thermal_controller = RBIOS8(offset + 4);
+			gpio = RBIOS8(offset + 5) & 0x3f;
+			i2c_addr = RBIOS8(offset + 6);
+		} else if (rev == 2) {
+			thermal_controller = RBIOS8(offset + 4);
+			gpio = RBIOS8(offset + 5) & 0x3f;
+			i2c_addr = RBIOS8(offset + 6);
+			clk_bit = RBIOS8(offset + 0xa);
+			data_bit = RBIOS8(offset + 0xb);
+		}
+		if ((thermal_controller > 0) && (thermal_controller < 3)) {
+			DRM_INFO("Possible %s thermal controller at 0x%02x\n",
+				 thermal_controller_names[thermal_controller],
+				 i2c_addr >> 1);
+			if (gpio == DDC_LCD) {
+				/* MM i2c */
+				i2c_bus.valid = true;
+				i2c_bus.hw_capable = true;
+				i2c_bus.mm_i2c = true;
+				i2c_bus.i2c_id = 0xa0;
+			} else if (gpio == DDC_GPIO)
+				i2c_bus = combios_setup_i2c_bus(rdev, gpio, 1 << clk_bit, 1 << data_bit);
+			else
+				i2c_bus = combios_setup_i2c_bus(rdev, gpio, 0, 0);
+			rdev->pm.i2c_bus = radeon_i2c_lookup(rdev, &i2c_bus);
+			if (rdev->pm.i2c_bus) {
+				struct i2c_board_info info = { };
+				const char *name = thermal_controller_names[thermal_controller];
+				info.addr = i2c_addr >> 1;
+				strlcpy(info.type, name, sizeof(info.type));
+				i2c_new_device(&rdev->pm.i2c_bus->adapter, &info);
+			}
+		}
+	}
+
 	if (rdev->flags & RADEON_IS_MOBILITY) {
 		offset = combios_get_table_offset(dev, COMBIOS_POWERPLAY_INFO_TABLE);
 		if (offset) {
diff --git a/drivers/gpu/drm/radeon/radeon_connectors.c b/drivers/gpu/drm/radeon/radeon_connectors.c
index 5f45fa12bb8b..ee1dccb3fec9 100644
--- a/drivers/gpu/drm/radeon/radeon_connectors.c
+++ b/drivers/gpu/drm/radeon/radeon_connectors.c
@@ -50,20 +50,21 @@ void radeon_connector_hotplug(struct drm_connector *connector)
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 
-	if (radeon_connector->hpd.hpd != RADEON_HPD_NONE)
-		radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
-
-	if ((connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort) ||
-	    (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
-		if ((radeon_dp_getsinktype(radeon_connector) == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
-		    (radeon_dp_getsinktype(radeon_connector) == CONNECTOR_OBJECT_ID_eDP)) {
-			if (radeon_dp_needs_link_train(radeon_connector)) {
-				if (connector->encoder)
-					dp_link_train(connector->encoder, connector);
-			}
-		}
-	}
+	radeon_hpd_set_polarity(rdev, radeon_connector->hpd.hpd);
+
+	/* powering up/down the eDP panel generates hpd events which
+	 * can interfere with modesetting.
+	 */
+	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+		return;
 
+	/* pre-r600 did not always have the hpd pins mapped accurately to connectors */
+	if (rdev->family >= CHIP_R600) {
+		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
+		else
+			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
+	}
 }
 
 static void radeon_property_change_mode(struct drm_encoder *encoder)
@@ -1054,23 +1055,124 @@ static int radeon_dp_get_modes(struct drm_connector *connector)
 	int ret;
 
 	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		struct drm_encoder *encoder;
+		struct drm_display_mode *mode;
+
 		if (!radeon_dig_connector->edp_on)
 			atombios_set_edp_panel_power(connector,
 						     ATOM_TRANSMITTER_ACTION_POWER_ON);
-	}
-	ret = radeon_ddc_get_modes(radeon_connector);
-	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		ret = radeon_ddc_get_modes(radeon_connector);
 		if (!radeon_dig_connector->edp_on)
 			atombios_set_edp_panel_power(connector,
 						     ATOM_TRANSMITTER_ACTION_POWER_OFF);
-	}
+
+		if (ret > 0) {
+			encoder = radeon_best_single_encoder(connector);
+			if (encoder) {
+				radeon_fixup_lvds_native_mode(encoder, connector);
+				/* add scaled modes */
+				radeon_add_common_modes(encoder, connector);
+			}
+			return ret;
+		}
+
+		encoder = radeon_best_single_encoder(connector);
+		if (!encoder)
+			return 0;
+
+		/* we have no EDID modes */
+		mode = radeon_fp_native_mode(encoder);
+		if (mode) {
+			ret = 1;
+			drm_mode_probed_add(connector, mode);
+			/* add the width/height from vbios tables if available */
+			connector->display_info.width_mm = mode->width_mm;
+			connector->display_info.height_mm = mode->height_mm;
+			/* add scaled modes */
+			radeon_add_common_modes(encoder, connector);
+		}
+	} else
+		ret = radeon_ddc_get_modes(radeon_connector);
 
 	return ret;
 }
 
+bool radeon_connector_encoder_is_dp_bridge(struct drm_connector *connector)
+{
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+	int i;
+	bool found = false;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0)
+			break;
+
+		obj = drm_mode_object_find(connector->dev, connector->encoder_ids[i], DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			continue;
+
+		encoder = obj_to_encoder(obj);
+		radeon_encoder = to_radeon_encoder(encoder);
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_TRAVIS:
+		case ENCODER_OBJECT_ID_NUTMEG:
+			found = true;
+			break;
+		default:
+			break;
+		}
+	}
+
+	return found;
+}
+
+bool radeon_connector_encoder_is_hbr2(struct drm_connector *connector)
+{
+	struct drm_mode_object *obj;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
+	int i;
+	bool found = false;
+
+	for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
+		if (connector->encoder_ids[i] == 0)
+			break;
+
+		obj = drm_mode_object_find(connector->dev, connector->encoder_ids[i], DRM_MODE_OBJECT_ENCODER);
+		if (!obj)
+			continue;
+
+		encoder = obj_to_encoder(obj);
+		radeon_encoder = to_radeon_encoder(encoder);
+		if (radeon_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
+			found = true;
+	}
+
+	return found;
+}
+
+bool radeon_connector_is_dp12_capable(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
+
+	if (ASIC_IS_DCE5(rdev) &&
+	    (rdev->clock.dp_extclk >= 53900) &&
+	    radeon_connector_encoder_is_hbr2(connector)) {
+		return true;
+	}
+
+	return false;
+}
+
 static enum drm_connector_status
 radeon_dp_detect(struct drm_connector *connector, bool force)
 {
+	struct drm_device *dev = connector->dev;
+	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 	enum drm_connector_status ret = connector_status_disconnected;
 	struct radeon_connector_atom_dig *radeon_dig_connector = radeon_connector->con_priv;
@@ -1081,6 +1183,15 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 	}
 
 	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		struct drm_encoder *encoder = radeon_best_single_encoder(connector);
+		if (encoder) {
+			struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+			struct drm_display_mode *native_mode = &radeon_encoder->native_mode;
+
+			/* check if panel is valid */
+			if (native_mode->hdisplay >= 320 && native_mode->vdisplay >= 240)
+				ret = connector_status_connected;
+		}
 		/* eDP is always DP */
 		radeon_dig_connector->dp_sink_type = CONNECTOR_OBJECT_ID_DISPLAYPORT;
 		if (!radeon_dig_connector->edp_on)
@@ -1093,12 +1204,18 @@ radeon_dp_detect(struct drm_connector *connector, bool force)
 						     ATOM_TRANSMITTER_ACTION_POWER_OFF);
 	} else {
 		radeon_dig_connector->dp_sink_type = radeon_dp_getsinktype(radeon_connector);
-		if (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
-			if (radeon_dp_getdpcd(radeon_connector))
-				ret = connector_status_connected;
+		if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd)) {
+			ret = connector_status_connected;
+			if (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT)
+				radeon_dp_getdpcd(radeon_connector);
 		} else {
-			if (radeon_ddc_probe(radeon_connector))
-				ret = connector_status_connected;
+			if (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) {
+				if (radeon_dp_getdpcd(radeon_connector))
+					ret = connector_status_connected;
+			} else {
+				if (radeon_ddc_probe(radeon_connector))
+					ret = connector_status_connected;
+			}
 		}
 	}
 
@@ -1114,11 +1231,38 @@ static int radeon_dp_mode_valid(struct drm_connector *connector,
 
 	/* XXX check mode bandwidth */
 
-	if ((radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
-	    (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
-		return radeon_dp_mode_valid_helper(radeon_connector, mode);
-	else
+	if (connector->connector_type == DRM_MODE_CONNECTOR_eDP) {
+		struct drm_encoder *encoder = radeon_best_single_encoder(connector);
+
+		if ((mode->hdisplay < 320) || (mode->vdisplay < 240))
+			return MODE_PANEL;
+
+		if (encoder) {
+			struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+			struct drm_display_mode *native_mode = &radeon_encoder->native_mode;
+
+		/* AVIVO hardware supports downscaling modes larger than the panel
+			 * to the panel size, but I'm not sure this is desirable.
+			 */
+			if ((mode->hdisplay > native_mode->hdisplay) ||
+			    (mode->vdisplay > native_mode->vdisplay))
+				return MODE_PANEL;
+
+			/* if scaling is disabled, block non-native modes */
+			if (radeon_encoder->rmx_type == RMX_OFF) {
+				if ((mode->hdisplay != native_mode->hdisplay) ||
+				    (mode->vdisplay != native_mode->vdisplay))
+					return MODE_PANEL;
+			}
+		}
 		return MODE_OK;
+	} else {
+		if ((radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
+		    (radeon_dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
+			return radeon_dp_mode_valid_helper(connector, mode);
+		else
+			return MODE_OK;
+	}
 }
 
 struct drm_connector_helper_funcs radeon_dp_connector_helper_funcs = {
@@ -1151,8 +1295,11 @@ radeon_add_atom_connector(struct drm_device *dev,
 	struct drm_connector *connector;
 	struct radeon_connector *radeon_connector;
 	struct radeon_connector_atom_dig *radeon_dig_connector;
+	struct drm_encoder *encoder;
+	struct radeon_encoder *radeon_encoder;
 	uint32_t subpixel_order = SubPixelNone;
 	bool shared_ddc = false;
+	bool is_dp_bridge = false;
 
 	if (connector_type == DRM_MODE_CONNECTOR_Unknown)
 		return;
@@ -1184,6 +1331,21 @@ radeon_add_atom_connector(struct drm_device *dev,
 		}
 	}
 
+	/* check if it's a dp bridge */
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		radeon_encoder = to_radeon_encoder(encoder);
+		if (radeon_encoder->devices & supported_device) {
+			switch (radeon_encoder->encoder_id) {
+			case ENCODER_OBJECT_ID_TRAVIS:
+			case ENCODER_OBJECT_ID_NUTMEG:
+				is_dp_bridge = true;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
 	radeon_connector = kzalloc(sizeof(struct radeon_connector), GFP_KERNEL);
 	if (!radeon_connector)
 		return;
@@ -1201,61 +1363,39 @@ radeon_add_atom_connector(struct drm_device *dev,
 		if (!radeon_connector->router_bus)
 			DRM_ERROR("Failed to assign router i2c bus! Check dmesg for i2c errors.\n");
 	}
-	switch (connector_type) {
-	case DRM_MODE_CONNECTOR_VGA:
-		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
-		if (i2c_bus->valid) {
-			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
-			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
-		}
-		radeon_connector->dac_load_detect = true;
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.load_detect_property,
-					      1);
-		/* no HPD on analog connectors */
-		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->polled = DRM_CONNECTOR_POLL_CONNECT;
-		connector->interlace_allowed = true;
-		connector->doublescan_allowed = true;
-		break;
-	case DRM_MODE_CONNECTOR_DVIA:
-		drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
-		if (i2c_bus->valid) {
-			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
-			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
-		}
-		radeon_connector->dac_load_detect = true;
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.load_detect_property,
-					      1);
-		/* no HPD on analog connectors */
-		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->interlace_allowed = true;
-		connector->doublescan_allowed = true;
-		break;
-	case DRM_MODE_CONNECTOR_DVII:
-	case DRM_MODE_CONNECTOR_DVID:
+
+	if (is_dp_bridge) {
 		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
 		if (!radeon_dig_connector)
 			goto failed;
 		radeon_dig_connector->igp_lane_info = igp_lane_info;
 		radeon_connector->con_priv = radeon_dig_connector;
-		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+		drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type);
+		drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs);
 		if (i2c_bus->valid) {
+			/* add DP i2c bus */
+			if (connector_type == DRM_MODE_CONNECTOR_eDP)
+				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "eDP-auxch");
+			else
+				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "DP-auxch");
+			if (!radeon_dig_connector->dp_i2c_bus)
+				DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n");
 			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
 			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+				DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
 		}
-		subpixel_order = SubPixelHorizontalRGB;
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.coherent_mode_property,
-					      1);
-		if (ASIC_IS_AVIVO(rdev)) {
+		switch (connector_type) {
+		case DRM_MODE_CONNECTOR_VGA:
+		case DRM_MODE_CONNECTOR_DVIA:
+		default:
+			connector->interlace_allowed = true;
+			connector->doublescan_allowed = true;
+			break;
+		case DRM_MODE_CONNECTOR_DVII:
+		case DRM_MODE_CONNECTOR_DVID:
+		case DRM_MODE_CONNECTOR_HDMIA:
+		case DRM_MODE_CONNECTOR_HDMIB:
+		case DRM_MODE_CONNECTOR_DisplayPort:
 			drm_connector_attach_property(&radeon_connector->base,
 						      rdev->mode_info.underscan_property,
 						      UNDERSCAN_OFF);
@@ -1265,131 +1405,234 @@ radeon_add_atom_connector(struct drm_device *dev,
 			drm_connector_attach_property(&radeon_connector->base,
 						      rdev->mode_info.underscan_vborder_property,
 						      0);
+			subpixel_order = SubPixelHorizontalRGB;
+			connector->interlace_allowed = true;
+			if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+				connector->doublescan_allowed = true;
+			else
+				connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_LVDS:
+		case DRM_MODE_CONNECTOR_eDP:
+			drm_connector_attach_property(&radeon_connector->base,
+						      dev->mode_config.scaling_mode_property,
+						      DRM_MODE_SCALE_FULLSCREEN);
+			subpixel_order = SubPixelHorizontalRGB;
+			connector->interlace_allowed = false;
+			connector->doublescan_allowed = false;
+			break;
 		}
-		if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+	} else {
+		switch (connector_type) {
+		case DRM_MODE_CONNECTOR_VGA:
+			drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("VGA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
 			radeon_connector->dac_load_detect = true;
 			drm_connector_attach_property(&radeon_connector->base,
 						      rdev->mode_info.load_detect_property,
 						      1);
-		}
-		connector->interlace_allowed = true;
-		if (connector_type == DRM_MODE_CONNECTOR_DVII)
+			/* no HPD on analog connectors */
+			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
-		else
-			connector->doublescan_allowed = false;
-		break;
-	case DRM_MODE_CONNECTOR_HDMIA:
-	case DRM_MODE_CONNECTOR_HDMIB:
-		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
-		if (!radeon_dig_connector)
-			goto failed;
-		radeon_dig_connector->igp_lane_info = igp_lane_info;
-		radeon_connector->con_priv = radeon_dig_connector;
-		drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
-		if (i2c_bus->valid) {
-			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
-			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
-		}
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.coherent_mode_property,
-					      1);
-		if (ASIC_IS_AVIVO(rdev)) {
+			break;
+		case DRM_MODE_CONNECTOR_DVIA:
+			drm_connector_init(dev, &radeon_connector->base, &radeon_vga_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_vga_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("DVIA: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
+			radeon_connector->dac_load_detect = true;
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_property,
-						      UNDERSCAN_OFF);
+						      rdev->mode_info.load_detect_property,
+						      1);
+			/* no HPD on analog connectors */
+			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
+			connector->interlace_allowed = true;
+			connector->doublescan_allowed = true;
+			break;
+		case DRM_MODE_CONNECTOR_DVII:
+		case DRM_MODE_CONNECTOR_DVID:
+			radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+			if (!radeon_dig_connector)
+				goto failed;
+			radeon_dig_connector->igp_lane_info = igp_lane_info;
+			radeon_connector->con_priv = radeon_dig_connector;
+			drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("DVI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
+			subpixel_order = SubPixelHorizontalRGB;
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_hborder_property,
-						      0);
+						      rdev->mode_info.coherent_mode_property,
+						      1);
+			if (ASIC_IS_AVIVO(rdev)) {
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_property,
+							      UNDERSCAN_OFF);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_hborder_property,
+							      0);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_vborder_property,
+							      0);
+			}
+			if (connector_type == DRM_MODE_CONNECTOR_DVII) {
+				radeon_connector->dac_load_detect = true;
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.load_detect_property,
+							      1);
+			}
+			connector->interlace_allowed = true;
+			if (connector_type == DRM_MODE_CONNECTOR_DVII)
+				connector->doublescan_allowed = true;
+			else
+				connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_HDMIA:
+		case DRM_MODE_CONNECTOR_HDMIB:
+			radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+			if (!radeon_dig_connector)
+				goto failed;
+			radeon_dig_connector->igp_lane_info = igp_lane_info;
+			radeon_connector->con_priv = radeon_dig_connector;
+			drm_connector_init(dev, &radeon_connector->base, &radeon_dvi_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_dvi_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("HDMI: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_vborder_property,
-						      0);
-		}
-		subpixel_order = SubPixelHorizontalRGB;
-		connector->interlace_allowed = true;
-		if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
-			connector->doublescan_allowed = true;
-		else
-			connector->doublescan_allowed = false;
-		break;
-	case DRM_MODE_CONNECTOR_DisplayPort:
-	case DRM_MODE_CONNECTOR_eDP:
-		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
-		if (!radeon_dig_connector)
-			goto failed;
-		radeon_dig_connector->igp_lane_info = igp_lane_info;
-		radeon_connector->con_priv = radeon_dig_connector;
-		drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs);
-		if (i2c_bus->valid) {
-			/* add DP i2c bus */
-			if (connector_type == DRM_MODE_CONNECTOR_eDP)
-				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "eDP-auxch");
+						      rdev->mode_info.coherent_mode_property,
+						      1);
+			if (ASIC_IS_AVIVO(rdev)) {
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_property,
+							      UNDERSCAN_OFF);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_hborder_property,
+							      0);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_vborder_property,
+							      0);
+			}
+			subpixel_order = SubPixelHorizontalRGB;
+			connector->interlace_allowed = true;
+			if (connector_type == DRM_MODE_CONNECTOR_HDMIB)
+				connector->doublescan_allowed = true;
 			else
+				connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_DisplayPort:
+			radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+			if (!radeon_dig_connector)
+				goto failed;
+			radeon_dig_connector->igp_lane_info = igp_lane_info;
+			radeon_connector->con_priv = radeon_dig_connector;
+			drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				/* add DP i2c bus */
 				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "DP-auxch");
-			if (!radeon_dig_connector->dp_i2c_bus)
-				DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n");
-			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
-			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
-		}
-		subpixel_order = SubPixelHorizontalRGB;
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.coherent_mode_property,
-					      1);
-		if (ASIC_IS_AVIVO(rdev)) {
+				if (!radeon_dig_connector->dp_i2c_bus)
+					DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n");
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
+			subpixel_order = SubPixelHorizontalRGB;
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_property,
-						      UNDERSCAN_OFF);
+						      rdev->mode_info.coherent_mode_property,
+						      1);
+			if (ASIC_IS_AVIVO(rdev)) {
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_property,
+							      UNDERSCAN_OFF);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_hborder_property,
+							      0);
+				drm_connector_attach_property(&radeon_connector->base,
+							      rdev->mode_info.underscan_vborder_property,
+							      0);
+			}
+			connector->interlace_allowed = true;
+			/* in theory with a DP to VGA converter... */
+			connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_eDP:
+			radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+			if (!radeon_dig_connector)
+				goto failed;
+			radeon_dig_connector->igp_lane_info = igp_lane_info;
+			radeon_connector->con_priv = radeon_dig_connector;
+			drm_connector_init(dev, &radeon_connector->base, &radeon_dp_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_dp_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				/* add DP i2c bus */
+				radeon_dig_connector->dp_i2c_bus = radeon_i2c_create_dp(dev, i2c_bus, "eDP-auxch");
+				if (!radeon_dig_connector->dp_i2c_bus)
+					DRM_ERROR("DP: Failed to assign dp ddc bus! Check dmesg for i2c errors.\n");
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("DP: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_hborder_property,
-						      0);
+						      dev->mode_config.scaling_mode_property,
+						      DRM_MODE_SCALE_FULLSCREEN);
+			subpixel_order = SubPixelHorizontalRGB;
+			connector->interlace_allowed = false;
+			connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_SVIDEO:
+		case DRM_MODE_CONNECTOR_Composite:
+		case DRM_MODE_CONNECTOR_9PinDIN:
+			drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
+			radeon_connector->dac_load_detect = true;
 			drm_connector_attach_property(&radeon_connector->base,
-						      rdev->mode_info.underscan_vborder_property,
-						      0);
-		}
-		connector->interlace_allowed = true;
-		/* in theory with a DP to VGA converter... */
-		connector->doublescan_allowed = false;
-		break;
-	case DRM_MODE_CONNECTOR_SVIDEO:
-	case DRM_MODE_CONNECTOR_Composite:
-	case DRM_MODE_CONNECTOR_9PinDIN:
-		drm_connector_init(dev, &radeon_connector->base, &radeon_tv_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_tv_connector_helper_funcs);
-		radeon_connector->dac_load_detect = true;
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.load_detect_property,
-					      1);
-		drm_connector_attach_property(&radeon_connector->base,
-					      rdev->mode_info.tv_std_property,
-					      radeon_atombios_get_tv_info(rdev));
-		/* no HPD on analog connectors */
-		radeon_connector->hpd.hpd = RADEON_HPD_NONE;
-		connector->interlace_allowed = false;
-		connector->doublescan_allowed = false;
-		break;
-	case DRM_MODE_CONNECTOR_LVDS:
-		radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
-		if (!radeon_dig_connector)
-			goto failed;
-		radeon_dig_connector->igp_lane_info = igp_lane_info;
-		radeon_connector->con_priv = radeon_dig_connector;
-		drm_connector_init(dev, &radeon_connector->base, &radeon_lvds_connector_funcs, connector_type);
-		drm_connector_helper_add(&radeon_connector->base, &radeon_lvds_connector_helper_funcs);
-		if (i2c_bus->valid) {
-			radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
-			if (!radeon_connector->ddc_bus)
-				DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+						      rdev->mode_info.load_detect_property,
+						      1);
+			drm_connector_attach_property(&radeon_connector->base,
+						      rdev->mode_info.tv_std_property,
+						      radeon_atombios_get_tv_info(rdev));
+			/* no HPD on analog connectors */
+			radeon_connector->hpd.hpd = RADEON_HPD_NONE;
+			connector->interlace_allowed = false;
+			connector->doublescan_allowed = false;
+			break;
+		case DRM_MODE_CONNECTOR_LVDS:
+			radeon_dig_connector = kzalloc(sizeof(struct radeon_connector_atom_dig), GFP_KERNEL);
+			if (!radeon_dig_connector)
+				goto failed;
+			radeon_dig_connector->igp_lane_info = igp_lane_info;
+			radeon_connector->con_priv = radeon_dig_connector;
+			drm_connector_init(dev, &radeon_connector->base, &radeon_lvds_connector_funcs, connector_type);
+			drm_connector_helper_add(&radeon_connector->base, &radeon_lvds_connector_helper_funcs);
+			if (i2c_bus->valid) {
+				radeon_connector->ddc_bus = radeon_i2c_lookup(rdev, i2c_bus);
+				if (!radeon_connector->ddc_bus)
+					DRM_ERROR("LVDS: Failed to assign ddc bus! Check dmesg for i2c errors.\n");
+			}
+			drm_connector_attach_property(&radeon_connector->base,
+						      dev->mode_config.scaling_mode_property,
+						      DRM_MODE_SCALE_FULLSCREEN);
+			subpixel_order = SubPixelHorizontalRGB;
+			connector->interlace_allowed = false;
+			connector->doublescan_allowed = false;
+			break;
 		}
-		drm_connector_attach_property(&radeon_connector->base,
-					      dev->mode_config.scaling_mode_property,
-					      DRM_MODE_SCALE_FULLSCREEN);
-		subpixel_order = SubPixelHorizontalRGB;
-		connector->interlace_allowed = false;
-		connector->doublescan_allowed = false;
-		break;
 	}
 
 	if (radeon_connector->hpd.hpd == RADEON_HPD_NONE) {
diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c
index 890217e678d3..5b61364e31f4 100644
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@@ -923,6 +923,9 @@ int radeon_resume_kms(struct drm_device *dev)
 	radeon_fbdev_set_suspend(rdev, 0);
 	console_unlock();
 
+	/* init dig PHYs */
+	if (rdev->is_atom_bios)
+		radeon_atom_encoder_init(rdev);
 	/* reset hpd state */
 	radeon_hpd_init(rdev);
 	/* blat the mode back in */
diff --git a/drivers/gpu/drm/radeon/radeon_display.c b/drivers/gpu/drm/radeon/radeon_display.c
index bdbab5c43bdc..ae247eec87c0 100644
--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@@ -1087,8 +1087,9 @@ void radeon_compute_pll_legacy(struct radeon_pll *pll,
 	*frac_fb_div_p = best_frac_feedback_div;
 	*ref_div_p = best_ref_div;
 	*post_div_p = best_post_div;
-	DRM_DEBUG_KMS("%d %d, pll dividers - fb: %d.%d ref: %d, post %d\n",
-		      freq, best_freq / 1000, best_feedback_div, best_frac_feedback_div,
+	DRM_DEBUG_KMS("%lld %d, pll dividers - fb: %d.%d ref: %d, post %d\n",
+		      (long long)freq,
+		      best_freq / 1000, best_feedback_div, best_frac_feedback_div,
 		      best_ref_div, best_post_div);
 
 }
@@ -1344,6 +1345,11 @@ int radeon_modeset_init(struct radeon_device *rdev)
 	if (!ret) {
 		return ret;
 	}
+
+	/* init dig PHYs */
+	if (rdev->is_atom_bios)
+		radeon_atom_encoder_init(rdev);
+
 	/* initialize hpd */
 	radeon_hpd_init(rdev);
 
diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c
index 63d2de8771dc..1d330606292f 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@@ -50,9 +50,10 @@
  *   2.7.0 - fixups for r600 2D tiling support. (no external ABI change), add eg dyn gpr regs
  *   2.8.0 - pageflip support, r500 US_FORMAT regs. r500 ARGB2101010 colorbuf, r300->r500 CMASK, clock crystal query
  *   2.9.0 - r600 tiling (s3tc,rgtc) working, SET_PREDICATION packet 3 on r600 + eg, backend query
+ *   2.10.0 - fusion 2D tiling
  */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	9
+#define KMS_DRIVER_MINOR	10
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
diff --git a/drivers/gpu/drm/radeon/radeon_encoders.c b/drivers/gpu/drm/radeon/radeon_encoders.c
index b4274883227f..1b557554696e 100644
--- a/drivers/gpu/drm/radeon/radeon_encoders.c
+++ b/drivers/gpu/drm/radeon/radeon_encoders.c
@@ -229,6 +229,22 @@ radeon_get_connector_for_encoder(struct drm_encoder *encoder)
 	return NULL;
 }
 
+static struct drm_connector *
+radeon_get_connector_for_encoder_init(struct drm_encoder *encoder)
+{
+	struct drm_device *dev = encoder->dev;
+	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+	struct drm_connector *connector;
+	struct radeon_connector *radeon_connector;
+
+	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
+		radeon_connector = to_radeon_connector(connector);
+		if (radeon_encoder->devices & radeon_connector->devices)
+			return connector;
+	}
+	return NULL;
+}
+
 struct drm_encoder *radeon_atom_get_external_encoder(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
@@ -250,6 +266,25 @@ struct drm_encoder *radeon_atom_get_external_encoder(struct drm_encoder *encoder
 	return NULL;
 }
 
+bool radeon_encoder_is_dp_bridge(struct drm_encoder *encoder)
+{
+	struct drm_encoder *other_encoder = radeon_atom_get_external_encoder(encoder);
+
+	if (other_encoder) {
+		struct radeon_encoder *radeon_encoder = to_radeon_encoder(other_encoder);
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_TRAVIS:
+		case ENCODER_OBJECT_ID_NUTMEG:
+			return true;
+		default:
+			return false;
+		}
+	}
+
+	return false;
+}
+
 void radeon_panel_mode_fixup(struct drm_encoder *encoder,
 			     struct drm_display_mode *adjusted_mode)
 {
@@ -621,6 +656,10 @@ atombios_get_encoder_mode(struct drm_encoder *encoder)
 	struct radeon_connector *radeon_connector;
 	struct radeon_connector_atom_dig *dig_connector;
 
+	/* dp bridges are always DP */
+	if (radeon_encoder_is_dp_bridge(encoder))
+		return ATOM_ENCODER_MODE_DP;
+
 	connector = radeon_get_connector_for_encoder(encoder);
 	if (!connector) {
 		switch (radeon_encoder->encoder_id) {
@@ -668,7 +707,6 @@ atombios_get_encoder_mode(struct drm_encoder *encoder)
 		return ATOM_ENCODER_MODE_LVDS;
 		break;
 	case DRM_MODE_CONNECTOR_DisplayPort:
-	case DRM_MODE_CONNECTOR_eDP:
 		dig_connector = radeon_connector->con_priv;
 		if ((dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_DISPLAYPORT) ||
 		    (dig_connector->dp_sink_type == CONNECTOR_OBJECT_ID_eDP))
@@ -682,6 +720,8 @@ atombios_get_encoder_mode(struct drm_encoder *encoder)
 		} else
 			return ATOM_ENCODER_MODE_DVI;
 		break;
+	case DRM_MODE_CONNECTOR_eDP:
+		return ATOM_ENCODER_MODE_DP;
 	case DRM_MODE_CONNECTOR_DVIA:
 	case DRM_MODE_CONNECTOR_VGA:
 		return ATOM_ENCODER_MODE_CRT;
@@ -747,7 +787,7 @@ union dig_encoder_control {
 };
 
 void
-atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
+atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct radeon_device *rdev = dev->dev_private;
@@ -760,6 +800,7 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 	int dp_clock = 0;
 	int dp_lane_count = 0;
 	int hpd_id = RADEON_HPD_NONE;
+	int bpc = 8;
 
 	if (connector) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -769,6 +810,7 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 		dp_clock = dig_connector->dp_clock;
 		dp_lane_count = dig_connector->dp_lane_count;
 		hpd_id = radeon_connector->hpd.hpd;
+		bpc = connector->display_info.bpc;
 	}
 
 	/* no dig encoder assigned */
@@ -791,7 +833,10 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 
 	args.v1.ucAction = action;
 	args.v1.usPixelClock = cpu_to_le16(radeon_encoder->pixel_clock / 10);
-	args.v1.ucEncoderMode = atombios_get_encoder_mode(encoder);
+	if (action == ATOM_ENCODER_CMD_SETUP_PANEL_MODE)
+		args.v3.ucPanelMode = panel_mode;
+	else
+		args.v1.ucEncoderMode = atombios_get_encoder_mode(encoder);
 
 	if ((args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP) ||
 	    (args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP_MST))
@@ -810,7 +855,27 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 				args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V4_DPLINKRATE_5_40GHZ;
 		}
 		args.v4.acConfig.ucDigSel = dig->dig_encoder;
-		args.v4.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+		switch (bpc) {
+		case 0:
+			args.v4.ucBitPerColor = PANEL_BPC_UNDEFINE;
+			break;
+		case 6:
+			args.v4.ucBitPerColor = PANEL_6BIT_PER_COLOR;
+			break;
+		case 8:
+		default:
+			args.v4.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+			break;
+		case 10:
+			args.v4.ucBitPerColor = PANEL_10BIT_PER_COLOR;
+			break;
+		case 12:
+			args.v4.ucBitPerColor = PANEL_12BIT_PER_COLOR;
+			break;
+		case 16:
+			args.v4.ucBitPerColor = PANEL_16BIT_PER_COLOR;
+			break;
+		}
 		if (hpd_id == RADEON_HPD_NONE)
 			args.v4.ucHPD_ID = 0;
 		else
@@ -819,7 +884,27 @@ atombios_dig_encoder_setup(struct drm_encoder *encoder, int action)
 		if ((args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP) && (dp_clock == 270000))
 			args.v1.ucConfig |= ATOM_ENCODER_CONFIG_V3_DPLINKRATE_2_70GHZ;
 		args.v3.acConfig.ucDigSel = dig->dig_encoder;
-		args.v3.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+		switch (bpc) {
+		case 0:
+			args.v3.ucBitPerColor = PANEL_BPC_UNDEFINE;
+			break;
+		case 6:
+			args.v3.ucBitPerColor = PANEL_6BIT_PER_COLOR;
+			break;
+		case 8:
+		default:
+			args.v3.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+			break;
+		case 10:
+			args.v3.ucBitPerColor = PANEL_10BIT_PER_COLOR;
+			break;
+		case 12:
+			args.v3.ucBitPerColor = PANEL_12BIT_PER_COLOR;
+			break;
+		case 16:
+			args.v3.ucBitPerColor = PANEL_16BIT_PER_COLOR;
+			break;
+		}
 	} else {
 		if ((args.v1.ucEncoderMode == ATOM_ENCODER_MODE_DP) && (dp_clock == 270000))
 			args.v1.ucConfig |= ATOM_ENCODER_CONFIG_DPLINKRATE_2_70GHZ;
@@ -859,7 +944,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 	struct radeon_device *rdev = dev->dev_private;
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
-	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
+	struct drm_connector *connector;
 	union dig_transmitter_control args;
 	int index = 0;
 	uint8_t frev, crev;
@@ -870,6 +955,11 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 	int connector_object_id = 0;
 	int igp_lane_info = 0;
 
+	if (action == ATOM_TRANSMITTER_ACTION_INIT)
+		connector = radeon_get_connector_for_encoder_init(encoder);
+	else
+		connector = radeon_get_connector_for_encoder(encoder);
+
 	if (connector) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
 		struct radeon_connector_atom_dig *dig_connector =
@@ -931,10 +1021,10 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 		else
 			args.v3.ucLaneNum = 4;
 
-		if (dig->linkb) {
+		if (dig->linkb)
 			args.v3.acConfig.ucLinkSel = 1;
+		if (dig->dig_encoder & 1)
 			args.v3.acConfig.ucEncoderSel = 1;
-		}
 
 		/* Select the PLL for the PHY
 		 * DP PHY should be clocked from external src if there is
@@ -946,11 +1036,16 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 		}
 
 		if (ASIC_IS_DCE5(rdev)) {
-			if (is_dp && rdev->clock.dp_extclk)
-				args.v4.acConfig.ucRefClkSource = 3; /* external src */
-			else
+			/* On DCE5 DCPLL usually generates the DP ref clock */
+			if (is_dp) {
+				if (rdev->clock.dp_extclk)
+					args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_EXTCLK;
+				else
+					args.v4.acConfig.ucRefClkSource = ENCODER_REFCLK_SRC_DCPLL;
+			} else
 				args.v4.acConfig.ucRefClkSource = pll_id;
 		} else {
+			/* On DCE4, if there is an external clock, it generates the DP ref clock */
 			if (is_dp && rdev->clock.dp_extclk)
 				args.v3.acConfig.ucRefClkSource = 2; /* external src */
 			else
@@ -1047,7 +1142,7 @@ atombios_dig_transmitter_setup(struct drm_encoder *encoder, int action, uint8_t
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
 }
 
-void
+bool
 atombios_set_edp_panel_power(struct drm_connector *connector, int action)
 {
 	struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -1058,23 +1153,37 @@ atombios_set_edp_panel_power(struct drm_connector *connector, int action)
 	uint8_t frev, crev;
 
 	if (connector->connector_type != DRM_MODE_CONNECTOR_eDP)
-		return;
+		goto done;
 
 	if (!ASIC_IS_DCE4(rdev))
-		return;
+		goto done;
 
 	if ((action != ATOM_TRANSMITTER_ACTION_POWER_ON) &&
 	    (action != ATOM_TRANSMITTER_ACTION_POWER_OFF))
-		return;
+		goto done;
 
 	if (!atom_parse_cmd_header(rdev->mode_info.atom_context, index, &frev, &crev))
-		return;
+		goto done;
 
 	memset(&args, 0, sizeof(args));
 
 	args.v1.ucAction = action;
 
 	atom_execute_table(rdev->mode_info.atom_context, index, (uint32_t *)&args);
+
+	/* wait for the panel to power up */
+	if (action == ATOM_TRANSMITTER_ACTION_POWER_ON) {
+		int i;
+
+		for (i = 0; i < 300; i++) {
+			if (radeon_hpd_sense(rdev, radeon_connector->hpd.hpd))
+				return true;
+			mdelay(1);
+		}
+		return false;
+	}
+done:
+	return true;
 }
 
 union external_encoder_control {
@@ -1092,13 +1201,19 @@ atombios_external_encoder_setup(struct drm_encoder *encoder,
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct radeon_encoder *ext_radeon_encoder = to_radeon_encoder(ext_encoder);
 	union external_encoder_control args;
-	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
+	struct drm_connector *connector;
 	int index = GetIndexIntoMasterTable(COMMAND, ExternalEncoderControl);
 	u8 frev, crev;
 	int dp_clock = 0;
 	int dp_lane_count = 0;
 	int connector_object_id = 0;
 	u32 ext_enum = (ext_radeon_encoder->encoder_enum & ENUM_ID_MASK) >> ENUM_ID_SHIFT;
+	int bpc = 8;
+
+	if (action == EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT)
+		connector = radeon_get_connector_for_encoder_init(encoder);
+	else
+		connector = radeon_get_connector_for_encoder(encoder);
 
 	if (connector) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -1109,6 +1224,7 @@ atombios_external_encoder_setup(struct drm_encoder *encoder,
 		dp_lane_count = dig_connector->dp_lane_count;
 		connector_object_id =
 			(radeon_connector->connector_object_id & OBJECT_ID_MASK) >> OBJECT_ID_SHIFT;
+		bpc = connector->display_info.bpc;
 	}
 
 	memset(&args, 0, sizeof(args));
@@ -1166,7 +1282,27 @@ atombios_external_encoder_setup(struct drm_encoder *encoder,
 				args.v3.sExtEncoder.ucConfig |= EXTERNAL_ENCODER_CONFIG_V3_ENCODER3;
 				break;
 			}
-			args.v3.sExtEncoder.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+			switch (bpc) {
+			case 0:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_BPC_UNDEFINE;
+				break;
+			case 6:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_6BIT_PER_COLOR;
+				break;
+			case 8:
+			default:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_8BIT_PER_COLOR;
+				break;
+			case 10:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_10BIT_PER_COLOR;
+				break;
+			case 12:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_12BIT_PER_COLOR;
+				break;
+			case 16:
+				args.v3.sExtEncoder.ucBitPerColor = PANEL_16BIT_PER_COLOR;
+				break;
+			}
 			break;
 		default:
 			DRM_ERROR("Unknown table version: %d, %d\n", frev, crev);
@@ -1307,9 +1443,11 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
 								     ATOM_TRANSMITTER_ACTION_POWER_ON);
 					radeon_dig_connector->edp_on = true;
 				}
-				dp_link_train(encoder, connector);
 				if (ASIC_IS_DCE4(rdev))
-					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON);
+					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0);
+				radeon_dp_link_train(encoder, connector);
+				if (ASIC_IS_DCE4(rdev))
+					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_ON, 0);
 			}
 			if (radeon_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT))
 				atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_LCD_BLON, 0, 0);
@@ -1322,7 +1460,7 @@ radeon_atom_encoder_dpms(struct drm_encoder *encoder, int mode)
 				struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 
 				if (ASIC_IS_DCE4(rdev))
-					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF);
+					atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_DP_VIDEO_OFF, 0);
 				if (connector &&
 				    (connector->connector_type == DRM_MODE_CONNECTOR_eDP)) {
 					struct radeon_connector *radeon_connector = to_radeon_connector(connector);
@@ -1601,12 +1739,9 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder)
 	/* DCE4/5 */
 	if (ASIC_IS_DCE4(rdev)) {
 		dig = radeon_encoder->enc_priv;
-		if (ASIC_IS_DCE41(rdev)) {
-			if (dig->linkb)
-				return 1;
-			else
-				return 0;
-		} else {
+		if (ASIC_IS_DCE41(rdev))
+			return radeon_crtc->crtc_id;
+		else {
 			switch (radeon_encoder->encoder_id) {
 			case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
 				if (dig->linkb)
@@ -1662,6 +1797,34 @@ static int radeon_atom_pick_dig_encoder(struct drm_encoder *encoder)
 	return 1;
 }
 
+/* This only needs to be called once at startup */
+void
+radeon_atom_encoder_init(struct radeon_device *rdev)
+{
+	struct drm_device *dev = rdev->ddev;
+	struct drm_encoder *encoder;
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
+		struct drm_encoder *ext_encoder = radeon_atom_get_external_encoder(encoder);
+
+		switch (radeon_encoder->encoder_id) {
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
+			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
+			break;
+		default:
+			break;
+		}
+
+		if (ext_encoder && ASIC_IS_DCE41(rdev))
+			atombios_external_encoder_setup(encoder, ext_encoder,
+							EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT);
+	}
+}
+
 static void
 radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
 			     struct drm_display_mode *mode,
@@ -1696,19 +1859,17 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
 			/* disable the transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
 			/* setup and enable the encoder */
-			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP);
+			atombios_dig_encoder_setup(encoder, ATOM_ENCODER_CMD_SETUP, 0);
 
-			/* init and enable the transmitter */
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
+			/* enable the transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
 		} else {
 			/* disable the encoder and transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-			atombios_dig_encoder_setup(encoder, ATOM_DISABLE);
+			atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0);
 
 			/* setup and enable the encoder and transmitter */
-			atombios_dig_encoder_setup(encoder, ATOM_ENABLE);
-			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_INIT, 0, 0);
+			atombios_dig_encoder_setup(encoder, ATOM_ENABLE, 0);
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_SETUP, 0, 0);
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_ENABLE, 0, 0);
 		}
@@ -1733,12 +1894,10 @@ radeon_atom_encoder_mode_set(struct drm_encoder *encoder,
 	}
 
 	if (ext_encoder) {
-		if (ASIC_IS_DCE41(rdev)) {
-			atombios_external_encoder_setup(encoder, ext_encoder,
-							EXTERNAL_ENCODER_ACTION_V3_ENCODER_INIT);
+		if (ASIC_IS_DCE41(rdev))
 			atombios_external_encoder_setup(encoder, ext_encoder,
 							EXTERNAL_ENCODER_ACTION_V3_ENCODER_SETUP);
-		} else
+		else
 			atombios_external_encoder_setup(encoder, ext_encoder, ATOM_ENABLE);
 	}
 
@@ -1845,8 +2004,9 @@ static void radeon_atom_encoder_prepare(struct drm_encoder *encoder)
 	struct radeon_encoder *radeon_encoder = to_radeon_encoder(encoder);
 	struct drm_connector *connector = radeon_get_connector_for_encoder(encoder);
 
-	if (radeon_encoder->active_device &
-	    (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) {
+	if ((radeon_encoder->active_device &
+	     (ATOM_DEVICE_DFP_SUPPORT | ATOM_DEVICE_LCD_SUPPORT)) ||
+	    radeon_encoder_is_dp_bridge(encoder)) {
 		struct radeon_encoder_atom_dig *dig = radeon_encoder->enc_priv;
 		if (dig)
 			dig->dig_encoder = radeon_atom_pick_dig_encoder(encoder);
@@ -1855,11 +2015,17 @@ static void radeon_atom_encoder_prepare(struct drm_encoder *encoder)
 	radeon_atom_output_lock(encoder, true);
 	radeon_atom_encoder_dpms(encoder, DRM_MODE_DPMS_OFF);
 
-	/* select the clock/data port if it uses a router */
 	if (connector) {
 		struct radeon_connector *radeon_connector = to_radeon_connector(connector);
+
+		/* select the clock/data port if it uses a router */
 		if (radeon_connector->router.cd_valid)
 			radeon_router_select_cd_port(radeon_connector);
+
+		/* turn eDP panel on for mode set */
+		if (connector->connector_type == DRM_MODE_CONNECTOR_eDP)
+			atombios_set_edp_panel_power(connector,
+						     ATOM_TRANSMITTER_ACTION_POWER_ON);
 	}
 
 	/* this is needed for the pll/ss setup to work correctly in some cases */
@@ -1914,7 +2080,7 @@ static void radeon_atom_encoder_disable(struct drm_encoder *encoder)
 		else {
 			/* disable the encoder and transmitter */
 			atombios_dig_transmitter_setup(encoder, ATOM_TRANSMITTER_ACTION_DISABLE, 0, 0);
-			atombios_dig_encoder_setup(encoder, ATOM_DISABLE);
+			atombios_dig_encoder_setup(encoder, ATOM_DISABLE, 0);
 		}
 		break;
 	case ENCODER_OBJECT_ID_INTERNAL_DDI:
@@ -2116,8 +2282,6 @@ radeon_add_atom_encoder(struct drm_device *dev,
 		} else {
 			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
-			if (ASIC_IS_AVIVO(rdev))
-				radeon_encoder->underscan_type = UNDERSCAN_AUTO;
 		}
 		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
 		break;
@@ -2150,8 +2314,6 @@ radeon_add_atom_encoder(struct drm_device *dev,
 		} else {
 			drm_encoder_init(dev, encoder, &radeon_atom_enc_funcs, DRM_MODE_ENCODER_TMDS);
 			radeon_encoder->enc_priv = radeon_atombios_set_dig_info(radeon_encoder);
-			if (ASIC_IS_AVIVO(rdev))
-				radeon_encoder->underscan_type = UNDERSCAN_AUTO;
 		}
 		drm_encoder_helper_add(encoder, &radeon_atom_dig_helper_funcs);
 		break;
diff --git a/drivers/gpu/drm/radeon/radeon_i2c.c b/drivers/gpu/drm/radeon/radeon_i2c.c
index 983cbac75af0..781196db792f 100644
--- a/drivers/gpu/drm/radeon/radeon_i2c.c
+++ b/drivers/gpu/drm/radeon/radeon_i2c.c
@@ -888,6 +888,7 @@ struct radeon_i2c_chan *radeon_i2c_create(struct drm_device *dev,
 
 	i2c->rec = *rec;
 	i2c->adapter.owner = THIS_MODULE;
+	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->dev = dev;
 	i2c_set_adapdata(&i2c->adapter, i2c);
 	if (rec->mm_i2c ||
@@ -947,6 +948,7 @@ struct radeon_i2c_chan *radeon_i2c_create_dp(struct drm_device *dev,
 
 	i2c->rec = *rec;
 	i2c->adapter.owner = THIS_MODULE;
+	i2c->adapter.class = I2C_CLASS_DDC;
 	i2c->dev = dev;
 	snprintf(i2c->adapter.name, sizeof(i2c->adapter.name),
 		 "Radeon aux bus %s", name);
diff --git a/drivers/gpu/drm/radeon/radeon_mode.h b/drivers/gpu/drm/radeon/radeon_mode.h
index 9c57538231d5..977a341266b6 100644
--- a/drivers/gpu/drm/radeon/radeon_mode.h
+++ b/drivers/gpu/drm/radeon/radeon_mode.h
@@ -464,22 +464,27 @@ radeon_atombios_get_tv_info(struct radeon_device *rdev);
 extern struct drm_connector *
 radeon_get_connector_for_encoder(struct drm_encoder *encoder);
 
+extern bool radeon_encoder_is_dp_bridge(struct drm_encoder *encoder);
+extern bool radeon_connector_encoder_is_dp_bridge(struct drm_connector *connector);
+extern bool radeon_connector_encoder_is_hbr2(struct drm_connector *connector);
+extern bool radeon_connector_is_dp12_capable(struct drm_connector *connector);
+
 extern void radeon_connector_hotplug(struct drm_connector *connector);
-extern bool radeon_dp_needs_link_train(struct radeon_connector *radeon_connector);
-extern int radeon_dp_mode_valid_helper(struct radeon_connector *radeon_connector,
+extern int radeon_dp_mode_valid_helper(struct drm_connector *connector,
 				       struct drm_display_mode *mode);
 extern void radeon_dp_set_link_config(struct drm_connector *connector,
 				      struct drm_display_mode *mode);
-extern void dp_link_train(struct drm_encoder *encoder,
-			  struct drm_connector *connector);
+extern void radeon_dp_link_train(struct drm_encoder *encoder,
+				 struct drm_connector *connector);
 extern u8 radeon_dp_getsinktype(struct radeon_connector *radeon_connector);
 extern bool radeon_dp_getdpcd(struct radeon_connector *radeon_connector);
-extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action);
+extern void atombios_dig_encoder_setup(struct drm_encoder *encoder, int action, int panel_mode);
+extern void radeon_atom_encoder_init(struct radeon_device *rdev);
 extern void atombios_dig_transmitter_setup(struct drm_encoder *encoder,
 					   int action, uint8_t lane_num,
 					   uint8_t lane_set);
 extern int radeon_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
-				uint8_t write_byte, uint8_t *read_byte);
+				u8 write_byte, u8 *read_byte);
 
 extern void radeon_i2c_init(struct radeon_device *rdev);
 extern void radeon_i2c_fini(struct radeon_device *rdev);
@@ -545,7 +550,7 @@ struct drm_encoder *radeon_encoder_legacy_tmds_ext_add(struct drm_device *dev, i
 extern void atombios_dvo_setup(struct drm_encoder *encoder, int action);
 extern void atombios_digital_setup(struct drm_encoder *encoder, int action);
 extern int atombios_get_encoder_mode(struct drm_encoder *encoder);
-extern void atombios_set_edp_panel_power(struct drm_connector *connector, int action);
+extern bool atombios_set_edp_panel_power(struct drm_connector *connector, int action);
 extern void radeon_encoder_set_active_device(struct drm_encoder *encoder);
 
 extern void radeon_crtc_load_lut(struct drm_crtc *crtc);
diff --git a/drivers/gpu/vga/vga_switcheroo.c b/drivers/gpu/vga/vga_switcheroo.c
index 498b284e5ef9..58434e804d91 100644
--- a/drivers/gpu/vga/vga_switcheroo.c
+++ b/drivers/gpu/vga/vga_switcheroo.c
@@ -215,7 +215,6 @@ static int vga_switchoff(struct vga_switcheroo_client *client)
 /* stage one happens before delay */
 static int vga_switchto_stage1(struct vga_switcheroo_client *new_client)
 {
-	int ret;
 	int i;
 	struct vga_switcheroo_client *active = NULL;
 
@@ -228,11 +227,6 @@ static int vga_switchto_stage1(struct vga_switcheroo_client *new_client)
 	if (!active)
 		return 0;
 
-	/* power up the first device */
-	ret = pci_enable_device(new_client->pdev);
-	if (ret)
-		return ret;
-
 	if (new_client->pwr_state == VGA_SWITCHEROO_OFF)
 		vga_switchon(new_client);
 
diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c
index be8d4cb5861c..8a1021f2e319 100644
--- a/drivers/gpu/vga/vgaarb.c
+++ b/drivers/gpu/vga/vgaarb.c
@@ -61,7 +61,7 @@ struct vga_device {
 	unsigned int mem_lock_cnt;	/* legacy MEM lock count */
 	unsigned int io_norm_cnt;	/* normal IO count */
 	unsigned int mem_norm_cnt;	/* normal MEM count */
-
+	bool bridge_has_one_vga;
 	/* allow IRQ enable/disable hook */
 	void *cookie;
 	void (*irq_set_state)(void *cookie, bool enable);
@@ -165,6 +165,8 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev,
 	unsigned int wants, legacy_wants, match;
 	struct vga_device *conflict;
 	unsigned int pci_bits;
+	u32 flags = 0;
+
 	/* Account for "normal" resources to lock. If we decode the legacy,
 	 * counterpart, we need to request it as well
 	 */
@@ -237,16 +239,23 @@ static struct vga_device *__vga_tryget(struct vga_device *vgadev,
 		/* looks like he doesn't have a lock, we can steal
 		 * them from him
 		 */
-		vga_irq_set_state(conflict, false);
 
+		flags = 0;
 		pci_bits = 0;
-		if (lwants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM))
-			pci_bits |= PCI_COMMAND_MEMORY;
-		if (lwants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO))
-			pci_bits |= PCI_COMMAND_IO;
 
-		pci_set_vga_state(conflict->pdev, false, pci_bits,
-				  change_bridge);
+		if (!conflict->bridge_has_one_vga) {
+			vga_irq_set_state(conflict, false);
+			flags |= PCI_VGA_STATE_CHANGE_DECODES;
+			if (lwants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM))
+				pci_bits |= PCI_COMMAND_MEMORY;
+			if (lwants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO))
+				pci_bits |= PCI_COMMAND_IO;
+		}
+
+		if (change_bridge)
+			flags |= PCI_VGA_STATE_CHANGE_BRIDGE;
+
+		pci_set_vga_state(conflict->pdev, false, pci_bits, flags);
 		conflict->owns &= ~lwants;
 		/* If he also owned non-legacy, that is no longer the case */
 		if (lwants & VGA_RSRC_LEGACY_MEM)
@@ -261,14 +270,24 @@ enable_them:
 	 * also have in "decodes". We can lock resources we don't decode but
 	 * not own them.
 	 */
+	flags = 0;
 	pci_bits = 0;
-	if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM))
-		pci_bits |= PCI_COMMAND_MEMORY;
-	if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO))
-		pci_bits |= PCI_COMMAND_IO;
-	pci_set_vga_state(vgadev->pdev, true, pci_bits, !!(wants & VGA_RSRC_LEGACY_MASK));
 
-	vga_irq_set_state(vgadev, true);
+	if (!vgadev->bridge_has_one_vga) {
+		flags |= PCI_VGA_STATE_CHANGE_DECODES;
+		if (wants & (VGA_RSRC_LEGACY_MEM|VGA_RSRC_NORMAL_MEM))
+			pci_bits |= PCI_COMMAND_MEMORY;
+		if (wants & (VGA_RSRC_LEGACY_IO|VGA_RSRC_NORMAL_IO))
+			pci_bits |= PCI_COMMAND_IO;
+	}
+	if (!!(wants & VGA_RSRC_LEGACY_MASK))
+		flags |= PCI_VGA_STATE_CHANGE_BRIDGE;
+
+	pci_set_vga_state(vgadev->pdev, true, pci_bits, flags);
+
+	if (!vgadev->bridge_has_one_vga) {
+		vga_irq_set_state(vgadev, true);
+	}
 	vgadev->owns |= (wants & vgadev->decodes);
 lock_them:
 	vgadev->locks |= (rsrc & VGA_RSRC_LEGACY_MASK);
@@ -421,6 +440,62 @@ bail:
 }
 EXPORT_SYMBOL(vga_put);
 
+/* Rules for using a bridge to control a VGA descendant decoding:
+   if a bridge has only one VGA descendant then it can be used
+   to control the VGA routing for that device.
+   It should always use the bridge closest to the device to control it.
+   If a bridge has a direct VGA descendant, but also have a sub-bridge
+   VGA descendant then we cannot use that bridge to control the direct VGA descendant.
+   So for every device we register, we need to iterate all its parent bridges
+   so we can invalidate any devices using them properly.
+*/
+static void vga_arbiter_check_bridge_sharing(struct vga_device *vgadev)
+{
+	struct vga_device *same_bridge_vgadev;
+	struct pci_bus *new_bus, *bus;
+	struct pci_dev *new_bridge, *bridge;
+
+	vgadev->bridge_has_one_vga = true;
+
+	if (list_empty(&vga_list))
+		return;
+
+	/* okay iterate the new devices bridge hierarachy */
+	new_bus = vgadev->pdev->bus;
+	while (new_bus) {
+		new_bridge = new_bus->self;
+
+		if (new_bridge) {
+			/* go through list of devices already registered */
+			list_for_each_entry(same_bridge_vgadev, &vga_list, list) {
+				bus = same_bridge_vgadev->pdev->bus;
+				bridge = bus->self;
+
+				/* see if the share a bridge with this device */
+				if (new_bridge == bridge) {
+					/* if their direct parent bridge is the same
+					   as any bridge of this device then it can't be used
+					   for that device */
+					same_bridge_vgadev->bridge_has_one_vga = false;
+				}
+
+				/* now iterate the previous devices bridge hierarchy */
+				/* if the new devices parent bridge is in the other devices
+				   hierarchy then we can't use it to control this device */
+				while (bus) {
+					bridge = bus->self;
+					if (bridge) {
+						if (bridge == vgadev->pdev->bus->self)
+							vgadev->bridge_has_one_vga = false;
+					}
+					bus = bus->parent;
+				}
+			}
+		}
+		new_bus = new_bus->parent;
+	}
+}
+
 /*
  * Currently, we assume that the "initial" setup of the system is
  * not sane, that is we come up with conflicting devices and let
@@ -500,6 +575,8 @@ static bool vga_arbiter_add_pci_device(struct pci_dev *pdev)
 		vga_default = pci_dev_get(pdev);
 #endif
 
+	vga_arbiter_check_bridge_sharing(vgadev);
+
 	/* Add to the list */
 	list_add(&vgadev->list, &vga_list);
 	vga_count++;
@@ -1222,6 +1299,7 @@ static int __init vga_arb_device_init(void)
 {
 	int rc;
 	struct pci_dev *pdev;
+	struct vga_device *vgadev;
 
 	rc = misc_register(&vga_arb_device);
 	if (rc < 0)
@@ -1238,6 +1316,13 @@ static int __init vga_arb_device_init(void)
 		vga_arbiter_add_pci_device(pdev);
 
 	pr_info("vgaarb: loaded\n");
+
+	list_for_each_entry(vgadev, &vga_list, list) {
+		if (vgadev->bridge_has_one_vga)
+			pr_info("vgaarb: bridge control possible %s\n", pci_name(vgadev->pdev));
+		else
+			pr_info("vgaarb: no bridge control possible %s\n", pci_name(vgadev->pdev));
+	}
 	return rc;
 }
 subsys_initcall(vga_arb_device_init);