37 files changed, 1096 insertions, 647 deletions
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index b1bc1ea182b8..1175429da102 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -30,12 +30,11 @@ config DRM_NOUVEAU_DEBUG
 	  via debugfs.
 
 menu "I2C encoder or helper chips"
-     depends on DRM && I2C
+     depends on DRM && DRM_KMS_HELPER && I2C
 
 config DRM_I2C_CH7006
 	tristate "Chrontel ch7006 TV encoder"
-	depends on DRM_NOUVEAU
-	default m
+	default m if DRM_NOUVEAU
 	help
 	  Support for Chrontel ch7006 and similar TV encoders, found
 	  on some nVidia video cards.
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index ba143972769f..d7f8d8b4a4b8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -310,63 +310,22 @@ valid_reg(struct nvbios *bios, uint32_t reg)
 	struct drm_device *dev = bios->dev;
 
 	/* C51 has misaligned regs on purpose. Marvellous */
-	if (reg & 0x2 || (reg & 0x1 && dev_priv->VBIOS.pub.chip_version != 0x51)) {
-		NV_ERROR(dev, "========== misaligned reg 0x%08X ==========\n",
-			 reg);
-		return 0;
-	}
-	/*
-	 * Warn on C51 regs that have not been verified accessible in
-	 * mmiotracing
-	 */
+	if (reg & 0x2 ||
+	    (reg & 0x1 && dev_priv->VBIOS.pub.chip_version != 0x51))
+		NV_ERROR(dev, "======= misaligned reg 0x%08X =======\n", reg);
+
+	/* warn on C51 regs that haven't been verified accessible in tracing */
 	if (reg & 0x1 && dev_priv->VBIOS.pub.chip_version == 0x51 &&
 	    reg != 0x130d && reg != 0x1311 && reg != 0x60081d)
 		NV_WARN(dev, "=== C51 misaligned reg 0x%08X not verified ===\n",
 			reg);
 
-	/* Trust the init scripts on G80 */
-	if (dev_priv->card_type >= NV_50)
-		return 1;
-
-	#define WITHIN(x, y, z) ((x >= y) && (x < y + z))
-	if (WITHIN(reg, NV_PMC_OFFSET, NV_PMC_SIZE))
-		return 1;
-	if (WITHIN(reg, NV_PBUS_OFFSET, NV_PBUS_SIZE))
-		return 1;
-	if (WITHIN(reg, NV_PFIFO_OFFSET, NV_PFIFO_SIZE))
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version >= 0x30 &&
-	    (WITHIN(reg, 0x4000, 0x600) || reg == 0x00004600))
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version >= 0x40 &&
-						WITHIN(reg, 0xc000, 0x48))
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version >= 0x17 && reg == 0x0000d204)
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version >= 0x40) {
-		if (reg == 0x00011014 || reg == 0x00020328)
-			return 1;
-		if (WITHIN(reg, 0x88000, NV_PBUS_SIZE)) /* new PBUS */
-			return 1;
+	if (reg >= (8*1024*1024)) {
+		NV_ERROR(dev, "=== reg 0x%08x out of mapped bounds ===\n", reg);
+		return 0;
 	}
-	if (WITHIN(reg, NV_PFB_OFFSET, NV_PFB_SIZE))
-		return 1;
-	if (WITHIN(reg, NV_PEXTDEV_OFFSET, NV_PEXTDEV_SIZE))
-		return 1;
-	if (WITHIN(reg, NV_PCRTC0_OFFSET, NV_PCRTC0_SIZE * 2))
-		return 1;
-	if (WITHIN(reg, NV_PRAMDAC0_OFFSET, NV_PRAMDAC0_SIZE * 2))
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version >= 0x17 && reg == 0x0070fff0)
-		return 1;
-	if (dev_priv->VBIOS.pub.chip_version == 0x51 &&
-				WITHIN(reg, NV_PRAMIN_OFFSET, NV_PRAMIN_SIZE))
-		return 1;
-	#undef WITHIN
 
-	NV_ERROR(dev, "========== unknown reg 0x%08X ==========\n", reg);
-
-	return 0;
+	return 1;
 }
 
 static bool
@@ -3196,16 +3155,25 @@ static int call_lvds_manufacturer_script(struct drm_device *dev, struct dcb_entr
 	}
 #ifdef __powerpc__
 	/* Powerbook specific quirks */
-	if (script == LVDS_RESET && ((dev->pci_device & 0xffff) == 0x0179 || (dev->pci_device & 0xffff) == 0x0329))
-		nv_write_tmds(dev, dcbent->or, 0, 0x02, 0x72);
-	if ((dev->pci_device & 0xffff) == 0x0179 || (dev->pci_device & 0xffff) == 0x0189 || (dev->pci_device & 0xffff) == 0x0329) {
-		if (script == LVDS_PANEL_ON) {
-			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL, bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL) | (1 << 31));
-			bios_wr32(bios, NV_PCRTC_GPIO_EXT, bios_rd32(bios, NV_PCRTC_GPIO_EXT) | 1);
-		}
-		if (script == LVDS_PANEL_OFF) {
-			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL, bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL) & ~(1 << 31));
-			bios_wr32(bios, NV_PCRTC_GPIO_EXT, bios_rd32(bios, NV_PCRTC_GPIO_EXT) & ~3);
+	if ((dev->pci_device & 0xffff) == 0x0179 ||
+	    (dev->pci_device & 0xffff) == 0x0189 ||
+	    (dev->pci_device & 0xffff) == 0x0329) {
+		if (script == LVDS_RESET) {
+			nv_write_tmds(dev, dcbent->or, 0, 0x02, 0x72);
+
+		} else if (script == LVDS_PANEL_ON) {
+			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL,
+				  bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL)
+				  | (1 << 31));
+			bios_wr32(bios, NV_PCRTC_GPIO_EXT,
+				  bios_rd32(bios, NV_PCRTC_GPIO_EXT) | 1);
+
+		} else if (script == LVDS_PANEL_OFF) {
+			bios_wr32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL,
+				  bios_rd32(bios, NV_PBUS_DEBUG_DUALHEAD_CTL)
+				  & ~(1 << 31));
+			bios_wr32(bios, NV_PCRTC_GPIO_EXT,
+				  bios_rd32(bios, NV_PCRTC_GPIO_EXT) & ~3);
 		}
 	}
 #endif
@@ -5434,52 +5402,49 @@ static bool
 parse_dcb15_entry(struct drm_device *dev, struct parsed_dcb *dcb,
 		  uint32_t conn, uint32_t conf, struct dcb_entry *entry)
 {
-	if (conn != 0xf0003f00 && conn != 0xf2247f10 && conn != 0xf2204001 &&
-	    conn != 0xf2204301 && conn != 0xf2204311 && conn != 0xf2208001 &&
-	    conn != 0xf2244001 && conn != 0xf2244301 && conn != 0xf2244311 &&
-	    conn != 0xf4204011 && conn != 0xf4208011 && conn != 0xf4248011 &&
-	    conn != 0xf2045ff2 && conn != 0xf2045f14 && conn != 0xf207df14 &&
-	    conn != 0xf2205004 && conn != 0xf2209004) {
-		NV_ERROR(dev, "Unknown DCB 1.5 entry, please report\n");
-
-		/* cause output setting to fail for !TV, so message is seen */
-		if ((conn & 0xf) != 0x1)
-			dcb->entries = 0;
-
-		return false;
-	}
-	/* most of the below is a "best guess" atm */
-	entry->type = conn & 0xf;
-	if (entry->type == 2)
-		/* another way of specifying straps based lvds... */
+	switch (conn & 0x0000000f) {
+	case 0:
+		entry->type = OUTPUT_ANALOG;
+		break;
+	case 1:
+		entry->type = OUTPUT_TV;
+		break;
+	case 2:
+	case 3:
 		entry->type = OUTPUT_LVDS;
-	if (entry->type == 4) { /* digital */
-		if (conn & 0x10)
-			entry->type = OUTPUT_LVDS;
-		else
+		break;
+	case 4:
+		switch ((conn & 0x000000f0) >> 4) {
+		case 0:
 			entry->type = OUTPUT_TMDS;
+			break;
+		case 1:
+			entry->type = OUTPUT_LVDS;
+			break;
+		default:
+			NV_ERROR(dev, "Unknown DCB subtype 4/%d\n",
+				 (conn & 0x000000f0) >> 4);
+			return false;
+		}
+		break;
+	default:
+		NV_ERROR(dev, "Unknown DCB type %d\n", conn & 0x0000000f);
+		return false;
 	}
-	/* what's in bits 5-13? could be some encoder maker thing, in tv case */
-	entry->i2c_index = (conn >> 14) & 0xf;
-	/* raw heads field is in range 0-1, so move to 1-2 */
-	entry->heads = ((conn >> 18) & 0x7) + 1;
-	entry->location = (conn >> 21) & 0xf;
-	/* unused: entry->bus = (conn >> 25) & 0x7; */
-	/* set or to be same as heads -- hopefully safe enough */
-	entry->or = entry->heads;
+
+	entry->i2c_index = (conn & 0x0003c000) >> 14;
+	entry->heads = ((conn & 0x001c0000) >> 18) + 1;
+	entry->or = entry->heads; /* same as heads, hopefully safe enough */
+	entry->location = (conn & 0x01e00000) >> 21;
+	entry->bus = (conn & 0x0e000000) >> 25;
 	entry->duallink_possible = false;
 
 	switch (entry->type) {
 	case OUTPUT_ANALOG:
 		entry->crtconf.maxfreq = (conf & 0xffff) * 10;
 		break;
-	case OUTPUT_LVDS:
-		/*
-		 * This is probably buried in conn's unknown bits.
-		 * This will upset EDID-ful models, if they exist
-		 */
-		entry->lvdsconf.use_straps_for_mode = true;
-		entry->lvdsconf.use_power_scripts = true;
+	case OUTPUT_TV:
+		entry->tvconf.has_component_output = false;
 		break;
 	case OUTPUT_TMDS:
 		/*
@@ -5488,8 +5453,12 @@ parse_dcb15_entry(struct drm_device *dev, struct parsed_dcb *dcb,
 		 */
 		fabricate_vga_output(dcb, entry->i2c_index, entry->heads);
 		break;
-	case OUTPUT_TV:
-		entry->tvconf.has_component_output = false;
+	case OUTPUT_LVDS:
+		if ((conn & 0x00003f00) != 0x10)
+			entry->lvdsconf.use_straps_for_mode = true;
+		entry->lvdsconf.use_power_scripts = true;
+		break;
+	default:
 		break;
 	}
 
@@ -5564,11 +5533,13 @@ void merge_like_dcb_entries(struct drm_device *dev, struct parsed_dcb *dcb)
 	dcb->entries = newentries;
 }
 
-static int parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
+static int
+parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool twoHeads)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct bios_parsed_dcb *bdcb = &bios->bdcb;
 	struct parsed_dcb *dcb;
-	uint16_t dcbptr, i2ctabptr = 0;
+	uint16_t dcbptr = 0, i2ctabptr = 0;
 	uint8_t *dcbtable;
 	uint8_t headerlen = 0x4, entries = DCB_MAX_NUM_ENTRIES;
 	bool configblock = true;
@@ -5579,16 +5550,18 @@ static int parse_dcb_table(struct drm_device *dev, struct nvbios *bios, bool two
 	dcb->entries = 0;
 
 	/* get the offset from 0x36 */
-	dcbptr = ROM16(bios->data[0x36]);
+	if (dev_priv->card_type > NV_04) {
+		dcbptr = ROM16(bios->data[0x36]);
+		if (dcbptr == 0x0000)
+			NV_WARN(dev, "No output data (DCB) found in BIOS\n");
+	}
 
+	/* this situation likely means a really old card, pre DCB */
 	if (dcbptr == 0x0) {
-		NV_WARN(dev, "No output data (DCB) found in BIOS, "
-			       "assuming a CRT output exists\n");
-		/* this situation likely means a really old card, pre DCB */
+		NV_INFO(dev, "Assuming a CRT output exists\n");
 		fabricate_vga_output(dcb, LEGACY_I2C_CRT, 1);
 
-		if (nv04_tv_identify(dev,
-				     bios->legacy.i2c_indices.tv) >= 0)
+		if (nv04_tv_identify(dev, bios->legacy.i2c_indices.tv) >= 0)
 			fabricate_tv_output(dcb, twoHeads);
 
 		return 0;
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 0cad6d834eb2..db0ed4c13f98 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -33,10 +33,13 @@
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
 
+#include <linux/log2.h>
+
 static void
 nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct drm_device *dev = dev_priv->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 
 	ttm_bo_kunmap(&nvbo->kmap);
@@ -44,12 +47,87 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 	if (unlikely(nvbo->gem))
 		DRM_ERROR("bo %p still attached to GEM object\n", bo);
 
+	if (nvbo->tile)
+		nv10_mem_expire_tiling(dev, nvbo->tile, NULL);
+
 	spin_lock(&dev_priv->ttm.bo_list_lock);
 	list_del(&nvbo->head);
 	spin_unlock(&dev_priv->ttm.bo_list_lock);
 	kfree(nvbo);
 }
 
+static void
+nouveau_bo_fixup_align(struct drm_device *dev,
+		       uint32_t tile_mode, uint32_t tile_flags,
+		       int *align, int *size)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
+	/*
+	 * Some of the tile_flags have a periodic structure of N*4096 bytes,
+	 * align to to that as well as the page size. Overallocate memory to
+	 * avoid corruption of other buffer objects.
+	 */
+	if (dev_priv->card_type == NV_50) {
+		uint32_t block_size = nouveau_mem_fb_amount(dev) >> 15;
+		int i;
+
+		switch (tile_flags) {
+		case 0x1800:
+		case 0x2800:
+		case 0x4800:
+		case 0x7a00:
+			*size = roundup(*size, block_size);
+			if (is_power_of_2(block_size)) {
+				*size += 3 * block_size;
+				for (i = 1; i < 10; i++) {
+					*align = 12 * i * block_size;
+					if (!(*align % 65536))
+						break;
+				}
+			} else {
+				*size += 6 * block_size;
+				for (i = 1; i < 10; i++) {
+					*align = 8 * i * block_size;
+					if (!(*align % 65536))
+						break;
+				}
+			}
+			break;
+		default:
+			break;
+		}
+
+	} else {
+		if (tile_mode) {
+			if (dev_priv->chipset >= 0x40) {
+				*align = 65536;
+				*size = roundup(*size, 64 * tile_mode);
+
+			} else if (dev_priv->chipset >= 0x30) {
+				*align = 32768;
+				*size = roundup(*size, 64 * tile_mode);
+
+			} else if (dev_priv->chipset >= 0x20) {
+				*align = 16384;
+				*size = roundup(*size, 64 * tile_mode);
+
+			} else if (dev_priv->chipset >= 0x10) {
+				*align = 16384;
+				*size = roundup(*size, 32 * tile_mode);
+			}
+		}
+	}
+
+	/* ALIGN works only on powers of two. */
+	*size = roundup(*size, PAGE_SIZE);
+
+	if (dev_priv->card_type == NV_50) {
+		*size = roundup(*size, 65536);
+		*align = max(65536, *align);
+	}
+}
+
 int
 nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 	       int size, int align, uint32_t flags, uint32_t tile_mode,
@@ -58,7 +136,7 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_bo *nvbo;
-	int ret, n = 0;
+	int ret = 0;
 
 	nvbo = kzalloc(sizeof(struct nouveau_bo), GFP_KERNEL);
 	if (!nvbo)
@@ -70,59 +148,14 @@ nouveau_bo_new(struct drm_device *dev, struct nouveau_channel *chan,
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
 
-	/*
-	 * Some of the tile_flags have a periodic structure of N*4096 bytes,
-	 * align to to that as well as the page size. Overallocate memory to
-	 * avoid corruption of other buffer objects.
-	 */
-	switch (tile_flags) {
-	case 0x1800:
-	case 0x2800:
-	case 0x4800:
-	case 0x7a00:
-		if (dev_priv->chipset >= 0xA0) {
-			/* This is based on high end cards with 448 bits
-			 * memory bus, could be different elsewhere.*/
-			size += 6 * 28672;
-			/* 8 * 28672 is the actual alignment requirement,
-			 * but we must also align to page size. */
-			align = 2 * 8 * 28672;
-		} else if (dev_priv->chipset >= 0x90) {
-			size += 3 * 16384;
-			align = 12 * 16834;
-		} else {
-			size += 3 * 8192;
-			/* 12 * 8192 is the actual alignment requirement,
-			 * but we must also align to page size. */
-			align = 2 * 12 * 8192;
-		}
-		break;
-	default:
-		break;
-	}
-
+	nouveau_bo_fixup_align(dev, tile_mode, tile_flags, &align, &size);
 	align >>= PAGE_SHIFT;
 
-	size = (size + (PAGE_SIZE - 1)) & ~(PAGE_SIZE - 1);
-	if (dev_priv->card_type == NV_50) {
-		size = (size + 65535) & ~65535;
-		if (align < (65536 / PAGE_SIZE))
-			align = (65536 / PAGE_SIZE);
-	}
-
-	if (flags & TTM_PL_FLAG_VRAM)
-		nvbo->placements[n++] = TTM_PL_FLAG_VRAM | TTM_PL_MASK_CACHING;
-	if (flags & TTM_PL_FLAG_TT)
-		nvbo->placements[n++] = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING;
 	nvbo->placement.fpfn = 0;
 	nvbo->placement.lpfn = mappable ? dev_priv->fb_mappable_pages : 0;
-	nvbo->placement.placement = nvbo->placements;
-	nvbo->placement.busy_placement = nvbo->placements;
-	nvbo->placement.num_placement = n;
-	nvbo->placement.num_busy_placement = n;
+	nouveau_bo_placement_set(nvbo, flags);
 
 	nvbo->channel = chan;
-	nouveau_bo_placement_set(nvbo, flags);
 	ret = ttm_bo_init(&dev_priv->ttm.bdev, &nvbo->bo, size,
 			  ttm_bo_type_device, &nvbo->placement, align, 0,
 			  false, NULL, size, nouveau_bo_del_ttm);
@@ -421,6 +454,7 @@ nouveau_bo_evict_flags(struct ttm_buffer_object *bo, struct ttm_placement *pl)
 /* GPU-assisted copy using NV_MEMORY_TO_MEMORY_FORMAT, can access
  * TTM_PL_{VRAM,TT} directly.
  */
+
 static int
 nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 			      struct nouveau_bo *nvbo, bool evict, bool no_wait,
@@ -435,6 +469,8 @@ nouveau_bo_move_accel_cleanup(struct nouveau_channel *chan,
 
 	ret = ttm_bo_move_accel_cleanup(&nvbo->bo, fence, NULL,
 					evict, no_wait, new_mem);
+	if (nvbo->channel && nvbo->channel != chan)
+		ret = nouveau_fence_wait(fence, NULL, false, false);
 	nouveau_fence_unref((void *)&fence);
 	return ret;
 }
@@ -455,11 +491,12 @@ nouveau_bo_mem_ctxdma(struct nouveau_bo *nvbo, struct nouveau_channel *chan,
 }
 
 static int
-nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, int no_wait,
-		     struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
+		     int no_wait, struct ttm_mem_reg *new_mem)
 {
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct ttm_mem_reg *old_mem = &bo->mem;
 	struct nouveau_channel *chan;
 	uint64_t src_offset, dst_offset;
 	uint32_t page_count;
@@ -547,7 +584,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 
 	placement.fpfn = placement.lpfn = 0;
 	placement.num_placement = placement.num_busy_placement = 1;
-	placement.placement = &placement_memtype;
+	placement.placement = placement.busy_placement = &placement_memtype;
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
@@ -559,7 +596,7 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait, &tmp_mem);
 	if (ret)
 		goto out;
 
@@ -585,7 +622,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 
 	placement.fpfn = placement.lpfn = 0;
 	placement.num_placement = placement.num_busy_placement = 1;
-	placement.placement = &placement_memtype;
+	placement.placement = placement.busy_placement = &placement_memtype;
 
 	tmp_mem = *new_mem;
 	tmp_mem.mm_node = NULL;
@@ -597,7 +634,7 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, no_wait, &bo->mem, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem);
 	if (ret)
 		goto out;
 
@@ -612,52 +649,106 @@ out:
 }
 
 static int
-nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait, struct ttm_mem_reg *new_mem)
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+		   struct nouveau_tile_reg **new_tile)
 {
 	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
-	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct drm_device *dev = dev_priv->dev;
-	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	uint64_t offset;
 	int ret;
 
-	if (dev_priv->card_type == NV_50 && new_mem->mem_type == TTM_PL_VRAM &&
-	    !nvbo->no_vm) {
-		uint64_t offset = new_mem->mm_node->start << PAGE_SHIFT;
+	if (nvbo->no_vm || new_mem->mem_type != TTM_PL_VRAM) {
+		/* Nothing to do. */
+		*new_tile = NULL;
+		return 0;
+	}
+
+	offset = new_mem->mm_node->start << PAGE_SHIFT;
 
+	if (dev_priv->card_type == NV_50) {
 		ret = nv50_mem_vm_bind_linear(dev,
 					      offset + dev_priv->vm_vram_base,
 					      new_mem->size, nvbo->tile_flags,
 					      offset);
 		if (ret)
 			return ret;
+
+	} else if (dev_priv->card_type >= NV_10) {
+		*new_tile = nv10_mem_set_tiling(dev, offset, new_mem->size,
+						nvbo->tile_mode);
 	}
 
+	return 0;
+}
+
+static void
+nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
+		      struct nouveau_tile_reg *new_tile,
+		      struct nouveau_tile_reg **old_tile)
+{
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct drm_device *dev = dev_priv->dev;
+
+	if (dev_priv->card_type >= NV_10 &&
+	    dev_priv->card_type < NV_50) {
+		if (*old_tile)
+			nv10_mem_expire_tiling(dev, *old_tile, bo->sync_obj);
+
+		*old_tile = new_tile;
+	}
+}
+
+static int
+nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
+		bool no_wait, struct ttm_mem_reg *new_mem)
+{
+	struct drm_nouveau_private *dev_priv = nouveau_bdev(bo->bdev);
+	struct nouveau_bo *nvbo = nouveau_bo(bo);
+	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct nouveau_tile_reg *new_tile = NULL;
+	int ret = 0;
+
+	ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+	if (ret)
+		return ret;
+
+	/* Software copy if the card isn't up and running yet. */
 	if (dev_priv->init_state != NOUVEAU_CARD_INIT_DONE ||
-	    !dev_priv->channel)
-		return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+	    !dev_priv->channel) {
+		ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+		goto out;
+	}
 
+	/* Fake bo copy. */
 	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
 		BUG_ON(bo->mem.mm_node != NULL);
 		bo->mem = *new_mem;
 		new_mem->mm_node = NULL;
-		return 0;
+		goto out;
 	}
 
-	if (new_mem->mem_type == TTM_PL_SYSTEM) {
-		if (old_mem->mem_type == TTM_PL_SYSTEM)
-			return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-		if (nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem))
-			return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-	} else if (old_mem->mem_type == TTM_PL_SYSTEM) {
-		if (nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem))
-			return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-	} else {
-		if (nouveau_bo_move_m2mf(bo, evict, no_wait, old_mem, new_mem))
-			return ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
-	}
+	/* Hardware assisted copy. */
+	if (new_mem->mem_type == TTM_PL_SYSTEM)
+		ret = nouveau_bo_move_flipd(bo, evict, intr, no_wait, new_mem);
+	else if (old_mem->mem_type == TTM_PL_SYSTEM)
+		ret = nouveau_bo_move_flips(bo, evict, intr, no_wait, new_mem);
+	else
+		ret = nouveau_bo_move_m2mf(bo, evict, intr, no_wait, new_mem);
 
-	return 0;
+	if (!ret)
+		goto out;
+
+	/* Fallback to software copy. */
+	ret = ttm_bo_move_memcpy(bo, evict, no_wait, new_mem);
+
+out:
+	if (ret)
+		nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
+	else
+		nouveau_bo_vm_cleanup(bo, new_tile, &nvbo->tile);
+
+	return ret;
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nouveau_channel.c b/drivers/gpu/drm/nouveau/nouveau_channel.c
index 9aaa972f8822..343d718a9667 100644
--- a/drivers/gpu/drm/nouveau/nouveau_channel.c
+++ b/drivers/gpu/drm/nouveau/nouveau_channel.c
@@ -158,6 +158,8 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 		return ret;
 	}
 
+	nouveau_dma_pre_init(chan);
+
 	/* Locate channel's user control regs */
 	if (dev_priv->card_type < NV_40)
 		user = NV03_USER(channel);
@@ -235,47 +237,6 @@ nouveau_channel_alloc(struct drm_device *dev, struct nouveau_channel **chan_ret,
 	return 0;
 }
 
-int
-nouveau_channel_idle(struct nouveau_channel *chan)
-{
-	struct drm_device *dev = chan->dev;
-	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	struct nouveau_engine *engine = &dev_priv->engine;
-	uint32_t caches;
-	int idle;
-
-	if (!chan) {
-		NV_ERROR(dev, "no channel...\n");
-		return 1;
-	}
-
-	caches = nv_rd32(dev, NV03_PFIFO_CACHES);
-	nv_wr32(dev, NV03_PFIFO_CACHES, caches & ~1);
-
-	if (engine->fifo.channel_id(dev) != chan->id) {
-		struct nouveau_gpuobj *ramfc =
-			chan->ramfc ? chan->ramfc->gpuobj : NULL;
-
-		if (!ramfc) {
-			NV_ERROR(dev, "No RAMFC for channel %d\n", chan->id);
-			return 1;
-		}
-
-		engine->instmem.prepare_access(dev, false);
-		if (nv_ro32(dev, ramfc, 0) != nv_ro32(dev, ramfc, 1))
-			idle = 0;
-		else
-			idle = 1;
-		engine->instmem.finish_access(dev);
-	} else {
-		idle = (nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_GET) ==
-			nv_rd32(dev, NV04_PFIFO_CACHE1_DMA_PUT));
-	}
-
-	nv_wr32(dev, NV03_PFIFO_CACHES, caches);
-	return idle;
-}
-
 /* stops a fifo */
 void
 nouveau_channel_free(struct nouveau_channel *chan)
@@ -414,7 +375,9 @@ nouveau_ioctl_fifo_alloc(struct drm_device *dev, void *data,
 		init->subchan[0].grclass = 0x0039;
 	else
 		init->subchan[0].grclass = 0x5039;
-	init->nr_subchan = 1;
+	init->subchan[1].handle = NvSw;
+	init->subchan[1].grclass = NV_SW;
+	init->nr_subchan = 2;
 
 	/* Named memory object area */
 	ret = drm_gem_handle_create(file_priv, chan->notifier_bo->gem,
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index 5a10deb8bdbd..7e6d673f3a23 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -24,9 +24,12 @@
  *
  */
 
+#include <acpi/button.h>
+
 #include "drmP.h"
 #include "drm_edid.h"
 #include "drm_crtc_helper.h"
+
 #include "nouveau_reg.h"
 #include "nouveau_drv.h"
 #include "nouveau_encoder.h"
@@ -83,14 +86,16 @@ nouveau_encoder_connector_get(struct nouveau_encoder *encoder)
 static void
 nouveau_connector_destroy(struct drm_connector *drm_connector)
 {
-	struct nouveau_connector *connector = nouveau_connector(drm_connector);
-	struct drm_device *dev = connector->base.dev;
+	struct nouveau_connector *nv_connector =
+		nouveau_connector(drm_connector);
+	struct drm_device *dev = nv_connector->base.dev;
 
 	NV_DEBUG_KMS(dev, "\n");
 
-	if (!connector)
+	if (!nv_connector)
 		return;
 
+	kfree(nv_connector->edid);
 	drm_sysfs_connector_remove(drm_connector);
 	drm_connector_cleanup(drm_connector);
 	kfree(drm_connector);
@@ -233,10 +238,21 @@ nouveau_connector_detect(struct drm_connector *connector)
 	if (connector->connector_type == DRM_MODE_CONNECTOR_LVDS)
 		nv_encoder = find_encoder_by_type(connector, OUTPUT_LVDS);
 	if (nv_encoder && nv_connector->native_mode) {
+#ifdef CONFIG_ACPI
+		if (!nouveau_ignorelid && !acpi_lid_open())
+			return connector_status_disconnected;
+#endif
 		nouveau_connector_set_encoder(connector, nv_encoder);
 		return connector_status_connected;
 	}
 
+	/* Cleanup the previous EDID block. */
+	if (nv_connector->edid) {
+		drm_mode_connector_update_edid_property(connector, NULL);
+		kfree(nv_connector->edid);
+		nv_connector->edid = NULL;
+	}
+
 	i2c = nouveau_connector_ddc_detect(connector, &nv_encoder);
 	if (i2c) {
 		nouveau_connector_ddc_prepare(connector, &flags);
@@ -247,7 +263,7 @@ nouveau_connector_detect(struct drm_connector *connector)
 		if (!nv_connector->edid) {
 			NV_ERROR(dev, "DDC responded, but no EDID for %s\n",
 				 drm_get_connector_name(connector));
-			return connector_status_disconnected;
+			goto detect_analog;
 		}
 
 		if (nv_encoder->dcb->type == OUTPUT_DP &&
@@ -281,6 +297,7 @@ nouveau_connector_detect(struct drm_connector *connector)
 		return connector_status_connected;
 	}
 
+detect_analog:
 	nv_encoder = find_encoder_by_type(connector, OUTPUT_ANALOG);
 	if (!nv_encoder)
 		nv_encoder = find_encoder_by_type(connector, OUTPUT_TV);
@@ -687,8 +704,12 @@ nouveau_connector_create_lvds(struct drm_device *dev,
 	 */
 	if (!nv_connector->edid && !nv_connector->native_mode &&
 	    !dev_priv->VBIOS.pub.fp_no_ddc) {
-		nv_connector->edid =
+		struct edid *edid =
 			(struct edid *)nouveau_bios_embedded_edid(dev);
+		if (edid) {
+			nv_connector->edid = kmalloc(EDID_LENGTH, GFP_KERNEL);
+			*(nv_connector->edid) = *edid;
+		}
 	}
 
 	if (!nv_connector->edid)
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.c b/drivers/gpu/drm/nouveau/nouveau_dma.c
index 703553687b20..50d9e67745af 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.c
@@ -29,12 +29,22 @@
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
 
+void
+nouveau_dma_pre_init(struct nouveau_channel *chan)
+{
+	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
+	chan->dma.put  = 0;
+	chan->dma.cur  = chan->dma.put;
+	chan->dma.free = chan->dma.max - chan->dma.cur;
+}
+
 int
 nouveau_dma_init(struct nouveau_channel *chan)
 {
 	struct drm_device *dev = chan->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_gpuobj *m2mf = NULL;
+	struct nouveau_gpuobj *nvsw = NULL;
 	int ret, i;
 
 	/* Create NV_MEMORY_TO_MEMORY_FORMAT for buffer moves */
@@ -47,6 +57,15 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	if (ret)
 		return ret;
 
+	/* Create an NV_SW object for various sync purposes */
+	ret = nouveau_gpuobj_sw_new(chan, NV_SW, &nvsw);
+	if (ret)
+		return ret;
+
+	ret = nouveau_gpuobj_ref_add(dev, chan, NvSw, nvsw, NULL);
+	if (ret)
+		return ret;
+
 	/* NV_MEMORY_TO_MEMORY_FORMAT requires a notifier object */
 	ret = nouveau_notifier_alloc(chan, NvNotify0, 32, &chan->m2mf_ntfy);
 	if (ret)
@@ -64,12 +83,6 @@ nouveau_dma_init(struct nouveau_channel *chan)
 			return ret;
 	}
 
-	/* Initialise DMA vars */
-	chan->dma.max  = (chan->pushbuf_bo->bo.mem.size >> 2) - 2;
-	chan->dma.put  = 0;
-	chan->dma.cur  = chan->dma.put;
-	chan->dma.free = chan->dma.max - chan->dma.cur;
-
 	/* Insert NOPS for NOUVEAU_DMA_SKIPS */
 	ret = RING_SPACE(chan, NOUVEAU_DMA_SKIPS);
 	if (ret)
@@ -87,6 +100,13 @@ nouveau_dma_init(struct nouveau_channel *chan)
 	BEGIN_RING(chan, NvSubM2MF, NV_MEMORY_TO_MEMORY_FORMAT_DMA_NOTIFY, 1);
 	OUT_RING(chan, NvNotify0);
 
+	/* Initialise NV_SW */
+	ret = RING_SPACE(chan, 2);
+	if (ret)
+		return ret;
+	BEGIN_RING(chan, NvSubSw, 0, 1);
+	OUT_RING(chan, NvSw);
+
 	/* Sit back and pray the channel works.. */
 	FIRE_RING(chan);
 
@@ -106,47 +126,52 @@ OUT_RINGp(struct nouveau_channel *chan, const void *data, unsigned nr_dwords)
 	chan->dma.cur += nr_dwords;
 }
 
-static inline bool
-READ_GET(struct nouveau_channel *chan, uint32_t *get)
+/* Fetch and adjust GPU GET pointer
+ *
+ * Returns:
+ *  value >= 0, the adjusted GET pointer
+ *  -EINVAL if GET pointer currently outside main push buffer
+ *  -EBUSY if timeout exceeded
+ */
+static inline int
+READ_GET(struct nouveau_channel *chan, uint32_t *prev_get, uint32_t *timeout)
 {
 	uint32_t val;
 
 	val = nvchan_rd32(chan, chan->user_get);
-	if (val < chan->pushbuf_base ||
-	    val >= chan->pushbuf_base + chan->pushbuf_bo->bo.mem.size) {
-		/* meaningless to dma_wait() except to know whether the
-		 * GPU has stalled or not
-		 */
-		*get = val;
-		return false;
+
+	/* reset counter as long as GET is still advancing, this is
+	 * to avoid misdetecting a GPU lockup if the GPU happens to
+	 * just be processing an operation that takes a long time
+	 */
+	if (val != *prev_get) {
+		*prev_get = val;
+		*timeout = 0;
+	}
+
+	if ((++*timeout & 0xff) == 0) {
+		DRM_UDELAY(1);
+		if (*timeout > 100000)
+			return -EBUSY;
 	}
 
-	*get = (val - chan->pushbuf_base) >> 2;
-	return true;
+	if (val < chan->pushbuf_base ||
+	    val > chan->pushbuf_base + (chan->dma.max << 2))
+		return -EINVAL;
+
+	return (val - chan->pushbuf_base) >> 2;
 }
 
 int
 nouveau_dma_wait(struct nouveau_channel *chan, int size)
 {
-	uint32_t get, prev_get = 0, cnt = 0;
-	bool get_valid;
+	uint32_t prev_get = 0, cnt = 0;
+	int get;
 
 	while (chan->dma.free < size) {
-		/* reset counter as long as GET is still advancing, this is
-		 * to avoid misdetecting a GPU lockup if the GPU happens to
-		 * just be processing an operation that takes a long time
-		 */
-		get_valid = READ_GET(chan, &get);
-		if (get != prev_get) {
-			prev_get = get;
-			cnt = 0;
-		}
-
-		if ((++cnt & 0xff) == 0) {
-			DRM_UDELAY(1);
-			if (cnt > 100000)
-				return -EBUSY;
-		}
+		get = READ_GET(chan, &prev_get, &cnt);
+		if (unlikely(get == -EBUSY))
+			return -EBUSY;
 
 		/* loop until we have a usable GET pointer.  the value
 		 * we read from the GPU may be outside the main ring if
@@ -157,7 +182,7 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 		 * from the SKIPS area, so the code below doesn't have to deal
 		 * with some fun corner cases.
 		 */
-		if (!get_valid || get < NOUVEAU_DMA_SKIPS)
+		if (unlikely(get == -EINVAL) || get < NOUVEAU_DMA_SKIPS)
 			continue;
 
 		if (get <= chan->dma.cur) {
@@ -183,6 +208,19 @@ nouveau_dma_wait(struct nouveau_channel *chan, int size)
 			 * after processing the currently pending commands.
 			 */
 			OUT_RING(chan, chan->pushbuf_base | 0x20000000);
+
+			/* wait for GET to depart from the skips area.
+			 * prevents writing GET==PUT and causing a race
+			 * condition that causes us to think the GPU is
+			 * idle when it's not.
+			 */
+			do {
+				get = READ_GET(chan, &prev_get, &cnt);
+				if (unlikely(get == -EBUSY))
+					return -EBUSY;
+				if (unlikely(get == -EINVAL))
+					continue;
+			} while (get <= NOUVEAU_DMA_SKIPS);
 			WRITE_PUT(NOUVEAU_DMA_SKIPS);
 
 			/* we're now submitting commands at the start of
diff --git a/drivers/gpu/drm/nouveau/nouveau_dma.h b/drivers/gpu/drm/nouveau/nouveau_dma.h
index 04e85d8f757e..dabfd655f93e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dma.h
+++ b/drivers/gpu/drm/nouveau/nouveau_dma.h
@@ -46,10 +46,11 @@
 /* Hardcoded object assignments to subchannels (subchannel id). */
 enum {
 	NvSubM2MF	= 0,
-	NvSub2D		= 1,
-	NvSubCtxSurf2D  = 1,
-	NvSubGdiRect    = 2,
-	NvSubImageBlit  = 3
+	NvSubSw		= 1,
+	NvSub2D		= 2,
+	NvSubCtxSurf2D  = 2,
+	NvSubGdiRect    = 3,
+	NvSubImageBlit  = 4
 };
 
 /* Object handles. */
@@ -67,6 +68,7 @@ enum {
 	NvClipRect	= 0x8000000b,
 	NvGdiRect	= 0x8000000c,
 	NvImageBlit	= 0x8000000d,
+	NvSw		= 0x8000000e,
 
 	/* G80+ display objects */
 	NvEvoVRAM	= 0x01000000,
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 9e2926c48579..dd4937224220 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -490,7 +490,8 @@ nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 		if (!nv_wait(NV50_AUXCH_CTRL(index), 0x00010000, 0x00000000)) {
 			NV_ERROR(dev, "expected bit 16 == 0, got 0x%08x\n",
 				 nv_rd32(dev, NV50_AUXCH_CTRL(index)));
-			return -EBUSY;
+			ret = -EBUSY;
+			goto out;
 		}
 
 		udelay(400);
@@ -501,6 +502,11 @@ nouveau_dp_auxch(struct nouveau_i2c_chan *auxch, int cmd, int addr,
 			break;
 	}
 
+	if ((stat & NV50_AUXCH_STAT_COUNT) != data_nr) {
+		ret = -EREMOTEIO;
+		goto out;
+	}
+
 	if (cmd & 1) {
 		for (i = 0; i < 4; i++) {
 			data32[i] = nv_rd32(dev, NV50_AUXCH_DATA_IN(index, i));
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.c b/drivers/gpu/drm/nouveau/nouveau_drv.c
index 06eb993e0883..343ab7f17ccc 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.c
@@ -71,6 +71,10 @@ MODULE_PARM_DESC(uscript_tmds, "TMDS output script table ID (>=GeForce 8)");
 int nouveau_uscript_tmds = -1;
 module_param_named(uscript_tmds, nouveau_uscript_tmds, int, 0400);
 
+MODULE_PARM_DESC(ignorelid, "Ignore ACPI lid status");
+int nouveau_ignorelid = 0;
+module_param_named(ignorelid, nouveau_ignorelid, int, 0400);
+
 MODULE_PARM_DESC(tv_norm, "Default TV norm.\n"
 		 "\t\tSupported: PAL, PAL-M, PAL-N, PAL-Nc, NTSC-M, NTSC-J,\n"
 		 "\t\t\thd480i, hd480p, hd576i, hd576p, hd720p, hd1080i.\n"
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 5f8cbb79c499..6b9690418bc7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -59,11 +59,19 @@ struct nouveau_grctx;
 #define MAX_NUM_DCB_ENTRIES 16
 
 #define NOUVEAU_MAX_CHANNEL_NR 128
+#define NOUVEAU_MAX_TILE_NR 15
 
 #define NV50_VM_MAX_VRAM (2*1024*1024*1024ULL)
 #define NV50_VM_BLOCK    (512*1024*1024ULL)
 #define NV50_VM_VRAM_NR  (NV50_VM_MAX_VRAM / NV50_VM_BLOCK)
 
+struct nouveau_tile_reg {
+	struct nouveau_fence *fence;
+	uint32_t addr;
+	uint32_t size;
+	bool used;
+};
+
 struct nouveau_bo {
 	struct ttm_buffer_object bo;
 	struct ttm_placement placement;
@@ -83,6 +91,7 @@ struct nouveau_bo {
 
 	uint32_t tile_mode;
 	uint32_t tile_flags;
+	struct nouveau_tile_reg *tile;
 
 	struct drm_gem_object *gem;
 	struct drm_file *cpu_filp;
@@ -277,8 +286,13 @@ struct nouveau_timer_engine {
 };
 
 struct nouveau_fb_engine {
+	int num_tiles;
+
 	int  (*init)(struct drm_device *dev);
 	void (*takedown)(struct drm_device *dev);
+
+	void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+				 uint32_t size, uint32_t pitch);
 };
 
 struct nouveau_fifo_engine {
@@ -292,6 +306,8 @@ struct nouveau_fifo_engine {
 	void (*disable)(struct drm_device *);
 	void (*enable)(struct drm_device *);
 	bool (*reassign)(struct drm_device *, bool enable);
+	bool (*cache_flush)(struct drm_device *dev);
+	bool (*cache_pull)(struct drm_device *dev, bool enable);
 
 	int  (*channel_id)(struct drm_device *);
 
@@ -330,6 +346,9 @@ struct nouveau_pgraph_engine {
 	void (*destroy_context)(struct nouveau_channel *);
 	int  (*load_context)(struct nouveau_channel *);
 	int  (*unload_context)(struct drm_device *);
+
+	void (*set_region_tiling)(struct drm_device *dev, int i, uint32_t addr,
+				  uint32_t size, uint32_t pitch);
 };
 
 struct nouveau_engine {
@@ -490,6 +509,8 @@ struct drm_nouveau_private {
 	void __iomem *ramin;
 	uint32_t ramin_size;
 
+	struct nouveau_bo *vga_ram;
+
 	struct workqueue_struct *wq;
 	struct work_struct irq_work;
 
@@ -548,6 +569,12 @@ struct drm_nouveau_private {
 		unsigned long sg_handle;
 	} gart_info;
 
+	/* nv10-nv40 tiling regions */
+	struct {
+		struct nouveau_tile_reg reg[NOUVEAU_MAX_TILE_NR];
+		spinlock_t lock;
+	} tile;
+
 	/* G8x/G9x virtual address space */
 	uint64_t vm_gart_base;
 	uint64_t vm_gart_size;
@@ -650,6 +677,7 @@ extern char *nouveau_tv_norm;
 extern int nouveau_reg_debug;
 extern char *nouveau_vbios;
 extern int nouveau_ctxfw;
+extern int nouveau_ignorelid;
 
 /* nouveau_state.c */
 extern void nouveau_preclose(struct drm_device *dev, struct drm_file *);
@@ -685,6 +713,13 @@ extern void nouveau_mem_release(struct drm_file *, struct mem_block *heap);
 extern int  nouveau_mem_init(struct drm_device *);
 extern int  nouveau_mem_init_agp(struct drm_device *);
 extern void nouveau_mem_close(struct drm_device *);
+extern struct nouveau_tile_reg *nv10_mem_set_tiling(struct drm_device *dev,
+						    uint32_t addr,
+						    uint32_t size,
+						    uint32_t pitch);
+extern void nv10_mem_expire_tiling(struct drm_device *dev,
+				   struct nouveau_tile_reg *tile,
+				   struct nouveau_fence *fence);
 extern int  nv50_mem_vm_bind_linear(struct drm_device *, uint64_t virt,
 				    uint32_t size, uint32_t flags,
 				    uint64_t phys);
@@ -713,7 +748,6 @@ extern int  nouveau_channel_alloc(struct drm_device *dev,
 				  struct drm_file *file_priv,
 				  uint32_t fb_ctxdma, uint32_t tt_ctxdma);
 extern void nouveau_channel_free(struct nouveau_channel *);
-extern int  nouveau_channel_idle(struct nouveau_channel *chan);
 
 /* nouveau_object.c */
 extern int  nouveau_gpuobj_early_init(struct drm_device *);
@@ -756,6 +790,8 @@ extern int nouveau_gpuobj_gart_dma_new(struct nouveau_channel *,
 				       uint32_t *o_ret);
 extern int nouveau_gpuobj_gr_new(struct nouveau_channel *, int class,
 				 struct nouveau_gpuobj **);
+extern int nouveau_gpuobj_sw_new(struct nouveau_channel *, int class,
+				 struct nouveau_gpuobj **);
 extern int nouveau_ioctl_grobj_alloc(struct drm_device *, void *data,
 				     struct drm_file *);
 extern int nouveau_ioctl_gpuobj_free(struct drm_device *, void *data,
@@ -804,6 +840,7 @@ nouveau_debugfs_channel_fini(struct nouveau_channel *chan)
 #endif
 
 /* nouveau_dma.c */
+extern void nouveau_dma_pre_init(struct nouveau_channel *);
 extern int  nouveau_dma_init(struct nouveau_channel *);
 extern int  nouveau_dma_wait(struct nouveau_channel *, int size);
 
@@ -879,16 +916,22 @@ extern void nv04_fb_takedown(struct drm_device *);
 /* nv10_fb.c */
 extern int  nv10_fb_init(struct drm_device *);
 extern void nv10_fb_takedown(struct drm_device *);
+extern void nv10_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+				      uint32_t, uint32_t);
 
 /* nv40_fb.c */
 extern int  nv40_fb_init(struct drm_device *);
 extern void nv40_fb_takedown(struct drm_device *);
+extern void nv40_fb_set_region_tiling(struct drm_device *, int, uint32_t,
+				      uint32_t, uint32_t);
 
 /* nv04_fifo.c */
 extern int  nv04_fifo_init(struct drm_device *);
 extern void nv04_fifo_disable(struct drm_device *);
 extern void nv04_fifo_enable(struct drm_device *);
 extern bool nv04_fifo_reassign(struct drm_device *, bool);
+extern bool nv04_fifo_cache_flush(struct drm_device *);
+extern bool nv04_fifo_cache_pull(struct drm_device *, bool);
 extern int  nv04_fifo_channel_id(struct drm_device *);
 extern int  nv04_fifo_create_context(struct nouveau_channel *);
 extern void nv04_fifo_destroy_context(struct nouveau_channel *);
@@ -941,6 +984,8 @@ extern void nv10_graph_destroy_context(struct nouveau_channel *);
 extern int  nv10_graph_load_context(struct nouveau_channel *);
 extern int  nv10_graph_unload_context(struct drm_device *);
 extern void nv10_graph_context_switch(struct drm_device *);
+extern void nv10_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+					 uint32_t, uint32_t);
 
 /* nv20_graph.c */
 extern struct nouveau_pgraph_object_class nv20_graph_grclass[];
@@ -952,6 +997,8 @@ extern int  nv20_graph_unload_context(struct drm_device *);
 extern int  nv20_graph_init(struct drm_device *);
 extern void nv20_graph_takedown(struct drm_device *);
 extern int  nv30_graph_init(struct drm_device *);
+extern void nv20_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+					 uint32_t, uint32_t);
 
 /* nv40_graph.c */
 extern struct nouveau_pgraph_object_class nv40_graph_grclass[];
@@ -963,6 +1010,8 @@ extern void nv40_graph_destroy_context(struct nouveau_channel *);
 extern int  nv40_graph_load_context(struct nouveau_channel *);
 extern int  nv40_graph_unload_context(struct drm_device *);
 extern void nv40_grctx_init(struct nouveau_grctx *);
+extern void nv40_graph_set_region_tiling(struct drm_device *, int, uint32_t,
+					 uint32_t, uint32_t);
 
 /* nv50_graph.c */
 extern struct nouveau_pgraph_object_class nv50_graph_grclass[];
@@ -1030,8 +1079,7 @@ extern long nouveau_compat_ioctl(struct file *file, unsigned int cmd,
 
 /* nv04_dac.c */
 extern int nv04_dac_create(struct drm_device *dev, struct dcb_entry *entry);
-extern enum drm_connector_status nv17_dac_detect(struct drm_encoder *encoder,
-						 struct drm_connector *connector);
+extern uint32_t nv17_dac_sample_load(struct drm_encoder *encoder);
 extern int nv04_dac_output_offset(struct drm_encoder *encoder);
 extern void nv04_dac_update_dacclk(struct drm_encoder *encoder, bool enable);
 
@@ -1049,9 +1097,6 @@ extern int nv04_tv_create(struct drm_device *dev, struct dcb_entry *entry);
 
 /* nv17_tv.c */
 extern int nv17_tv_create(struct drm_device *dev, struct dcb_entry *entry);
-extern enum drm_connector_status nv17_tv_detect(struct drm_encoder *encoder,
-						struct drm_connector *connector,
-						uint32_t pin_mask);
 
 /* nv04_display.c */
 extern int nv04_display_create(struct drm_device *);
@@ -1290,14 +1335,14 @@ nv_two_reg_pll(struct drm_device *dev)
 	return false;
 }
 
-#define NV50_NVSW                                                    0x0000506e
-#define NV50_NVSW_DMA_SEMAPHORE                                      0x00000060
-#define NV50_NVSW_SEMAPHORE_OFFSET                                   0x00000064
-#define NV50_NVSW_SEMAPHORE_ACQUIRE                                  0x00000068
-#define NV50_NVSW_SEMAPHORE_RELEASE                                  0x0000006c
-#define NV50_NVSW_DMA_VBLSEM                                         0x0000018c
-#define NV50_NVSW_VBLSEM_OFFSET                                      0x00000400
-#define NV50_NVSW_VBLSEM_RELEASE_VALUE                               0x00000404
-#define NV50_NVSW_VBLSEM_RELEASE                                     0x00000408
+#define NV_SW                                                        0x0000506e
+#define NV_SW_DMA_SEMAPHORE                                          0x00000060
+#define NV_SW_SEMAPHORE_OFFSET                                       0x00000064
+#define NV_SW_SEMAPHORE_ACQUIRE                                      0x00000068
+#define NV_SW_SEMAPHORE_RELEASE                                      0x0000006c
+#define NV_SW_DMA_VBLSEM                                             0x0000018c
+#define NV_SW_VBLSEM_OFFSET                                          0x00000400
+#define NV_SW_VBLSEM_RELEASE_VALUE                                   0x00000404
+#define NV_SW_VBLSEM_RELEASE                                         0x00000408
 
 #endif /* __NOUVEAU_DRV_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 84af25c238b6..0b05c869e0e7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -64,8 +64,7 @@ nouveau_fbcon_sync(struct fb_info *info)
 		return 0;
 
 	if (RING_SPACE(chan, 4)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 		return 0;
 	}
 
@@ -86,8 +85,7 @@ nouveau_fbcon_sync(struct fb_info *info)
 	}
 
 	if (ret) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 		return 0;
 	}
 
@@ -212,11 +210,11 @@ nouveau_fbcon_create(struct drm_device *dev, uint32_t fb_width,
 
 	mode_cmd.bpp = surface_bpp;
 	mode_cmd.pitch = mode_cmd.width * (mode_cmd.bpp >> 3);
-	mode_cmd.pitch = ALIGN(mode_cmd.pitch, 256);
+	mode_cmd.pitch = roundup(mode_cmd.pitch, 256);
 	mode_cmd.depth = surface_depth;
 
 	size = mode_cmd.pitch * mode_cmd.height;
-	size = ALIGN(size, PAGE_SIZE);
+	size = roundup(size, PAGE_SIZE);
 
 	ret = nouveau_gem_new(dev, dev_priv->channel, size, 0, TTM_PL_FLAG_VRAM,
 			      0, 0x0000, false, true, &nvbo);
@@ -380,3 +378,12 @@ nouveau_fbcon_remove(struct drm_device *dev, struct drm_framebuffer *fb)
 
 	return 0;
 }
+
+void nouveau_fbcon_gpu_lockup(struct fb_info *info)
+{
+	struct nouveau_fbcon_par *par = info->par;
+	struct drm_device *dev = par->dev;
+
+	NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
+	info->flags |= FBINFO_HWACCEL_DISABLED;
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index 8531140fedbc..462e0b87b4bd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -43,5 +43,6 @@ void nouveau_fbcon_zfill(struct drm_device *dev);
 int nv04_fbcon_accel_init(struct fb_info *info);
 int nv50_fbcon_accel_init(struct fb_info *info);
 
+void nouveau_fbcon_gpu_lockup(struct fb_info *info);
 #endif /* __NV50_FBCON_H__ */
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index dacac9a0842a..faddf53ff9ed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -142,7 +142,7 @@ nouveau_fence_emit(struct nouveau_fence *fence)
 	list_add_tail(&fence->entry, &chan->fence.pending);
 	spin_unlock_irqrestore(&chan->fence.lock, flags);
 
-	BEGIN_RING(chan, NvSubM2MF, USE_REFCNT ? 0x0050 : 0x0150, 1);
+	BEGIN_RING(chan, NvSubSw, USE_REFCNT ? 0x0050 : 0x0150, 1);
 	OUT_RING(chan, fence->sequence);
 	FIRE_RING(chan);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 18fd8ac9fca7..6ac804b0c9f9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -220,7 +220,6 @@ nouveau_gem_set_domain(struct drm_gem_object *gem, uint32_t read_domains,
 }
 
 struct validate_op {
-	struct nouveau_fence *fence;
 	struct list_head vram_list;
 	struct list_head gart_list;
 	struct list_head both_list;
@@ -252,17 +251,11 @@ validate_fini_list(struct list_head *list, struct nouveau_fence *fence)
 }
 
 static void
-validate_fini(struct validate_op *op, bool success)
+validate_fini(struct validate_op *op, struct nouveau_fence* fence)
 {
-	struct nouveau_fence *fence = op->fence;
-
-	if (unlikely(!success))
-		op->fence = NULL;
-
-	validate_fini_list(&op->vram_list, op->fence);
-	validate_fini_list(&op->gart_list, op->fence);
-	validate_fini_list(&op->both_list, op->fence);
-	nouveau_fence_unref((void *)&fence);
+	validate_fini_list(&op->vram_list, fence);
+	validate_fini_list(&op->gart_list, fence);
+	validate_fini_list(&op->both_list, fence);
 }
 
 static int
@@ -328,6 +321,7 @@ retry:
 		else {
 			NV_ERROR(dev, "invalid valid domains: 0x%08x\n",
 				 b->valid_domains);
+			list_add_tail(&nvbo->entry, &op->both_list);
 			validate_fini(op, NULL);
 			return -EINVAL;
 		}
@@ -420,10 +414,6 @@ nouveau_gem_pushbuf_validate(struct nouveau_channel *chan,
 	INIT_LIST_HEAD(&op->gart_list);
 	INIT_LIST_HEAD(&op->both_list);
 
-	ret = nouveau_fence_new(chan, &op->fence, false);
-	if (ret)
-		return ret;
-
 	if (nr_buffers == 0)
 		return 0;
 
@@ -477,13 +467,14 @@ u_memcpya(uint64_t user, unsigned nmemb, unsigned size)
 static int
 nouveau_gem_pushbuf_reloc_apply(struct nouveau_channel *chan, int nr_bo,
 				struct drm_nouveau_gem_pushbuf_bo *bo,
-				int nr_relocs, uint64_t ptr_relocs,
-				int nr_dwords, int first_dword,
+				unsigned nr_relocs, uint64_t ptr_relocs,
+				unsigned nr_dwords, unsigned first_dword,
 				uint32_t *pushbuf, bool is_iomem)
 {
 	struct drm_nouveau_gem_pushbuf_reloc *reloc = NULL;
 	struct drm_device *dev = chan->dev;
-	int ret = 0, i;
+	int ret = 0;
+	unsigned i;
 
 	reloc = u_memcpya(ptr_relocs, nr_relocs, sizeof(*reloc));
 	if (IS_ERR(reloc))
@@ -541,6 +532,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	struct drm_nouveau_gem_pushbuf_bo *bo = NULL;
 	struct nouveau_channel *chan;
 	struct validate_op op;
+	struct nouveau_fence* fence = 0;
 	uint32_t *pushbuf = NULL;
 	int ret = 0, do_reloc = 0, i;
 
@@ -597,7 +589,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 
 	OUT_RINGp(chan, pushbuf, req->nr_dwords);
 
-	ret = nouveau_fence_emit(op.fence);
+	ret = nouveau_fence_new(chan, &fence, true);
 	if (ret) {
 		NV_ERROR(dev, "error fencing pushbuf: %d\n", ret);
 		WIND_RING(chan);
@@ -605,7 +597,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	}
 
 	if (nouveau_gem_pushbuf_sync(chan)) {
-		ret = nouveau_fence_wait(op.fence, NULL, false, false);
+		ret = nouveau_fence_wait(fence, NULL, false, false);
 		if (ret) {
 			for (i = 0; i < req->nr_dwords; i++)
 				NV_ERROR(dev, "0x%08x\n", pushbuf[i]);
@@ -614,7 +606,8 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 	}
 
 out:
-	validate_fini(&op, ret == 0);
+	validate_fini(&op, fence);
+	nouveau_fence_unref((void**)&fence);
 	mutex_unlock(&dev->struct_mutex);
 	kfree(pushbuf);
 	kfree(bo);
@@ -634,6 +627,7 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	struct drm_gem_object *gem;
 	struct nouveau_bo *pbbo;
 	struct validate_op op;
+	struct nouveau_fence* fence = 0;
 	int i, ret = 0, do_reloc = 0;
 
 	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
@@ -675,6 +669,18 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	}
 	pbbo = nouveau_gem_object(gem);
 
+	if ((req->offset & 3) || req->nr_dwords < 2 ||
+	    (unsigned long)req->offset > (unsigned long)pbbo->bo.mem.size ||
+	    (unsigned long)req->nr_dwords >
+	     ((unsigned long)(pbbo->bo.mem.size - req->offset ) >> 2)) {
+		NV_ERROR(dev, "pb call misaligned or out of bounds: "
+			      "%d + %d * 4 > %ld\n",
+			 req->offset, req->nr_dwords, pbbo->bo.mem.size);
+		ret = -EINVAL;
+		drm_gem_object_unreference(gem);
+		goto out;
+	}
+
 	ret = ttm_bo_reserve(&pbbo->bo, false, false, true,
 			     chan->fence.sequence);
 	if (ret) {
@@ -772,7 +778,7 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 			OUT_RING(chan, 0);
 	}
 
-	ret = nouveau_fence_emit(op.fence);
+	ret = nouveau_fence_new(chan, &fence, true);
 	if (ret) {
 		NV_ERROR(dev, "error fencing pushbuf: %d\n", ret);
 		WIND_RING(chan);
@@ -780,7 +786,8 @@ nouveau_gem_ioctl_pushbuf_call(struct drm_device *dev, void *data,
 	}
 
 out:
-	validate_fini(&op, ret == 0);
+	validate_fini(&op, fence);
+	nouveau_fence_unref((void**)&fence);
 	mutex_unlock(&dev->struct_mutex);
 	kfree(bo);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_irq.c b/drivers/gpu/drm/nouveau/nouveau_irq.c
index 370c72c968d1..3b9bad66162a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_irq.c
+++ b/drivers/gpu/drm/nouveau/nouveau_irq.c
@@ -483,6 +483,13 @@ nouveau_pgraph_intr_error(struct drm_device *dev, uint32_t nsource)
 	if (nsource & NV03_PGRAPH_NSOURCE_ILLEGAL_MTHD) {
 		if (nouveau_pgraph_intr_swmthd(dev, &trap))
 			unhandled = 1;
+	} else if (nsource & NV03_PGRAPH_NSOURCE_DMA_VTX_PROTECTION) {
+		uint32_t v = nv_rd32(dev, 0x402000);
+		nv_wr32(dev, 0x402000, v);
+
+		/* dump the error anyway for now: it's useful for
+		   Gallium development */
+		unhandled = 1;
 	} else {
 		unhandled = 1;
 	}
@@ -635,6 +642,7 @@ nv50_pgraph_irq_handler(struct drm_device *dev)
 
 		if ((nv_rd32(dev, 0x400500) & isb) != isb)
 			nv_wr32(dev, 0x400500, nv_rd32(dev, 0x400500) | isb);
+		nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) & ~(1 << 31));
 	}
 
 	nv_wr32(dev, NV03_PMC_INTR_0, NV_PMC_INTR_0_PGRAPH_PENDING);
diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index 5158a12f7844..8f3a12f614ed 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -192,6 +192,92 @@ void nouveau_mem_release(struct drm_file *file_priv, struct mem_block *heap)
 }
 
 /*
+ * NV10-NV40 tiling helpers
+ */
+
+static void
+nv10_mem_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			   uint32_t size, uint32_t pitch)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fifo_engine *pfifo = &dev_priv->engine.fifo;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	struct nouveau_pgraph_engine *pgraph = &dev_priv->engine.graph;
+	struct nouveau_tile_reg *tile = &dev_priv->tile.reg[i];
+
+	tile->addr = addr;
+	tile->size = size;
+	tile->used = !!pitch;
+	nouveau_fence_unref((void **)&tile->fence);
+
+	if (!pfifo->cache_flush(dev))
+		return;
+
+	pfifo->reassign(dev, false);
+	pfifo->cache_flush(dev);
+	pfifo->cache_pull(dev, false);
+
+	nouveau_wait_for_idle(dev);
+
+	pgraph->set_region_tiling(dev, i, addr, size, pitch);
+	pfb->set_region_tiling(dev, i, addr, size, pitch);
+
+	pfifo->cache_pull(dev, true);
+	pfifo->reassign(dev, true);
+}
+
+struct nouveau_tile_reg *
+nv10_mem_set_tiling(struct drm_device *dev, uint32_t addr, uint32_t size,
+		    uint32_t pitch)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	struct nouveau_tile_reg *tile = dev_priv->tile.reg, *found = NULL;
+	int i;
+
+	spin_lock(&dev_priv->tile.lock);
+
+	for (i = 0; i < pfb->num_tiles; i++) {
+		if (tile[i].used)
+			/* Tile region in use. */
+			continue;
+
+		if (tile[i].fence &&
+		    !nouveau_fence_signalled(tile[i].fence, NULL))
+			/* Pending tile region. */
+			continue;
+
+		if (max(tile[i].addr, addr) <
+		    min(tile[i].addr + tile[i].size, addr + size))
+			/* Kill an intersecting tile region. */
+			nv10_mem_set_region_tiling(dev, i, 0, 0, 0);
+
+		if (pitch && !found) {
+			/* Free tile region. */
+			nv10_mem_set_region_tiling(dev, i, addr, size, pitch);
+			found = &tile[i];
+		}
+	}
+
+	spin_unlock(&dev_priv->tile.lock);
+
+	return found;
+}
+
+void
+nv10_mem_expire_tiling(struct drm_device *dev, struct nouveau_tile_reg *tile,
+		       struct nouveau_fence *fence)
+{
+	if (fence) {
+		/* Mark it as pending. */
+		tile->fence = fence;
+		nouveau_fence_ref(fence);
+	}
+
+	tile->used = false;
+}
+
+/*
  * NV50 VM helpers
  */
 int
@@ -297,9 +383,8 @@ void nouveau_mem_close(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	if (dev_priv->ttm.bdev.man[TTM_PL_PRIV0].has_type)
-		ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_PRIV0);
-	ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
+	nouveau_bo_unpin(dev_priv->vga_ram);
+	nouveau_bo_ref(NULL, &dev_priv->vga_ram);
 
 	ttm_bo_device_release(&dev_priv->ttm.bdev);
 
@@ -513,6 +598,7 @@ nouveau_mem_init(struct drm_device *dev)
 
 	INIT_LIST_HEAD(&dev_priv->ttm.bo_list);
 	spin_lock_init(&dev_priv->ttm.bo_list_lock);
+	spin_lock_init(&dev_priv->tile.lock);
 
 	dev_priv->fb_available_size = nouveau_mem_fb_amount(dev);
 
@@ -535,6 +621,15 @@ nouveau_mem_init(struct drm_device *dev)
 		return ret;
 	}
 
+	ret = nouveau_bo_new(dev, NULL, 256*1024, 0, TTM_PL_FLAG_VRAM,
+			     0, 0, true, true, &dev_priv->vga_ram);
+	if (ret == 0)
+		ret = nouveau_bo_pin(dev_priv->vga_ram, TTM_PL_FLAG_VRAM);
+	if (ret) {
+		NV_WARN(dev, "failed to reserve VGA memory\n");
+		nouveau_bo_ref(NULL, &dev_priv->vga_ram);
+	}
+
 	/* GART */
 #if !defined(__powerpc__) && !defined(__ia64__)
 	if (drm_device_is_agp(dev) && dev->agp) {
@@ -566,6 +661,7 @@ nouveau_mem_init(struct drm_device *dev)
 	dev_priv->fb_mtrr = drm_mtrr_add(drm_get_resource_start(dev, 1),
 					 drm_get_resource_len(dev, 1),
 					 DRM_MTRR_WC);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_object.c b/drivers/gpu/drm/nouveau/nouveau_object.c
index 93379bb81bea..6c2cf81716df 100644
--- a/drivers/gpu/drm/nouveau/nouveau_object.c
+++ b/drivers/gpu/drm/nouveau/nouveau_object.c
@@ -881,7 +881,7 @@ nouveau_gpuobj_gr_new(struct nouveau_channel *chan, int class,
 	return 0;
 }
 
-static int
+int
 nouveau_gpuobj_sw_new(struct nouveau_channel *chan, int class,
 		      struct nouveau_gpuobj **gpuobj_ret)
 {
diff --git a/drivers/gpu/drm/nouveau/nouveau_reg.h b/drivers/gpu/drm/nouveau/nouveau_reg.h
index fa1b0e7165b9..251f1b3b38b9 100644
--- a/drivers/gpu/drm/nouveau/nouveau_reg.h
+++ b/drivers/gpu/drm/nouveau/nouveau_reg.h
@@ -349,19 +349,19 @@
 #define NV04_PGRAPH_BLEND                                  0x00400824
 #define NV04_PGRAPH_STORED_FMT                             0x00400830
 #define NV04_PGRAPH_PATT_COLORRAM                          0x00400900
-#define NV40_PGRAPH_TILE0(i)                               (0x00400900 + (i*16))
-#define NV40_PGRAPH_TLIMIT0(i)                             (0x00400904 + (i*16))
-#define NV40_PGRAPH_TSIZE0(i)                              (0x00400908 + (i*16))
-#define NV40_PGRAPH_TSTATUS0(i)                            (0x0040090C + (i*16))
+#define NV20_PGRAPH_TILE(i)                                (0x00400900 + (i*16))
+#define NV20_PGRAPH_TLIMIT(i)                              (0x00400904 + (i*16))
+#define NV20_PGRAPH_TSIZE(i)                               (0x00400908 + (i*16))
+#define NV20_PGRAPH_TSTATUS(i)                             (0x0040090C + (i*16))
 #define NV10_PGRAPH_TILE(i)                                (0x00400B00 + (i*16))
 #define NV10_PGRAPH_TLIMIT(i)                              (0x00400B04 + (i*16))
 #define NV10_PGRAPH_TSIZE(i)                               (0x00400B08 + (i*16))
 #define NV10_PGRAPH_TSTATUS(i)                             (0x00400B0C + (i*16))
 #define NV04_PGRAPH_U_RAM                                  0x00400D00
-#define NV47_PGRAPH_TILE0(i)                               (0x00400D00 + (i*16))
-#define NV47_PGRAPH_TLIMIT0(i)                             (0x00400D04 + (i*16))
-#define NV47_PGRAPH_TSIZE0(i)                              (0x00400D08 + (i*16))
-#define NV47_PGRAPH_TSTATUS0(i)                            (0x00400D0C + (i*16))
+#define NV47_PGRAPH_TILE(i)                                (0x00400D00 + (i*16))
+#define NV47_PGRAPH_TLIMIT(i)                              (0x00400D04 + (i*16))
+#define NV47_PGRAPH_TSIZE(i)                               (0x00400D08 + (i*16))
+#define NV47_PGRAPH_TSTATUS(i)                             (0x00400D0C + (i*16))
 #define NV04_PGRAPH_V_RAM                                  0x00400D40
 #define NV04_PGRAPH_W_RAM                                  0x00400D80
 #define NV10_PGRAPH_COMBINER0_IN_ALPHA                     0x00400E40
diff --git a/drivers/gpu/drm/nouveau/nouveau_state.c b/drivers/gpu/drm/nouveau/nouveau_state.c
index e76ec2d207a9..f2d0187ba152 100644
--- a/drivers/gpu/drm/nouveau/nouveau_state.c
+++ b/drivers/gpu/drm/nouveau/nouveau_state.c
@@ -76,6 +76,8 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->fifo.disable		= nv04_fifo_disable;
 		engine->fifo.enable		= nv04_fifo_enable;
 		engine->fifo.reassign		= nv04_fifo_reassign;
+		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
 		engine->fifo.channel_id		= nv04_fifo_channel_id;
 		engine->fifo.create_context	= nv04_fifo_create_context;
 		engine->fifo.destroy_context	= nv04_fifo_destroy_context;
@@ -100,6 +102,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv10_fb_init;
 		engine->fb.takedown		= nv10_fb_takedown;
+		engine->fb.set_region_tiling	= nv10_fb_set_region_tiling;
 		engine->graph.grclass		= nv10_graph_grclass;
 		engine->graph.init		= nv10_graph_init;
 		engine->graph.takedown		= nv10_graph_takedown;
@@ -109,12 +112,15 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.fifo_access	= nv04_graph_fifo_access;
 		engine->graph.load_context	= nv10_graph_load_context;
 		engine->graph.unload_context	= nv10_graph_unload_context;
+		engine->graph.set_region_tiling	= nv10_graph_set_region_tiling;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nouveau_stub_takedown;
 		engine->fifo.disable		= nv04_fifo_disable;
 		engine->fifo.enable		= nv04_fifo_enable;
 		engine->fifo.reassign		= nv04_fifo_reassign;
+		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
 		engine->fifo.channel_id		= nv10_fifo_channel_id;
 		engine->fifo.create_context	= nv10_fifo_create_context;
 		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
@@ -139,6 +145,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv10_fb_init;
 		engine->fb.takedown		= nv10_fb_takedown;
+		engine->fb.set_region_tiling	= nv10_fb_set_region_tiling;
 		engine->graph.grclass		= nv20_graph_grclass;
 		engine->graph.init		= nv20_graph_init;
 		engine->graph.takedown		= nv20_graph_takedown;
@@ -148,12 +155,15 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.fifo_access	= nv04_graph_fifo_access;
 		engine->graph.load_context	= nv20_graph_load_context;
 		engine->graph.unload_context	= nv20_graph_unload_context;
+		engine->graph.set_region_tiling	= nv20_graph_set_region_tiling;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nouveau_stub_takedown;
 		engine->fifo.disable		= nv04_fifo_disable;
 		engine->fifo.enable		= nv04_fifo_enable;
 		engine->fifo.reassign		= nv04_fifo_reassign;
+		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
 		engine->fifo.channel_id		= nv10_fifo_channel_id;
 		engine->fifo.create_context	= nv10_fifo_create_context;
 		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
@@ -178,6 +188,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv10_fb_init;
 		engine->fb.takedown		= nv10_fb_takedown;
+		engine->fb.set_region_tiling	= nv10_fb_set_region_tiling;
 		engine->graph.grclass		= nv30_graph_grclass;
 		engine->graph.init		= nv30_graph_init;
 		engine->graph.takedown		= nv20_graph_takedown;
@@ -187,12 +198,15 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.destroy_context	= nv20_graph_destroy_context;
 		engine->graph.load_context	= nv20_graph_load_context;
 		engine->graph.unload_context	= nv20_graph_unload_context;
+		engine->graph.set_region_tiling	= nv20_graph_set_region_tiling;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv10_fifo_init;
 		engine->fifo.takedown		= nouveau_stub_takedown;
 		engine->fifo.disable		= nv04_fifo_disable;
 		engine->fifo.enable		= nv04_fifo_enable;
 		engine->fifo.reassign		= nv04_fifo_reassign;
+		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
 		engine->fifo.channel_id		= nv10_fifo_channel_id;
 		engine->fifo.create_context	= nv10_fifo_create_context;
 		engine->fifo.destroy_context	= nv10_fifo_destroy_context;
@@ -218,6 +232,7 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->timer.takedown		= nv04_timer_takedown;
 		engine->fb.init			= nv40_fb_init;
 		engine->fb.takedown		= nv40_fb_takedown;
+		engine->fb.set_region_tiling	= nv40_fb_set_region_tiling;
 		engine->graph.grclass		= nv40_graph_grclass;
 		engine->graph.init		= nv40_graph_init;
 		engine->graph.takedown		= nv40_graph_takedown;
@@ -227,12 +242,15 @@ static int nouveau_init_engine_ptrs(struct drm_device *dev)
 		engine->graph.destroy_context	= nv40_graph_destroy_context;
 		engine->graph.load_context	= nv40_graph_load_context;
 		engine->graph.unload_context	= nv40_graph_unload_context;
+		engine->graph.set_region_tiling	= nv40_graph_set_region_tiling;
 		engine->fifo.channels		= 32;
 		engine->fifo.init		= nv40_fifo_init;
 		engine->fifo.takedown		= nouveau_stub_takedown;
 		engine->fifo.disable		= nv04_fifo_disable;
 		engine->fifo.enable		= nv04_fifo_enable;
 		engine->fifo.reassign		= nv04_fifo_reassign;
+		engine->fifo.cache_flush	= nv04_fifo_cache_flush;
+		engine->fifo.cache_pull		= nv04_fifo_cache_pull;
 		engine->fifo.channel_id		= nv10_fifo_channel_id;
 		engine->fifo.create_context	= nv40_fifo_create_context;
 		engine->fifo.destroy_context	= nv40_fifo_destroy_context;
@@ -507,6 +525,7 @@ static void nouveau_card_takedown(struct drm_device *dev)
 		engine->mc.takedown(dev);
 
 		mutex_lock(&dev->struct_mutex);
+		ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_VRAM);
 		ttm_bo_clean_mm(&dev_priv->ttm.bdev, TTM_PL_TT);
 		mutex_unlock(&dev->struct_mutex);
 		nouveau_sgdma_takedown(dev);
@@ -624,7 +643,10 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 		dev_priv->chipset = (reg0 & 0xff00000) >> 20;
 	/* NV04 or NV05 */
 	} else if ((reg0 & 0xff00fff0) == 0x20004000) {
-		dev_priv->chipset = 0x04;
+		if (reg0 & 0x00f00000)
+			dev_priv->chipset = 0x05;
+		else
+			dev_priv->chipset = 0x04;
 	} else
 		dev_priv->chipset = 0xff;
 
@@ -704,8 +726,8 @@ static void nouveau_close(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 
-	/* In the case of an error dev_priv may not be be allocated yet */
-	if (dev_priv && dev_priv->card_type)
+	/* In the case of an error dev_priv may not be allocated yet */
+	if (dev_priv)
 		nouveau_card_takedown(dev);
 }
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index 187eb84e4da5..c385d50f041b 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -28,45 +28,17 @@
 
 #include "nouveau_drv.h"
 
-static struct vm_operations_struct nouveau_ttm_vm_ops;
-static const struct vm_operations_struct *ttm_vm_ops;
-
-static int
-nouveau_ttm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
-	struct ttm_buffer_object *bo = vma->vm_private_data;
-	int ret;
-
-	if (unlikely(bo == NULL))
-		return VM_FAULT_NOPAGE;
-
-	ret = ttm_vm_ops->fault(vma, vmf);
-	return ret;
-}
-
 int
 nouveau_ttm_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct drm_file *file_priv = filp->private_data;
 	struct drm_nouveau_private *dev_priv =
 		file_priv->minor->dev->dev_private;
-	int ret;
 
 	if (unlikely(vma->vm_pgoff < DRM_FILE_PAGE_OFFSET))
 		return drm_mmap(filp, vma);
 
-	ret = ttm_bo_mmap(filp, vma, &dev_priv->ttm.bdev);
-	if (unlikely(ret != 0))
-		return ret;
-
-	if (unlikely(ttm_vm_ops == NULL)) {
-		ttm_vm_ops = vma->vm_ops;
-		nouveau_ttm_vm_ops = *ttm_vm_ops;
-		nouveau_ttm_vm_ops.fault = &nouveau_ttm_fault;
-	}
-
-	vma->vm_ops = &nouveau_ttm_vm_ops;
-	return 0;
+	return ttm_bo_mmap(filp, vma, &dev_priv->ttm.bdev);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nv04_dac.c b/drivers/gpu/drm/nouveau/nv04_dac.c
index d9f32879ba38..d0e038d28948 100644
--- a/drivers/gpu/drm/nouveau/nv04_dac.c
+++ b/drivers/gpu/drm/nouveau/nv04_dac.c
@@ -212,16 +212,15 @@ out:
 	return connector_status_disconnected;
 }
 
-enum drm_connector_status nv17_dac_detect(struct drm_encoder *encoder,
-					  struct drm_connector *connector)
+uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct dcb_entry *dcb = nouveau_encoder(encoder)->dcb;
-	uint32_t testval, regoffset = nv04_dac_output_offset(encoder);
+	uint32_t sample, testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t saved_powerctrl_2 = 0, saved_powerctrl_4 = 0, saved_routput,
 		saved_rtest_ctrl, saved_gpio0, saved_gpio1, temp, routput;
-	int head, present = 0;
+	int head;
 
 #define RGB_TEST_DATA(r, g, b) (r << 0 | g << 10 | b << 20)
 	if (dcb->type == OUTPUT_TV) {
@@ -287,13 +286,7 @@ enum drm_connector_status nv17_dac_detect(struct drm_encoder *encoder,
 		      temp | NV_PRAMDAC_TEST_CONTROL_TP_INS_EN_ASSERTED);
 	msleep(5);
 
-	temp = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
-
-	if (dcb->type == OUTPUT_TV)
-		present = (nv17_tv_detect(encoder, connector, temp)
-			   == connector_status_connected);
-	else
-		present = temp & NV_PRAMDAC_TEST_CONTROL_SENSEB_ALLHI;
+	sample = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
 
 	temp = NVReadRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL);
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_TEST_CONTROL,
@@ -310,15 +303,25 @@ enum drm_connector_status nv17_dac_detect(struct drm_encoder *encoder,
 	nv17_gpio_set(dev, DCB_GPIO_TVDAC1, saved_gpio1);
 	nv17_gpio_set(dev, DCB_GPIO_TVDAC0, saved_gpio0);
 
-	if (present) {
-		NV_INFO(dev, "Load detected on output %c\n", '@' + ffs(dcb->or));
+	return sample;
+}
+
+static enum drm_connector_status
+nv17_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct dcb_entry *dcb = nouveau_encoder(encoder)->dcb;
+	uint32_t sample = nv17_dac_sample_load(encoder);
+
+	if (sample & NV_PRAMDAC_TEST_CONTROL_SENSEB_ALLHI) {
+		NV_INFO(dev, "Load detected on output %c\n",
+			'@' + ffs(dcb->or));
 		return connector_status_connected;
+	} else {
+		return connector_status_disconnected;
 	}
-
-	return connector_status_disconnected;
 }
 
-
 static bool nv04_dac_mode_fixup(struct drm_encoder *encoder,
 				struct drm_display_mode *mode,
 				struct drm_display_mode *adjusted_mode)
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index 09a31071ee58..d910873c1368 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -39,8 +39,7 @@ nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 		return;
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 4)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -62,14 +61,12 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	struct drm_device *dev = par->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = dev_priv->channel;
-	uint32_t color = ((uint32_t *) info->pseudo_palette)[rect->color];
 
 	if (info->state != FBINFO_STATE_RUNNING)
 		return;
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 7)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -80,7 +77,11 @@ nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 	BEGIN_RING(chan, NvSubGdiRect, 0x02fc, 1);
 	OUT_RING(chan, (rect->rop != ROP_COPY) ? 1 : 3);
 	BEGIN_RING(chan, NvSubGdiRect, 0x03fc, 1);
-	OUT_RING(chan, color);
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
+		OUT_RING(chan, ((uint32_t *)info->pseudo_palette)[rect->color]);
+	else
+		OUT_RING(chan, rect->color);
 	BEGIN_RING(chan, NvSubGdiRect, 0x0400, 2);
 	OUT_RING(chan, (rect->dx << 16) | rect->dy);
 	OUT_RING(chan, (rect->width << 16) | rect->height);
@@ -109,8 +110,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	}
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 8)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -144,8 +144,7 @@ nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		int iter_len = dsize > 128 ? 128 : dsize;
 
 		if (RING_SPACE(chan, iter_len + 1)) {
-			NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-			info->flags |= FBINFO_HWACCEL_DISABLED;
+			nouveau_fbcon_gpu_lockup(info);
 			cfb_imageblit(info, image);
 			return;
 		}
@@ -184,6 +183,7 @@ nv04_fbcon_accel_init(struct fb_info *info)
 	struct drm_device *dev = par->dev;
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
 	struct nouveau_channel *chan = dev_priv->channel;
+	const int sub = NvSubCtxSurf2D;
 	int surface_fmt, pattern_fmt, rect_fmt;
 	int ret;
 
@@ -242,30 +242,29 @@ nv04_fbcon_accel_init(struct fb_info *info)
 		return ret;
 
 	if (RING_SPACE(chan, 49)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 		return 0;
 	}
 
-	BEGIN_RING(chan, 1, 0x0000, 1);
+	BEGIN_RING(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvCtxSurf2D);
-	BEGIN_RING(chan, 1, 0x0184, 2);
+	BEGIN_RING(chan, sub, 0x0184, 2);
 	OUT_RING(chan, NvDmaFB);
 	OUT_RING(chan, NvDmaFB);
-	BEGIN_RING(chan, 1, 0x0300, 4);
+	BEGIN_RING(chan, sub, 0x0300, 4);
 	OUT_RING(chan, surface_fmt);
 	OUT_RING(chan, info->fix.line_length | (info->fix.line_length << 16));
 	OUT_RING(chan, info->fix.smem_start - dev->mode_config.fb_base);
 	OUT_RING(chan, info->fix.smem_start - dev->mode_config.fb_base);
 
-	BEGIN_RING(chan, 1, 0x0000, 1);
+	BEGIN_RING(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvRop);
-	BEGIN_RING(chan, 1, 0x0300, 1);
+	BEGIN_RING(chan, sub, 0x0300, 1);
 	OUT_RING(chan, 0x55);
 
-	BEGIN_RING(chan, 1, 0x0000, 1);
+	BEGIN_RING(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvImagePatt);
-	BEGIN_RING(chan, 1, 0x0300, 8);
+	BEGIN_RING(chan, sub, 0x0300, 8);
 	OUT_RING(chan, pattern_fmt);
 #ifdef __BIG_ENDIAN
 	OUT_RING(chan, 2);
@@ -279,9 +278,9 @@ nv04_fbcon_accel_init(struct fb_info *info)
 	OUT_RING(chan, ~0);
 	OUT_RING(chan, ~0);
 
-	BEGIN_RING(chan, 1, 0x0000, 1);
+	BEGIN_RING(chan, sub, 0x0000, 1);
 	OUT_RING(chan, NvClipRect);
-	BEGIN_RING(chan, 1, 0x0300, 2);
+	BEGIN_RING(chan, sub, 0x0300, 2);
 	OUT_RING(chan, 0);
 	OUT_RING(chan, (info->var.yres_virtual << 16) | info->var.xres_virtual);
 
diff --git a/drivers/gpu/drm/nouveau/nv04_fifo.c b/drivers/gpu/drm/nouveau/nv04_fifo.c
index 0c3cd53c7313..f31347b8c9b0 100644
--- a/drivers/gpu/drm/nouveau/nv04_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv04_fifo.c
@@ -71,6 +71,40 @@ nv04_fifo_reassign(struct drm_device *dev, bool enable)
 	return (reassign == 1);
 }
 
+bool
+nv04_fifo_cache_flush(struct drm_device *dev)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_timer_engine *ptimer = &dev_priv->engine.timer;
+	uint64_t start = ptimer->read(dev);
+
+	do {
+		if (nv_rd32(dev, NV03_PFIFO_CACHE1_GET) ==
+		    nv_rd32(dev, NV03_PFIFO_CACHE1_PUT))
+			return true;
+
+	} while (ptimer->read(dev) - start < 100000000);
+
+	NV_ERROR(dev, "Timeout flushing the PFIFO cache.\n");
+
+	return false;
+}
+
+bool
+nv04_fifo_cache_pull(struct drm_device *dev, bool enable)
+{
+	uint32_t pull = nv_rd32(dev, NV04_PFIFO_CACHE1_PULL0);
+
+	if (enable) {
+		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull | 1);
+	} else {
+		nv_wr32(dev, NV04_PFIFO_CACHE1_PULL0, pull & ~1);
+		nv_wr32(dev, NV04_PFIFO_CACHE1_HASH, 0);
+	}
+
+	return !!(pull & 1);
+}
+
 int
 nv04_fifo_channel_id(struct drm_device *dev)
 {
diff --git a/drivers/gpu/drm/nouveau/nv04_graph.c b/drivers/gpu/drm/nouveau/nv04_graph.c
index d561d773c0f4..e260986ea65a 100644
--- a/drivers/gpu/drm/nouveau/nv04_graph.c
+++ b/drivers/gpu/drm/nouveau/nv04_graph.c
@@ -28,6 +28,10 @@
 #include "nouveau_drv.h"
 
 static uint32_t nv04_graph_ctx_regs[] = {
+	0x0040053c,
+	0x00400544,
+	0x00400540,
+	0x00400548,
 	NV04_PGRAPH_CTX_SWITCH1,
 	NV04_PGRAPH_CTX_SWITCH2,
 	NV04_PGRAPH_CTX_SWITCH3,
@@ -102,69 +106,69 @@ static uint32_t nv04_graph_ctx_regs[] = {
 	NV04_PGRAPH_PATT_COLOR0,
 	NV04_PGRAPH_PATT_COLOR1,
 	NV04_PGRAPH_PATT_COLORRAM+0x00,
-	NV04_PGRAPH_PATT_COLORRAM+0x01,
-	NV04_PGRAPH_PATT_COLORRAM+0x02,
-	NV04_PGRAPH_PATT_COLORRAM+0x03,
 	NV04_PGRAPH_PATT_COLORRAM+0x04,
-	NV04_PGRAPH_PATT_COLORRAM+0x05,
-	NV04_PGRAPH_PATT_COLORRAM+0x06,
-	NV04_PGRAPH_PATT_COLORRAM+0x07,
 	NV04_PGRAPH_PATT_COLORRAM+0x08,
-	NV04_PGRAPH_PATT_COLORRAM+0x09,
-	NV04_PGRAPH_PATT_COLORRAM+0x0A,
-	NV04_PGRAPH_PATT_COLORRAM+0x0B,
-	NV04_PGRAPH_PATT_COLORRAM+0x0C,
-	NV04_PGRAPH_PATT_COLORRAM+0x0D,
-	NV04_PGRAPH_PATT_COLORRAM+0x0E,
-	NV04_PGRAPH_PATT_COLORRAM+0x0F,
+	NV04_PGRAPH_PATT_COLORRAM+0x0c,
 	NV04_PGRAPH_PATT_COLORRAM+0x10,
-	NV04_PGRAPH_PATT_COLORRAM+0x11,
-	NV04_PGRAPH_PATT_COLORRAM+0x12,
-	NV04_PGRAPH_PATT_COLORRAM+0x13,
 	NV04_PGRAPH_PATT_COLORRAM+0x14,
-	NV04_PGRAPH_PATT_COLORRAM+0x15,
-	NV04_PGRAPH_PATT_COLORRAM+0x16,
-	NV04_PGRAPH_PATT_COLORRAM+0x17,
 	NV04_PGRAPH_PATT_COLORRAM+0x18,
-	NV04_PGRAPH_PATT_COLORRAM+0x19,
-	NV04_PGRAPH_PATT_COLORRAM+0x1A,
-	NV04_PGRAPH_PATT_COLORRAM+0x1B,
-	NV04_PGRAPH_PATT_COLORRAM+0x1C,
-	NV04_PGRAPH_PATT_COLORRAM+0x1D,
-	NV04_PGRAPH_PATT_COLORRAM+0x1E,
-	NV04_PGRAPH_PATT_COLORRAM+0x1F,
+	NV04_PGRAPH_PATT_COLORRAM+0x1c,
 	NV04_PGRAPH_PATT_COLORRAM+0x20,
-	NV04_PGRAPH_PATT_COLORRAM+0x21,
-	NV04_PGRAPH_PATT_COLORRAM+0x22,
-	NV04_PGRAPH_PATT_COLORRAM+0x23,
 	NV04_PGRAPH_PATT_COLORRAM+0x24,
-	NV04_PGRAPH_PATT_COLORRAM+0x25,
-	NV04_PGRAPH_PATT_COLORRAM+0x26,
-	NV04_PGRAPH_PATT_COLORRAM+0x27,
 	NV04_PGRAPH_PATT_COLORRAM+0x28,
-	NV04_PGRAPH_PATT_COLORRAM+0x29,
-	NV04_PGRAPH_PATT_COLORRAM+0x2A,
-	NV04_PGRAPH_PATT_COLORRAM+0x2B,
-	NV04_PGRAPH_PATT_COLORRAM+0x2C,
-	NV04_PGRAPH_PATT_COLORRAM+0x2D,
-	NV04_PGRAPH_PATT_COLORRAM+0x2E,
-	NV04_PGRAPH_PATT_COLORRAM+0x2F,
+	NV04_PGRAPH_PATT_COLORRAM+0x2c,
 	NV04_PGRAPH_PATT_COLORRAM+0x30,
-	NV04_PGRAPH_PATT_COLORRAM+0x31,
-	NV04_PGRAPH_PATT_COLORRAM+0x32,
-	NV04_PGRAPH_PATT_COLORRAM+0x33,
 	NV04_PGRAPH_PATT_COLORRAM+0x34,
-	NV04_PGRAPH_PATT_COLORRAM+0x35,
-	NV04_PGRAPH_PATT_COLORRAM+0x36,
-	NV04_PGRAPH_PATT_COLORRAM+0x37,
 	NV04_PGRAPH_PATT_COLORRAM+0x38,
-	NV04_PGRAPH_PATT_COLORRAM+0x39,
-	NV04_PGRAPH_PATT_COLORRAM+0x3A,
-	NV04_PGRAPH_PATT_COLORRAM+0x3B,
-	NV04_PGRAPH_PATT_COLORRAM+0x3C,
-	NV04_PGRAPH_PATT_COLORRAM+0x3D,
-	NV04_PGRAPH_PATT_COLORRAM+0x3E,
-	NV04_PGRAPH_PATT_COLORRAM+0x3F,
+	NV04_PGRAPH_PATT_COLORRAM+0x3c,
+	NV04_PGRAPH_PATT_COLORRAM+0x40,
+	NV04_PGRAPH_PATT_COLORRAM+0x44,
+	NV04_PGRAPH_PATT_COLORRAM+0x48,
+	NV04_PGRAPH_PATT_COLORRAM+0x4c,
+	NV04_PGRAPH_PATT_COLORRAM+0x50,
+	NV04_PGRAPH_PATT_COLORRAM+0x54,
+	NV04_PGRAPH_PATT_COLORRAM+0x58,
+	NV04_PGRAPH_PATT_COLORRAM+0x5c,
+	NV04_PGRAPH_PATT_COLORRAM+0x60,
+	NV04_PGRAPH_PATT_COLORRAM+0x64,
+	NV04_PGRAPH_PATT_COLORRAM+0x68,
+	NV04_PGRAPH_PATT_COLORRAM+0x6c,
+	NV04_PGRAPH_PATT_COLORRAM+0x70,
+	NV04_PGRAPH_PATT_COLORRAM+0x74,
+	NV04_PGRAPH_PATT_COLORRAM+0x78,
+	NV04_PGRAPH_PATT_COLORRAM+0x7c,
+	NV04_PGRAPH_PATT_COLORRAM+0x80,
+	NV04_PGRAPH_PATT_COLORRAM+0x84,
+	NV04_PGRAPH_PATT_COLORRAM+0x88,
+	NV04_PGRAPH_PATT_COLORRAM+0x8c,
+	NV04_PGRAPH_PATT_COLORRAM+0x90,
+	NV04_PGRAPH_PATT_COLORRAM+0x94,
+	NV04_PGRAPH_PATT_COLORRAM+0x98,
+	NV04_PGRAPH_PATT_COLORRAM+0x9c,
+	NV04_PGRAPH_PATT_COLORRAM+0xa0,
+	NV04_PGRAPH_PATT_COLORRAM+0xa4,
+	NV04_PGRAPH_PATT_COLORRAM+0xa8,
+	NV04_PGRAPH_PATT_COLORRAM+0xac,
+	NV04_PGRAPH_PATT_COLORRAM+0xb0,
+	NV04_PGRAPH_PATT_COLORRAM+0xb4,
+	NV04_PGRAPH_PATT_COLORRAM+0xb8,
+	NV04_PGRAPH_PATT_COLORRAM+0xbc,
+	NV04_PGRAPH_PATT_COLORRAM+0xc0,
+	NV04_PGRAPH_PATT_COLORRAM+0xc4,
+	NV04_PGRAPH_PATT_COLORRAM+0xc8,
+	NV04_PGRAPH_PATT_COLORRAM+0xcc,
+	NV04_PGRAPH_PATT_COLORRAM+0xd0,
+	NV04_PGRAPH_PATT_COLORRAM+0xd4,
+	NV04_PGRAPH_PATT_COLORRAM+0xd8,
+	NV04_PGRAPH_PATT_COLORRAM+0xdc,
+	NV04_PGRAPH_PATT_COLORRAM+0xe0,
+	NV04_PGRAPH_PATT_COLORRAM+0xe4,
+	NV04_PGRAPH_PATT_COLORRAM+0xe8,
+	NV04_PGRAPH_PATT_COLORRAM+0xec,
+	NV04_PGRAPH_PATT_COLORRAM+0xf0,
+	NV04_PGRAPH_PATT_COLORRAM+0xf4,
+	NV04_PGRAPH_PATT_COLORRAM+0xf8,
+	NV04_PGRAPH_PATT_COLORRAM+0xfc,
 	NV04_PGRAPH_PATTERN,
 	0x0040080c,
 	NV04_PGRAPH_PATTERN_SHAPE,
@@ -247,14 +251,6 @@ static uint32_t nv04_graph_ctx_regs[] = {
 	0x004004f8,
 	0x0040047c,
 	0x004004fc,
-	0x0040053c,
-	0x00400544,
-	0x00400540,
-	0x00400548,
-	0x00400560,
-	0x00400568,
-	0x00400564,
-	0x0040056c,
 	0x00400534,
 	0x00400538,
 	0x00400514,
@@ -341,9 +337,8 @@ static uint32_t nv04_graph_ctx_regs[] = {
 	0x00400500,
 	0x00400504,
 	NV04_PGRAPH_VALID1,
-	NV04_PGRAPH_VALID2
-
-
+	NV04_PGRAPH_VALID2,
+	NV04_PGRAPH_DEBUG_3
 };
 
 struct graph_state {
@@ -388,6 +383,18 @@ nv04_graph_context_switch(struct drm_device *dev)
 	pgraph->fifo_access(dev, true);
 }
 
+static uint32_t *ctx_reg(struct graph_state *ctx, uint32_t reg)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(nv04_graph_ctx_regs); i++) {
+		if (nv04_graph_ctx_regs[i] == reg)
+			return &ctx->nv04[i];
+	}
+
+	return NULL;
+}
+
 int nv04_graph_create_context(struct nouveau_channel *chan)
 {
 	struct graph_state *pgraph_ctx;
@@ -398,15 +405,8 @@ int nv04_graph_create_context(struct nouveau_channel *chan)
 	if (pgraph_ctx == NULL)
 		return -ENOMEM;
 
-	/* dev_priv->fifos[channel].pgraph_ctx_user = channel << 24; */
-	pgraph_ctx->nv04[0] = 0x0001ffff;
-	/* is it really needed ??? */
-#if 0
-	dev_priv->fifos[channel].pgraph_ctx[1] =
-					nv_rd32(dev, NV_PGRAPH_DEBUG_4);
-	dev_priv->fifos[channel].pgraph_ctx[2] =
-					nv_rd32(dev, 0x004006b0);
-#endif
+	*ctx_reg(pgraph_ctx, NV04_PGRAPH_DEBUG_3) = 0xfad4ff31;
+
 	return 0;
 }
 
@@ -429,9 +429,13 @@ int nv04_graph_load_context(struct nouveau_channel *chan)
 		nv_wr32(dev, nv04_graph_ctx_regs[i], pgraph_ctx->nv04[i]);
 
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL, 0x10010100);
-	nv_wr32(dev, NV04_PGRAPH_CTX_USER, chan->id << 24);
+
+	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
+	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp | chan->id << 24);
+
 	tmp = nv_rd32(dev, NV04_PGRAPH_FFINTFC_ST2);
 	nv_wr32(dev, NV04_PGRAPH_FFINTFC_ST2, tmp & 0x000fffff);
+
 	return 0;
 }
 
@@ -494,7 +498,7 @@ int nv04_graph_init(struct drm_device *dev)
 	nv_wr32(dev, NV04_PGRAPH_STATE        , 0xFFFFFFFF);
 	nv_wr32(dev, NV04_PGRAPH_CTX_CONTROL  , 0x10000100);
 	tmp  = nv_rd32(dev, NV04_PGRAPH_CTX_USER) & 0x00ffffff;
-	tmp |= dev_priv->engine.fifo.channels << 24;
+	tmp |= (dev_priv->engine.fifo.channels - 1) << 24;
 	nv_wr32(dev, NV04_PGRAPH_CTX_USER, tmp);
 
 	/* These don't belong here, they're part of a per-channel context */
@@ -533,7 +537,7 @@ nv04_graph_mthd_set_operation(struct nouveau_channel *chan, int grclass,
 			      int mthd, uint32_t data)
 {
 	struct drm_device *dev = chan->dev;
-	uint32_t instance = nv_rd32(dev, NV04_PGRAPH_CTX_SWITCH4) & 0xffff;
+	uint32_t instance = (nv_rd32(dev, NV04_PGRAPH_CTX_SWITCH4) & 0xffff) << 4;
 	int subc = (nv_rd32(dev, NV04_PGRAPH_TRAPPED_ADDR) >> 13) & 0x7;
 	uint32_t tmp;
 
@@ -547,7 +551,7 @@ nv04_graph_mthd_set_operation(struct nouveau_channel *chan, int grclass,
 	return 0;
 }
 
-static struct nouveau_pgraph_object_method nv04_graph_mthds_m2mf[] = {
+static struct nouveau_pgraph_object_method nv04_graph_mthds_sw[] = {
 	{ 0x0150, nv04_graph_mthd_set_ref },
 	{}
 };
@@ -558,7 +562,7 @@ static struct nouveau_pgraph_object_method nv04_graph_mthds_set_operation[] = {
 };
 
 struct nouveau_pgraph_object_class nv04_graph_grclass[] = {
-	{ 0x0039, false, nv04_graph_mthds_m2mf },
+	{ 0x0039, false, NULL },
 	{ 0x004a, false, nv04_graph_mthds_set_operation }, /* gdirect */
 	{ 0x005f, false, nv04_graph_mthds_set_operation }, /* imageblit */
 	{ 0x0061, false, nv04_graph_mthds_set_operation }, /* ifc */
@@ -574,6 +578,7 @@ struct nouveau_pgraph_object_class nv04_graph_grclass[] = {
 	{ 0x0053, false, NULL }, /* surf3d */
 	{ 0x0054, false, NULL }, /* tex_tri */
 	{ 0x0055, false, NULL }, /* multitex_tri */
+	{ 0x506e, true, nv04_graph_mthds_sw },
 	{}
 };
 
diff --git a/drivers/gpu/drm/nouveau/nv04_instmem.c b/drivers/gpu/drm/nouveau/nv04_instmem.c
index a20c206625a2..a3b9563a6f60 100644
--- a/drivers/gpu/drm/nouveau/nv04_instmem.c
+++ b/drivers/gpu/drm/nouveau/nv04_instmem.c
@@ -30,7 +30,7 @@ nv04_instmem_determine_amount(struct drm_device *dev)
 		 * of vram.  For now, only reserve a small piece until we know
 		 * more about what each chipset requires.
 		 */
-		switch (dev_priv->chipset & 0xf0) {
+		switch (dev_priv->chipset) {
 		case 0x40:
 		case 0x47:
 		case 0x49:
diff --git a/drivers/gpu/drm/nouveau/nv10_fb.c b/drivers/gpu/drm/nouveau/nv10_fb.c
index 79e2d104d70a..cc5cda44e501 100644
--- a/drivers/gpu/drm/nouveau/nv10_fb.c
+++ b/drivers/gpu/drm/nouveau/nv10_fb.c
@@ -3,17 +3,37 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 
+void
+nv10_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			  uint32_t size, uint32_t pitch)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t limit = max(1u, addr + size) - 1;
+
+	if (pitch) {
+		if (dev_priv->card_type >= NV_20)
+			addr |= 1;
+		else
+			addr |= 1 << 31;
+	}
+
+	nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+	nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+	nv_wr32(dev, NV10_PFB_TILE(i), addr);
+}
+
 int
 nv10_fb_init(struct drm_device *dev)
 {
-	uint32_t fb_bar_size;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
 	int i;
 
-	fb_bar_size = drm_get_resource_len(dev, 0) - 1;
-	for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
-		nv_wr32(dev, NV10_PFB_TILE(i), 0);
-		nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
-	}
+	pfb->num_tiles = NV10_PFB_TILE__SIZE;
+
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < pfb->num_tiles; i++)
+		pfb->set_region_tiling(dev, i, 0, 0, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_graph.c b/drivers/gpu/drm/nouveau/nv10_graph.c
index 6870e0ee2e7e..fcf2cdd19493 100644
--- a/drivers/gpu/drm/nouveau/nv10_graph.c
+++ b/drivers/gpu/drm/nouveau/nv10_graph.c
@@ -807,6 +807,20 @@ void nv10_graph_destroy_context(struct nouveau_channel *chan)
 	chan->pgraph_ctx = NULL;
 }
 
+void
+nv10_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			     uint32_t size, uint32_t pitch)
+{
+	uint32_t limit = max(1u, addr + size) - 1;
+
+	if (pitch)
+		addr |= 1 << 31;
+
+	nv_wr32(dev, NV10_PGRAPH_TLIMIT(i), limit);
+	nv_wr32(dev, NV10_PGRAPH_TSIZE(i), pitch);
+	nv_wr32(dev, NV10_PGRAPH_TILE(i), addr);
+}
+
 int nv10_graph_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
@@ -838,17 +852,9 @@ int nv10_graph_init(struct drm_device *dev)
 	} else
 		nv_wr32(dev, NV10_PGRAPH_DEBUG_4, 0x00000000);
 
-	/* copy tile info from PFB */
-	for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
-		nv_wr32(dev, NV10_PGRAPH_TILE(i),
-					nv_rd32(dev, NV10_PFB_TILE(i)));
-		nv_wr32(dev, NV10_PGRAPH_TLIMIT(i),
-					nv_rd32(dev, NV10_PFB_TLIMIT(i)));
-		nv_wr32(dev, NV10_PGRAPH_TSIZE(i),
-					nv_rd32(dev, NV10_PFB_TSIZE(i)));
-		nv_wr32(dev, NV10_PGRAPH_TSTATUS(i),
-					nv_rd32(dev, NV10_PFB_TSTATUS(i)));
-	}
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+		nv10_graph_set_region_tiling(dev, i, 0, 0, 0);
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH1, 0x00000000);
 	nv_wr32(dev, NV10_PGRAPH_CTX_SWITCH2, 0x00000000);
diff --git a/drivers/gpu/drm/nouveau/nv17_tv.c b/drivers/gpu/drm/nouveau/nv17_tv.c
index 81c01353a9f9..58b917c3341b 100644
--- a/drivers/gpu/drm/nouveau/nv17_tv.c
+++ b/drivers/gpu/drm/nouveau/nv17_tv.c
@@ -33,13 +33,103 @@
 #include "nouveau_hw.h"
 #include "nv17_tv.h"
 
-enum drm_connector_status nv17_tv_detect(struct drm_encoder *encoder,
-					 struct drm_connector *connector,
-					 uint32_t pin_mask)
+static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
 {
+	struct drm_device *dev = encoder->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t testval, regoffset = nv04_dac_output_offset(encoder);
+	uint32_t gpio0, gpio1, fp_htotal, fp_hsync_start, fp_hsync_end,
+		fp_control, test_ctrl, dacclk, ctv_14, ctv_1c, ctv_6c;
+	uint32_t sample = 0;
+	int head;
+
+#define RGB_TEST_DATA(r, g, b) (r << 0 | g << 10 | b << 20)
+	testval = RGB_TEST_DATA(0x82, 0xeb, 0x82);
+	if (dev_priv->vbios->tvdactestval)
+		testval = dev_priv->vbios->tvdactestval;
+
+	dacclk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset);
+	head = (dacclk & 0x100) >> 8;
+
+	/* Save the previous state. */
+	gpio1 = nv17_gpio_get(dev, DCB_GPIO_TVDAC1);
+	gpio0 = nv17_gpio_get(dev, DCB_GPIO_TVDAC0);
+	fp_htotal = NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_HTOTAL);
+	fp_hsync_start = NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_START);
+	fp_hsync_end = NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_END);
+	fp_control = NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL);
+	test_ctrl = NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset);
+	ctv_1c = NVReadRAMDAC(dev, head, 0x680c1c);
+	ctv_14 = NVReadRAMDAC(dev, head, 0x680c14);
+	ctv_6c = NVReadRAMDAC(dev, head, 0x680c6c);
+
+	/* Prepare the DAC for load detection.  */
+	nv17_gpio_set(dev, DCB_GPIO_TVDAC1, true);
+	nv17_gpio_set(dev, DCB_GPIO_TVDAC0, true);
+
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HTOTAL, 1343);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_START, 1047);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_END, 1183);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL,
+		      NV_PRAMDAC_FP_TG_CONTROL_DISPEN_POS |
+		      NV_PRAMDAC_FP_TG_CONTROL_WIDTH_12 |
+		      NV_PRAMDAC_FP_TG_CONTROL_READ_PROG |
+		      NV_PRAMDAC_FP_TG_CONTROL_HSYNC_POS |
+		      NV_PRAMDAC_FP_TG_CONTROL_VSYNC_POS);
+
+	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset, 0);
+
+	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset,
+		      (dacclk & ~0xff) | 0x22);
+	msleep(1);
+	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset,
+		      (dacclk & ~0xff) | 0x21);
+
+	NVWriteRAMDAC(dev, head, 0x680c1c, 1 << 20);
+	NVWriteRAMDAC(dev, head, 0x680c14, 4 << 16);
+
+	/* Sample pin 0x4 (usually S-video luma). */
+	NVWriteRAMDAC(dev, head, 0x680c6c, testval >> 10 & 0x3ff);
+	msleep(20);
+	sample |= NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset)
+		& 0x4 << 28;
+
+	/* Sample the remaining pins. */
+	NVWriteRAMDAC(dev, head, 0x680c6c, testval & 0x3ff);
+	msleep(20);
+	sample |= NVReadRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset)
+		& 0xa << 28;
+
+	/* Restore the previous state. */
+	NVWriteRAMDAC(dev, head, 0x680c1c, ctv_1c);
+	NVWriteRAMDAC(dev, head, 0x680c14, ctv_14);
+	NVWriteRAMDAC(dev, head, 0x680c6c, ctv_6c);
+	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_DACCLK + regoffset, dacclk);
+	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + regoffset, test_ctrl);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL, fp_control);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_END, fp_hsync_end);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HSYNC_START, fp_hsync_start);
+	NVWriteRAMDAC(dev, head, NV_PRAMDAC_FP_HTOTAL, fp_htotal);
+	nv17_gpio_set(dev, DCB_GPIO_TVDAC1, gpio1);
+	nv17_gpio_set(dev, DCB_GPIO_TVDAC0, gpio0);
+
+	return sample;
+}
+
+static enum drm_connector_status
+nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
+{
+	struct drm_device *dev = encoder->dev;
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct drm_mode_config *conf = &dev->mode_config;
 	struct nv17_tv_encoder *tv_enc = to_tv_enc(encoder);
+	struct dcb_entry *dcb = tv_enc->base.dcb;
 
-	tv_enc->pin_mask = pin_mask >> 28 & 0xe;
+	if (dev_priv->chipset == 0x42 ||
+	    dev_priv->chipset == 0x43)
+		tv_enc->pin_mask = nv42_tv_sample_load(encoder) >> 28 & 0xe;
+	else
+		tv_enc->pin_mask = nv17_dac_sample_load(encoder) >> 28 & 0xe;
 
 	switch (tv_enc->pin_mask) {
 	case 0x2:
@@ -50,7 +140,7 @@ enum drm_connector_status nv17_tv_detect(struct drm_encoder *encoder,
 		tv_enc->subconnector = DRM_MODE_SUBCONNECTOR_SVIDEO;
 		break;
 	case 0xe:
-		if (nouveau_encoder(encoder)->dcb->tvconf.has_component_output)
+		if (dcb->tvconf.has_component_output)
 			tv_enc->subconnector = DRM_MODE_SUBCONNECTOR_Component;
 		else
 			tv_enc->subconnector = DRM_MODE_SUBCONNECTOR_SCART;
@@ -61,11 +151,16 @@ enum drm_connector_status nv17_tv_detect(struct drm_encoder *encoder,
 	}
 
 	drm_connector_property_set_value(connector,
-			encoder->dev->mode_config.tv_subconnector_property,
-							tv_enc->subconnector);
+					 conf->tv_subconnector_property,
+					 tv_enc->subconnector);
 
-	return tv_enc->subconnector ? connector_status_connected :
-					connector_status_disconnected;
+	if (tv_enc->subconnector) {
+		NV_INFO(dev, "Load detected on output %c\n",
+			'@' + ffs(dcb->or));
+		return connector_status_connected;
+	} else {
+		return connector_status_disconnected;
+	}
 }
 
 static const struct {
@@ -633,7 +728,7 @@ static struct drm_encoder_helper_funcs nv17_tv_helper_funcs = {
 	.prepare = nv17_tv_prepare,
 	.commit = nv17_tv_commit,
 	.mode_set = nv17_tv_mode_set,
-	.detect = nv17_dac_detect,
+	.detect = nv17_tv_detect,
 };
 
 static struct drm_encoder_slave_funcs nv17_tv_slave_funcs = {
diff --git a/drivers/gpu/drm/nouveau/nv20_graph.c b/drivers/gpu/drm/nouveau/nv20_graph.c
index 18ba74f19703..d6fc0a82f03d 100644
--- a/drivers/gpu/drm/nouveau/nv20_graph.c
+++ b/drivers/gpu/drm/nouveau/nv20_graph.c
@@ -514,6 +514,27 @@ nv20_graph_rdi(struct drm_device *dev)
 	nouveau_wait_for_idle(dev);
 }
 
+void
+nv20_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			     uint32_t size, uint32_t pitch)
+{
+	uint32_t limit = max(1u, addr + size) - 1;
+
+	if (pitch)
+		addr |= 1;
+
+	nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+	nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+	nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+
+	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + 4 * i);
+	nv_wr32(dev, NV10_PGRAPH_RDI_DATA, limit);
+	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + 4 * i);
+	nv_wr32(dev, NV10_PGRAPH_RDI_DATA, pitch);
+	nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + 4 * i);
+	nv_wr32(dev, NV10_PGRAPH_RDI_DATA, addr);
+}
+
 int
 nv20_graph_init(struct drm_device *dev)
 {
@@ -572,27 +593,10 @@ nv20_graph_init(struct drm_device *dev)
 		nv_wr32(dev, NV10_PGRAPH_RDI_DATA , 0x00000030);
 	}
 
-	/* copy tile info from PFB */
-	for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
-		nv_wr32(dev, 0x00400904 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TLIMIT(i)));
-			/* which is NV40_PGRAPH_TLIMIT0(i) ?? */
-		nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0030 + i * 4);
-		nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
-					nv_rd32(dev, NV10_PFB_TLIMIT(i)));
-		nv_wr32(dev, 0x00400908 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TSIZE(i)));
-			/* which is NV40_PGRAPH_TSIZE0(i) ?? */
-		nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0050 + i * 4);
-		nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
-					nv_rd32(dev, NV10_PFB_TSIZE(i)));
-		nv_wr32(dev, 0x00400900 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TILE(i)));
-			/* which is NV40_PGRAPH_TILE0(i) ?? */
-		nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0010 + i * 4);
-		nv_wr32(dev, NV10_PGRAPH_RDI_DATA,
-					nv_rd32(dev, NV10_PFB_TILE(i)));
-	}
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+		nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
+
 	for (i = 0; i < 8; i++) {
 		nv_wr32(dev, 0x400980 + i * 4, nv_rd32(dev, 0x100300 + i * 4));
 		nv_wr32(dev, NV10_PGRAPH_RDI_INDEX, 0x00EA0090 + i * 4);
@@ -704,18 +708,9 @@ nv30_graph_init(struct drm_device *dev)
 
 	nv_wr32(dev, 0x4000c0, 0x00000016);
 
-	/* copy tile info from PFB */
-	for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
-		nv_wr32(dev, 0x00400904 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TLIMIT(i)));
-			/* which is NV40_PGRAPH_TLIMIT0(i) ?? */
-		nv_wr32(dev, 0x00400908 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TSIZE(i)));
-			/* which is NV40_PGRAPH_TSIZE0(i) ?? */
-		nv_wr32(dev, 0x00400900 + i * 0x10,
-					nv_rd32(dev, NV10_PFB_TILE(i)));
-			/* which is NV40_PGRAPH_TILE0(i) ?? */
-	}
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < NV10_PFB_TILE__SIZE; i++)
+		nv20_graph_set_region_tiling(dev, i, 0, 0, 0);
 
 	nv_wr32(dev, NV10_PGRAPH_CTX_CONTROL, 0x10000100);
 	nv_wr32(dev, NV10_PGRAPH_STATE      , 0xFFFFFFFF);
diff --git a/drivers/gpu/drm/nouveau/nv40_fb.c b/drivers/gpu/drm/nouveau/nv40_fb.c
index ca1d27107a8e..3cd07d8d5bd7 100644
--- a/drivers/gpu/drm/nouveau/nv40_fb.c
+++ b/drivers/gpu/drm/nouveau/nv40_fb.c
@@ -3,12 +3,37 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 
+void
+nv40_fb_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			  uint32_t size, uint32_t pitch)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t limit = max(1u, addr + size) - 1;
+
+	if (pitch)
+		addr |= 1;
+
+	switch (dev_priv->chipset) {
+	case 0x40:
+		nv_wr32(dev, NV10_PFB_TLIMIT(i), limit);
+		nv_wr32(dev, NV10_PFB_TSIZE(i), pitch);
+		nv_wr32(dev, NV10_PFB_TILE(i), addr);
+		break;
+
+	default:
+		nv_wr32(dev, NV40_PFB_TLIMIT(i), limit);
+		nv_wr32(dev, NV40_PFB_TSIZE(i), pitch);
+		nv_wr32(dev, NV40_PFB_TILE(i), addr);
+		break;
+	}
+}
+
 int
 nv40_fb_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
-	uint32_t fb_bar_size, tmp;
-	int num_tiles;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	uint32_t tmp;
 	int i;
 
 	/* This is strictly a NV4x register (don't know about NV5x). */
@@ -23,35 +48,23 @@ nv40_fb_init(struct drm_device *dev)
 	case 0x45:
 		tmp = nv_rd32(dev, NV10_PFB_CLOSE_PAGE2);
 		nv_wr32(dev, NV10_PFB_CLOSE_PAGE2, tmp & ~(1 << 15));
-		num_tiles = NV10_PFB_TILE__SIZE;
+		pfb->num_tiles = NV10_PFB_TILE__SIZE;
 		break;
 	case 0x46: /* G72 */
 	case 0x47: /* G70 */
 	case 0x49: /* G71 */
 	case 0x4b: /* G73 */
 	case 0x4c: /* C51 (G7X version) */
-		num_tiles = NV40_PFB_TILE__SIZE_1;
+		pfb->num_tiles = NV40_PFB_TILE__SIZE_1;
 		break;
 	default:
-		num_tiles = NV40_PFB_TILE__SIZE_0;
+		pfb->num_tiles = NV40_PFB_TILE__SIZE_0;
 		break;
 	}
 
-	fb_bar_size = drm_get_resource_len(dev, 0) - 1;
-	switch (dev_priv->chipset) {
-	case 0x40:
-		for (i = 0; i < num_tiles; i++) {
-			nv_wr32(dev, NV10_PFB_TILE(i), 0);
-			nv_wr32(dev, NV10_PFB_TLIMIT(i), fb_bar_size);
-		}
-		break;
-	default:
-		for (i = 0; i < num_tiles; i++) {
-			nv_wr32(dev, NV40_PFB_TILE(i), 0);
-			nv_wr32(dev, NV40_PFB_TLIMIT(i), fb_bar_size);
-		}
-		break;
-	}
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < pfb->num_tiles; i++)
+		pfb->set_region_tiling(dev, i, 0, 0, 0);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv40_graph.c b/drivers/gpu/drm/nouveau/nv40_graph.c
index 2b332bb55acf..53e8afe1dcd1 100644
--- a/drivers/gpu/drm/nouveau/nv40_graph.c
+++ b/drivers/gpu/drm/nouveau/nv40_graph.c
@@ -181,6 +181,48 @@ nv40_graph_unload_context(struct drm_device *dev)
 	return ret;
 }
 
+void
+nv40_graph_set_region_tiling(struct drm_device *dev, int i, uint32_t addr,
+			     uint32_t size, uint32_t pitch)
+{
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	uint32_t limit = max(1u, addr + size) - 1;
+
+	if (pitch)
+		addr |= 1;
+
+	switch (dev_priv->chipset) {
+	case 0x44:
+	case 0x4a:
+	case 0x4e:
+		nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+		nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+		nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+		break;
+
+	case 0x46:
+	case 0x47:
+	case 0x49:
+	case 0x4b:
+		nv_wr32(dev, NV47_PGRAPH_TSIZE(i), pitch);
+		nv_wr32(dev, NV47_PGRAPH_TLIMIT(i), limit);
+		nv_wr32(dev, NV47_PGRAPH_TILE(i), addr);
+		nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+		nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+		nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+		break;
+
+	default:
+		nv_wr32(dev, NV20_PGRAPH_TSIZE(i), pitch);
+		nv_wr32(dev, NV20_PGRAPH_TLIMIT(i), limit);
+		nv_wr32(dev, NV20_PGRAPH_TILE(i), addr);
+		nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), pitch);
+		nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), limit);
+		nv_wr32(dev, NV40_PGRAPH_TILE1(i), addr);
+		break;
+	}
+}
+
 /*
  * G70		0x47
  * G71		0x49
@@ -195,7 +237,8 @@ nv40_graph_init(struct drm_device *dev)
 {
 	struct drm_nouveau_private *dev_priv =
 		(struct drm_nouveau_private *)dev->dev_private;
-	uint32_t vramsz, tmp;
+	struct nouveau_fb_engine *pfb = &dev_priv->engine.fb;
+	uint32_t vramsz;
 	int i, j;
 
 	nv_wr32(dev, NV03_PMC_ENABLE, nv_rd32(dev, NV03_PMC_ENABLE) &
@@ -292,74 +335,9 @@ nv40_graph_init(struct drm_device *dev)
 	nv_wr32(dev, 0x400b38, 0x2ffff800);
 	nv_wr32(dev, 0x400b3c, 0x00006000);
 
-	/* copy tile info from PFB */
-	switch (dev_priv->chipset) {
-	case 0x40: /* vanilla NV40 */
-		for (i = 0; i < NV10_PFB_TILE__SIZE; i++) {
-			tmp = nv_rd32(dev, NV10_PFB_TILE(i));
-			nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
-			tmp = nv_rd32(dev, NV10_PFB_TLIMIT(i));
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
-			tmp = nv_rd32(dev, NV10_PFB_TSIZE(i));
-			nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
-			tmp = nv_rd32(dev, NV10_PFB_TSTATUS(i));
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
-		}
-		break;
-	case 0x44:
-	case 0x4a:
-	case 0x4e: /* NV44-based cores don't have 0x406900? */
-		for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
-			tmp = nv_rd32(dev, NV40_PFB_TILE(i));
-			nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
-			nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
-		}
-		break;
-	case 0x46:
-	case 0x47:
-	case 0x49:
-	case 0x4b: /* G7X-based cores */
-		for (i = 0; i < NV40_PFB_TILE__SIZE_1; i++) {
-			tmp = nv_rd32(dev, NV40_PFB_TILE(i));
-			nv_wr32(dev, NV47_PGRAPH_TILE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
-			nv_wr32(dev, NV47_PGRAPH_TLIMIT0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
-			nv_wr32(dev, NV47_PGRAPH_TSIZE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
-			nv_wr32(dev, NV47_PGRAPH_TSTATUS0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
-		}
-		break;
-	default: /* everything else */
-		for (i = 0; i < NV40_PFB_TILE__SIZE_0; i++) {
-			tmp = nv_rd32(dev, NV40_PFB_TILE(i));
-			nv_wr32(dev, NV40_PGRAPH_TILE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TILE1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TLIMIT(i));
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TLIMIT1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSIZE(i));
-			nv_wr32(dev, NV40_PGRAPH_TSIZE0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSIZE1(i), tmp);
-			tmp = nv_rd32(dev, NV40_PFB_TSTATUS(i));
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS0(i), tmp);
-			nv_wr32(dev, NV40_PGRAPH_TSTATUS1(i), tmp);
-		}
-		break;
-	}
+	/* Turn all the tiling regions off. */
+	for (i = 0; i < pfb->num_tiles; i++)
+		nv40_graph_set_region_tiling(dev, i, 0, 0, 0);
 
 	/* begin RAM config */
 	vramsz = drm_get_resource_len(dev, 0) - 1;
diff --git a/drivers/gpu/drm/nouveau/nv50_crtc.c b/drivers/gpu/drm/nouveau/nv50_crtc.c
index 118d3285fd8c..40b7360841f8 100644
--- a/drivers/gpu/drm/nouveau/nv50_crtc.c
+++ b/drivers/gpu/drm/nouveau/nv50_crtc.c
@@ -432,6 +432,7 @@ nv50_crtc_prepare(struct drm_crtc *crtc)
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct drm_device *dev = crtc->dev;
 	struct drm_encoder *encoder;
+	uint32_t dac = 0, sor = 0;
 
 	NV_DEBUG_KMS(dev, "index %d\n", nv_crtc->index);
 
@@ -439,9 +440,28 @@ nv50_crtc_prepare(struct drm_crtc *crtc)
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
 		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 
-		if (drm_helper_encoder_in_use(encoder))
+		if (!drm_helper_encoder_in_use(encoder))
 			continue;
 
+		if (nv_encoder->dcb->type == OUTPUT_ANALOG ||
+		    nv_encoder->dcb->type == OUTPUT_TV)
+			dac |= (1 << nv_encoder->or);
+		else
+			sor |= (1 << nv_encoder->or);
+	}
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+
+		if (nv_encoder->dcb->type == OUTPUT_ANALOG ||
+		    nv_encoder->dcb->type == OUTPUT_TV) {
+			if (dac & (1 << nv_encoder->or))
+				continue;
+		} else {
+			if (sor & (1 << nv_encoder->or))
+				continue;
+		}
+
 		nv_encoder->disconnect(nv_encoder);
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index a9263d92a231..90f0bf59fbcd 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -690,9 +690,21 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcbent,
 			   int pxclk)
 {
 	struct drm_nouveau_private *dev_priv = dev->dev_private;
+	struct nouveau_connector *nv_connector = NULL;
+	struct drm_encoder *encoder;
 	struct nvbios *bios = &dev_priv->VBIOS;
 	uint32_t mc, script = 0, or;
 
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+
+		if (nv_encoder->dcb != dcbent)
+			continue;
+
+		nv_connector = nouveau_encoder_connector_get(nv_encoder);
+		break;
+	}
+
 	or = ffs(dcbent->or) - 1;
 	mc = nv50_display_mode_ctrl(dev, dcbent->type != OUTPUT_ANALOG, or);
 	switch (dcbent->type) {
@@ -711,6 +723,11 @@ nv50_display_script_select(struct drm_device *dev, struct dcb_entry *dcbent,
 			} else
 			if (bios->fp.strapless_is_24bit & 1)
 				script |= 0x0200;
+
+			if (nv_connector && nv_connector->edid &&
+			    (nv_connector->edid->revision >= 4) &&
+			    (nv_connector->edid->input & 0x70) >= 0x20)
+				script |= 0x0200;
 		}
 
 		if (nouveau_uscript_lvds >= 0) {
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index 6bcc6d39e9b0..e4f279ee61cf 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -16,9 +16,7 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) &&
 	     RING_SPACE(chan, rect->rop == ROP_COPY ? 7 : 11)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -31,7 +29,11 @@ nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 		OUT_RING(chan, 1);
 	}
 	BEGIN_RING(chan, NvSub2D, 0x0588, 1);
-	OUT_RING(chan, rect->color);
+	if (info->fix.visual == FB_VISUAL_TRUECOLOR ||
+	    info->fix.visual == FB_VISUAL_DIRECTCOLOR)
+		OUT_RING(chan, ((uint32_t *)info->pseudo_palette)[rect->color]);
+	else
+		OUT_RING(chan, rect->color);
 	BEGIN_RING(chan, NvSub2D, 0x0600, 4);
 	OUT_RING(chan, rect->dx);
 	OUT_RING(chan, rect->dy);
@@ -56,9 +58,7 @@ nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 		return;
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 12)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -101,8 +101,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 	}
 
 	if (!(info->flags & FBINFO_HWACCEL_DISABLED) && RING_SPACE(chan, 11)) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
-		info->flags |= FBINFO_HWACCEL_DISABLED;
+		nouveau_fbcon_gpu_lockup(info);
 	}
 
 	if (info->flags & FBINFO_HWACCEL_DISABLED) {
@@ -135,9 +134,7 @@ nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 		int push = dwords > 2047 ? 2047 : dwords;
 
 		if (RING_SPACE(chan, push + 1)) {
-			NV_ERROR(dev,
-				 "GPU lockup - switching to software fbcon\n");
-			info->flags |= FBINFO_HWACCEL_DISABLED;
+			nouveau_fbcon_gpu_lockup(info);
 			cfb_imageblit(info, image);
 			return;
 		}
@@ -199,7 +196,7 @@ nv50_fbcon_accel_init(struct fb_info *info)
 
 	ret = RING_SPACE(chan, 59);
 	if (ret) {
-		NV_ERROR(dev, "GPU lockup - switching to software fbcon\n");
+		nouveau_fbcon_gpu_lockup(info);
 		return ret;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nv50_fifo.c b/drivers/gpu/drm/nouveau/nv50_fifo.c
index b7282284f080..32b244bcb482 100644
--- a/drivers/gpu/drm/nouveau/nv50_fifo.c
+++ b/drivers/gpu/drm/nouveau/nv50_fifo.c
@@ -272,7 +272,7 @@ nv50_fifo_create_context(struct nouveau_channel *chan)
 			return ret;
 		ramfc = chan->ramfc->gpuobj;
 
-		ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 4096, 256,
+		ret = nouveau_gpuobj_new_ref(dev, chan, NULL, 0, 4096, 1024,
 					     0, &chan->cache);
 		if (ret)
 			return ret;
@@ -384,8 +384,8 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
 		nv_wr32(dev, NV40_PFIFO_CACHE1_DATA(ptr),
 			nv_ro32(dev, cache, (ptr * 2) + 1));
 	}
-	nv_wr32(dev, 0x3210, cnt << 2);
-	nv_wr32(dev, 0x3270, 0);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, cnt << 2);
+	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
 
 	/* guessing that all the 0x34xx regs aren't on NV50 */
 	if (!IS_G80) {
@@ -398,8 +398,6 @@ nv50_fifo_load_context(struct nouveau_channel *chan)
 
 	dev_priv->engine.instmem.finish_access(dev);
 
-	nv_wr32(dev, NV03_PFIFO_CACHE1_GET, 0);
-	nv_wr32(dev, NV03_PFIFO_CACHE1_PUT, 0);
 	nv_wr32(dev, NV03_PFIFO_CACHE1_PUSH1, chan->id | (1<<16));
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv50_graph.c b/drivers/gpu/drm/nouveau/nv50_graph.c
index ca79f32be44c..20319e59d368 100644
--- a/drivers/gpu/drm/nouveau/nv50_graph.c
+++ b/drivers/gpu/drm/nouveau/nv50_graph.c
@@ -84,7 +84,7 @@ nv50_graph_init_regs__nv(struct drm_device *dev)
 	nv_wr32(dev, 0x400804, 0xc0000000);
 	nv_wr32(dev, 0x406800, 0xc0000000);
 	nv_wr32(dev, 0x400c04, 0xc0000000);
-	nv_wr32(dev, 0x401804, 0xc0000000);
+	nv_wr32(dev, 0x401800, 0xc0000000);
 	nv_wr32(dev, 0x405018, 0xc0000000);
 	nv_wr32(dev, 0x402000, 0xc0000000);
 
@@ -282,6 +282,7 @@ nv50_graph_unload_context(struct drm_device *dev)
 		return 0;
 	inst &= NV50_PGRAPH_CTXCTL_CUR_INSTANCE;
 
+	nouveau_wait_for_idle(dev);
 	nv_wr32(dev, 0x400500, fifo & ~1);
 	nv_wr32(dev, 0x400784, inst);
 	nv_wr32(dev, 0x400824, nv_rd32(dev, 0x400824) | 0x20);
diff --git a/drivers/gpu/drm/nouveau/nv50_sor.c b/drivers/gpu/drm/nouveau/nv50_sor.c
index e395c16d30f5..ecf1936b8224 100644
--- a/drivers/gpu/drm/nouveau/nv50_sor.c
+++ b/drivers/gpu/drm/nouveau/nv50_sor.c
@@ -90,11 +90,24 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct drm_encoder *enc;
 	uint32_t val;
 	int or = nv_encoder->or;
 
 	NV_DEBUG_KMS(dev, "or %d mode %d\n", or, mode);
 
+	nv_encoder->last_dpms = mode;
+	list_for_each_entry(enc, &dev->mode_config.encoder_list, head) {
+		struct nouveau_encoder *nvenc = nouveau_encoder(enc);
+
+		if (nvenc == nv_encoder ||
+		    nvenc->dcb->or != nv_encoder->dcb->or)
+			continue;
+
+		if (nvenc->last_dpms == DRM_MODE_DPMS_ON)
+			return;
+	}
+
 	/* wait for it to be done */
 	if (!nv_wait(NV50_PDISPLAY_SOR_DPMS_CTRL(or),
 		     NV50_PDISPLAY_SOR_DPMS_CTRL_PENDING, 0)) {