318 files changed, 10883 insertions, 5755 deletions
diff --git a/drivers/gpu/drm/nouveau/Kbuild b/drivers/gpu/drm/nouveau/Kbuild
index 2527bf4ca5d9..fde6e3656636 100644
--- a/drivers/gpu/drm/nouveau/Kbuild
+++ b/drivers/gpu/drm/nouveau/Kbuild
@@ -22,6 +22,7 @@ nouveau-$(CONFIG_DEBUG_FS) += nouveau_debugfs.o
 nouveau-y += nouveau_drm.o
 nouveau-y += nouveau_hwmon.o
 nouveau-$(CONFIG_COMPAT) += nouveau_ioc32.o
+nouveau-$(CONFIG_LEDS_CLASS) += nouveau_led.o
 nouveau-y += nouveau_nvif.o
 nouveau-$(CONFIG_NOUVEAU_PLATFORM_DRIVER) += nouveau_platform.o
 nouveau-y += nouveau_usif.o # userspace <-> nvif
diff --git a/drivers/gpu/drm/nouveau/Kconfig b/drivers/gpu/drm/nouveau/Kconfig
index 2922a82cba8e..c02a13406a81 100644
--- a/drivers/gpu/drm/nouveau/Kconfig
+++ b/drivers/gpu/drm/nouveau/Kconfig
@@ -1,6 +1,6 @@
 config DRM_NOUVEAU
 	tristate "Nouveau (NVIDIA) cards"
-	depends on DRM && PCI
+	depends on DRM && PCI && MMU
         select FW_LOADER
 	select DRM_KMS_HELPER
 	select DRM_TTM
@@ -16,6 +16,7 @@ config DRM_NOUVEAU
 	select INPUT if ACPI && X86
 	select THERMAL if ACPI && X86
 	select ACPI_VIDEO if ACPI && X86
+	select DRM_VM
 	help
 	  Choose this option for open-source NVIDIA support.
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/arb.c b/drivers/gpu/drm/nouveau/dispnv04/arb.c
index a555681c3096..90075b676256 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/arb.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/arb.c
@@ -198,7 +198,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		int *burst, int *lwm)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nv_fifo_info fifo_data;
 	struct nv_sim_state sim_data;
 	int MClk = nouveau_hw_get_clock(dev, PLL_MEMORY);
@@ -227,7 +227,7 @@ nv04_update_arb(struct drm_device *dev, int VClk, int bpp,
 		sim_data.mem_page_miss = ((cfg1 >> 4) & 0xf) + ((cfg1 >> 31) & 0x1);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		nv04_calc_arb(&fifo_data, &sim_data);
 	else
 		nv10_calc_arb(&fifo_data, &sim_data);
@@ -254,7 +254,7 @@ nouveau_calc_arb(struct drm_device *dev, int vclk, int bpp, int *burst, int *lwm
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN)
 		nv04_update_arb(dev, vclk, bpp, burst, lwm);
 	else if ((dev->pdev->device & 0xfff0) == 0x0240 /*CHIPSET_C51*/ ||
 		 (dev->pdev->device & 0xfff0) == 0x03d0 /*CHIPSET_C512*/) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/crtc.c b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
index 0cb7a18cde26..ab7b69c11d40 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/crtc.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/crtc.c
@@ -113,8 +113,8 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 {
 	struct drm_device *dev = crtc->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv04_mode_state *state = &nv04_display(dev)->mode_reg;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[nv_crtc->index];
@@ -138,7 +138,7 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	 * has yet been observed in allowing the use a single stage pll on all
 	 * nv43 however.  the behaviour of single stage use is untested on nv40
 	 */
-	if (drm->device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
+	if (drm->client.device.info.chipset > 0x40 && dot_clock <= (pll_lim.vco1.max_freq / 2))
 		memset(&pll_lim.vco2, 0, sizeof(pll_lim.vco2));
 
 
@@ -148,10 +148,10 @@ static void nv_crtc_calc_state_ext(struct drm_crtc *crtc, struct drm_display_mod
 	state->pllsel &= PLLSEL_VPLL1_MASK | PLLSEL_VPLL2_MASK | PLLSEL_TV_MASK;
 
 	/* The blob uses this always, so let's do the same */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_USE_VPLL2_TRUE;
 	/* again nv40 and some nv43 act more like nv3x as described above */
-	if (drm->device.info.chipset < 0x41)
+	if (drm->client.device.info.chipset < 0x41)
 		state->pllsel |= NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_MPLL |
 				 NV_PRAMDAC_PLL_COEFF_SELECT_SOURCE_PROG_NVPLL;
 	state->pllsel |= nv_crtc->index ? PLLSEL_VPLL2_MASK : PLLSEL_VPLL1_MASK;
@@ -270,7 +270,7 @@ nv_crtc_mode_set_vga(struct drm_crtc *crtc, struct drm_display_mode *mode)
 		horizEnd = horizTotal - 2;
 		horizBlankEnd = horizTotal + 4;
 #if 0
-		if (dev->overlayAdaptor && drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+		if (dev->overlayAdaptor && drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 			/* This reportedly works around some video overlay bandwidth problems */
 			horizTotal += 2;
 #endif
@@ -460,6 +460,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv04_crtc_reg *regp = &nv04_display(dev)->mode_reg.crtc_reg[nv_crtc->index];
 	struct nv04_crtc_reg *savep = &nv04_display(dev)->saved_reg.crtc_reg[nv_crtc->index];
+	const struct drm_framebuffer *fb = crtc->primary->fb;
 	struct drm_encoder *encoder;
 	bool lvds_output = false, tmds_output = false, tv_output = false,
 		off_chip_digital = false;
@@ -504,7 +505,7 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	regp->cursor_cfg = NV_PCRTC_CURSOR_CONFIG_CUR_LINES_64 |
 			     NV_PCRTC_CURSOR_CONFIG_CUR_PIXELS_64 |
 			     NV_PCRTC_CURSOR_CONFIG_ADDRESS_SPACE_PNVM;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_CUR_BPP_32;
 	if (mode->flags & DRM_MODE_FLAG_DBLSCAN)
 		regp->cursor_cfg |= NV_PCRTC_CURSOR_CONFIG_DOUBLE_SCAN_ENABLE;
@@ -545,47 +546,47 @@ nv_crtc_mode_set_regs(struct drm_crtc *crtc, struct drm_display_mode * mode)
 	 * 1 << 30 on 0x60.830), for no apparent reason */
 	regp->CRTC[NV_CIO_CRE_59] = off_chip_digital;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->CRTC[0x9f] = off_chip_digital ? 0x11 : 0x1;
 
 	regp->crtc_830 = mode->crtc_vdisplay - 3;
 	regp->crtc_834 = mode->crtc_vdisplay - 1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		/* This is what the blob does */
 		regp->crtc_850 = NVReadCRTC(dev, 0, NV_PCRTC_850);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		regp->gpio_ext = NVReadCRTC(dev, 0, NV_PCRTC_GPIO_EXT);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->crtc_cfg = NV10_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 	else
 		regp->crtc_cfg = NV04_PCRTC_CONFIG_START_ADDRESS_HSYNC;
 
 	/* Some misc regs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->CRTC[NV_CIO_CRE_85] = 0xFF;
 		regp->CRTC[NV_CIO_CRE_86] = 0x1;
 	}
 
-	regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] = (crtc->primary->fb->depth + 1) / 8;
+	regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] = (fb->format->depth + 1) / 8;
 	/* Enable slaved mode (called MODE_TV in nv4ref.h) */
 	if (lvds_output || tmds_output || tv_output)
 		regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] |= (1 << 7);
 
 	/* Generic PRAMDAC regs */
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		/* Only bit that bios and blob set. */
 		regp->nv10_cursync = (1 << 25);
 
 	regp->ramdac_gen_ctrl = NV_PRAMDAC_GENERAL_CONTROL_BPC_8BITS |
 				NV_PRAMDAC_GENERAL_CONTROL_VGA_STATE_SEL |
 				NV_PRAMDAC_GENERAL_CONTROL_PIXMIX_ON;
-	if (crtc->primary->fb->depth == 16)
+	if (fb->format->depth == 16)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_PIPE_LONG;
 
 	regp->ramdac_630 = 0; /* turn off green mode (tv test pattern?) */
@@ -648,7 +649,7 @@ nv_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *mode,
 
 	nv_crtc_mode_set_vga(crtc, adjusted_mode);
 	/* calculated in nv04_dfp_prepare, nv40 needs it written before calculating PLLs */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, nv04_display(dev)->mode_reg.sel_clk);
 	nv_crtc_mode_set_regs(crtc, adjusted_mode);
 	nv_crtc_calc_state_ext(crtc, mode, adjusted_mode->clock);
@@ -702,14 +703,14 @@ static void nv_crtc_prepare(struct drm_crtc *crtc)
 	if (nv_two_heads(dev))
 		NVSetOwner(dev, nv_crtc->index);
 
-	drm_vblank_pre_modeset(dev, nv_crtc->index);
+	drm_crtc_vblank_off(crtc);
 	funcs->dpms(crtc, DRM_MODE_DPMS_OFF);
 
 	NVBlankScreen(dev, nv_crtc->index, true);
 
 	/* Some more preparation. */
 	NVWriteCRTC(dev, nv_crtc->index, NV_PCRTC_CONFIG, NV_PCRTC_CONFIG_START_ADDRESS_NON_VGA);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		uint32_t reg900 = NVReadRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900);
 		NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_900, reg900 & ~0x10000);
 	}
@@ -734,7 +735,7 @@ static void nv_crtc_commit(struct drm_crtc *crtc)
 #endif
 
 	funcs->dpms(crtc, DRM_MODE_DPMS_ON);
-	drm_vblank_post_modeset(dev, nv_crtc->index);
+	drm_crtc_vblank_on(crtc);
 }
 
 static void nv_crtc_destroy(struct drm_crtc *crtc)
@@ -847,16 +848,16 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 
 	nv_crtc->fb.offset = fb->nvbo->bo.offset;
 
-	if (nv_crtc->lut.depth != drm_fb->depth) {
-		nv_crtc->lut.depth = drm_fb->depth;
+	if (nv_crtc->lut.depth != drm_fb->format->depth) {
+		nv_crtc->lut.depth = drm_fb->format->depth;
 		nv_crtc_gamma_load(crtc);
 	}
 
 	/* Update the framebuffer format. */
 	regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] &= ~3;
-	regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] |= (crtc->primary->fb->depth + 1) / 8;
+	regp->CRTC[NV_CIO_CRE_PIXEL_INDEX] |= (drm_fb->format->depth + 1) / 8;
 	regp->ramdac_gen_ctrl &= ~NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
-	if (crtc->primary->fb->depth == 16)
+	if (drm_fb->format->depth == 16)
 		regp->ramdac_gen_ctrl |= NV_PRAMDAC_GENERAL_CONTROL_ALT_MODE_SEL;
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_PIXEL_INDEX);
 	NVWriteRAMDAC(dev, nv_crtc->index, NV_PRAMDAC_GENERAL_CONTROL,
@@ -873,11 +874,11 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 
 	/* Update the framebuffer location. */
 	regp->fb_start = nv_crtc->fb.offset & ~3;
-	regp->fb_start += (y * drm_fb->pitches[0]) + (x * drm_fb->bits_per_pixel / 8);
+	regp->fb_start += (y * drm_fb->pitches[0]) + (x * drm_fb->format->cpp[0]);
 	nv_set_crtc_base(dev, nv_crtc->index, regp->fb_start);
 
 	/* Update the arbitration parameters. */
-	nouveau_calc_arb(dev, crtc->mode.clock, drm_fb->bits_per_pixel,
+	nouveau_calc_arb(dev, crtc->mode.clock, drm_fb->format->cpp[0] * 8,
 			 &arb_burst, &arb_lwm);
 
 	regp->CRTC[NV_CIO_CRE_FF_INDEX] = arb_burst;
@@ -885,7 +886,7 @@ nv04_crtc_do_mode_set_base(struct drm_crtc *crtc,
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FF_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN) {
 		regp->CRTC[NV_CIO_CRE_47] = arb_lwm >> 8;
 		crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_47);
 	}
@@ -966,7 +967,7 @@ static void nv11_cursor_upload(struct drm_device *dev, struct nouveau_bo *src,
 		{
 			struct nouveau_drm *drm = nouveau_drm(dev);
 
-			if (drm->device.info.chipset == 0x11) {
+			if (drm->client.device.info.chipset == 0x11) {
 				pixel = ((pixel & 0x000000ff) << 24) |
 					((pixel & 0x0000ff00) << 8) |
 					((pixel & 0x00ff0000) >> 8) |
@@ -1007,7 +1008,7 @@ nv04_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
 	if (ret)
 		goto out;
 
-	if (drm->device.info.chipset >= 0x11)
+	if (drm->client.device.info.chipset >= 0x11)
 		nv11_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
 	else
 		nv04_cursor_upload(dev, cursor, nv_crtc->cursor.nvbo);
@@ -1123,8 +1124,9 @@ nv04_crtc_create(struct drm_device *dev, int crtc_num)
 	drm_crtc_helper_add(&nv_crtc->base, &nv04_crtc_helper_funcs);
 	drm_mode_crtc_set_gamma_size(&nv_crtc->base, 256);
 
-	ret = nouveau_bo_new(dev, 64*64*4, 0x100, TTM_PL_FLAG_VRAM,
-			     0, 0x0000, NULL, NULL, &nv_crtc->cursor.nvbo);
+	ret = nouveau_bo_new(&nouveau_drm(dev)->client, 64*64*4, 0x100,
+			     TTM_PL_FLAG_VRAM, 0, 0x0000, NULL, NULL,
+			     &nv_crtc->cursor.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(nv_crtc->cursor.nvbo, TTM_PL_FLAG_VRAM, false);
 		if (!ret) {
diff --git a/drivers/gpu/drm/nouveau/dispnv04/cursor.c b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
index c83116a308a4..f26e44ea7389 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/cursor.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/cursor.c
@@ -55,7 +55,7 @@ nv04_cursor_set_offset(struct nouveau_crtc *nv_crtc, uint32_t offset)
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	crtc_wr_cio_state(crtc, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, nv_crtc->index);
 }
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dac.c b/drivers/gpu/drm/nouveau/dispnv04/dac.c
index b6cc7766e6f7..4feab0a5419d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dac.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dac.c
@@ -66,7 +66,7 @@ int nv04_dac_output_offset(struct drm_encoder *encoder)
 static int sample_load_twice(struct drm_device *dev, bool sense[2])
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int i;
 
 	for (i = 0; i < 2; i++) {
@@ -80,19 +80,19 @@ static int sample_load_twice(struct drm_device *dev, bool sense[2])
 		 * use a 10ms timeout (guards against crtc being inactive, in
 		 * which case blank state would never change)
 		 */
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (nvif_msec(&drm->device, 10,
+		if (nvif_msec(&drm->client.device, 10,
 			if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 1))
 				break;
 		) < 0)
@@ -133,7 +133,7 @@ static enum drm_connector_status nv04_dac_detect(struct drm_encoder *encoder,
 						 struct drm_connector *connector)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t saved_seq1, saved_pi, saved_rpc1, saved_cr_mode;
 	uint8_t saved_palette0[3], saved_palette_mask;
@@ -236,8 +236,8 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	uint32_t sample, testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t saved_powerctrl_2 = 0, saved_powerctrl_4 = 0, saved_routput,
@@ -288,7 +288,7 @@ uint32_t nv17_dac_sample_load(struct drm_encoder *encoder)
 	/* nv driver and nv31 use 0xfffffeee, nv34 and 6600 use 0xfffffece */
 	routput = (saved_routput & 0xfffffece) | head << 8;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE) {
 		if (dcb->type == DCB_OUTPUT_TV)
 			routput |= 0x1a << 16;
 		else
@@ -403,7 +403,7 @@ static void nv04_dac_mode_set(struct drm_encoder *encoder,
 	}
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/dfp.c b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
index c2947ef7d4fc..9805d2cdc1a1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/dfp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/dfp.c
@@ -281,7 +281,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			      struct drm_display_mode *adjusted_mode)
 {
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nv04_crtc_reg *regp = &nv04_display(dev)->mode_reg.crtc_reg[nv_crtc->index];
@@ -290,6 +290,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct drm_display_mode *output_mode = &nv_encoder->mode;
 	struct drm_connector *connector = &nv_connector->base;
+	const struct drm_framebuffer *fb = encoder->crtc->primary->fb;
 	uint32_t mode_ratio, panel_ratio;
 
 	NV_DEBUG(drm, "Output mode on CRTC %d:\n", nv_crtc->index);
@@ -415,8 +416,8 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 	/* Output property. */
 	if ((nv_connector->dithering_mode == DITHERING_MODE_ON) ||
 	    (nv_connector->dithering_mode == DITHERING_MODE_AUTO &&
-	     encoder->crtc->primary->fb->depth > connector->display_info.bpc * 3)) {
-		if (drm->device.info.chipset == 0x11)
+	     fb->format->depth > connector->display_info.bpc * 3)) {
+		if (drm->client.device.info.chipset == 0x11)
 			regp->dither = savep->dither | 0x00010000;
 		else {
 			int i;
@@ -427,7 +428,7 @@ static void nv04_dfp_mode_set(struct drm_encoder *encoder,
 			}
 		}
 	} else {
-		if (drm->device.info.chipset != 0x11) {
+		if (drm->client.device.info.chipset != 0x11) {
 			/* reset them */
 			int i;
 			for (i = 0; i < 3; i++) {
@@ -463,7 +464,7 @@ static void nv04_dfp_commit(struct drm_encoder *encoder)
 		NVReadRAMDAC(dev, head, NV_PRAMDAC_FP_TG_CONTROL);
 
 	/* This could use refinement for flatpanels, but it should work this way */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0xf0000000);
 	else
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL + nv04_dac_output_offset(encoder), 0x00100000);
@@ -485,7 +486,7 @@ static void nv04_dfp_update_backlight(struct drm_encoder *encoder, int mode)
 {
 #ifdef __powerpc__
 	struct drm_device *dev = encoder->dev;
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 
 	/* BIOS scripts usually take care of the backlight, thanks
 	 * Apple for your consistency.
@@ -623,7 +624,7 @@ static void nv04_tmds_slave_init(struct drm_encoder *encoder)
 	struct drm_device *dev = encoder->dev;
 	struct dcb_output *dcb = nouveau_encoder(encoder)->dcb;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, NVKM_I2C_BUS_PRI);
 	struct nvkm_i2c_bus_probe info[] = {
 		{
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.c b/drivers/gpu/drm/nouveau/dispnv04/disp.c
index 34c0f2f67548..5b9d549aa791 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.c
@@ -35,7 +35,7 @@ int
 nv04_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *ct;
 	struct drm_encoder *encoder;
@@ -48,7 +48,7 @@ nv04_display_create(struct drm_device *dev)
 	if (!disp)
 		return -ENOMEM;
 
-	nvif_object_map(&drm->device.object);
+	nvif_object_map(&drm->client.device.object);
 
 	nouveau_display(dev)->priv = disp;
 	nouveau_display(dev)->dtor = nv04_display_destroy;
@@ -139,7 +139,7 @@ nv04_display_destroy(struct drm_device *dev)
 	nouveau_display(dev)->priv = NULL;
 	kfree(disp);
 
-	nvif_object_unmap(&drm->device.object);
+	nvif_object_unmap(&drm->client.device.object);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/dispnv04/disp.h b/drivers/gpu/drm/nouveau/dispnv04/disp.h
index 7030307d2d48..bea4543554ba 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/disp.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/disp.h
@@ -129,7 +129,7 @@ nv_two_heads(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS && impl != 0x0100 &&
 	    impl != 0x0150 && impl != 0x01a0 && impl != 0x0200)
 		return true;
 
@@ -148,7 +148,7 @@ nv_two_reg_pll(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	const int impl = dev->pdev->device & 0x0ff0;
 
-	if (impl == 0x0310 || impl == 0x0340 || drm->device.info.family >= NV_DEVICE_INFO_V0_CURIE)
+	if (impl == 0x0310 || impl == 0x0340 || drm->client.device.info.family >= NV_DEVICE_INFO_V0_CURIE)
 		return true;
 	return false;
 }
@@ -170,7 +170,7 @@ nouveau_bios_run_init_table(struct drm_device *dev, u16 table,
 			    struct dcb_output *outp, int crtc)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios_init init = {
 		.subdev = &bios->subdev,
 		.bios = bios,
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.c b/drivers/gpu/drm/nouveau/dispnv04/hw.c
index 74856a8b8f35..b98599002831 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.c
@@ -89,7 +89,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	if (owner == 1)
 		owner *= 3;
 
-	if (drm->device.info.chipset == 0x11) {
+	if (drm->client.device.info.chipset == 0x11) {
 		/* This might seem stupid, but the blob does it and
 		 * omitting it often locks the system up.
 		 */
@@ -100,7 +100,7 @@ NVSetOwner(struct drm_device *dev, int owner)
 	/* CR44 is always changed on CRTC0 */
 	NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_44, owner);
 
-	if (drm->device.info.chipset == 0x11) {	/* set me harder */
+	if (drm->client.device.info.chipset == 0x11) {	/* set me harder */
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 		NVWriteVgaCrtc(dev, 0, NV_CIO_CRE_2E, owner);
 	}
@@ -149,7 +149,7 @@ nouveau_hw_decode_pll(struct drm_device *dev, uint32_t reg1, uint32_t pll1,
 		pllvals->NM1 = pll1 & 0xffff;
 		if (nv_two_reg_pll(dev) && pll2 & NV31_RAMDAC_ENABLE_VCO2)
 			pllvals->NM2 = pll2 & 0xffff;
-		else if (drm->device.info.chipset == 0x30 || drm->device.info.chipset == 0x35) {
+		else if (drm->client.device.info.chipset == 0x30 || drm->client.device.info.chipset == 0x35) {
 			pllvals->M1 &= 0xf; /* only 4 bits */
 			if (pll1 & NV30_RAMDAC_ENABLE_VCO2) {
 				pllvals->M2 = (pll1 >> 4) & 0x7;
@@ -165,8 +165,8 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		       struct nvkm_pll_vals *pllvals)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvif_object *device = &drm->client.device.object;
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	uint32_t reg1, pll1, pll2 = 0;
 	struct nvbios_pll pll_lim;
 	int ret;
@@ -184,7 +184,7 @@ nouveau_hw_get_pllvals(struct drm_device *dev, enum nvbios_pll_type plltype,
 		pll2 = nvif_rd32(device, reg2);
 	}
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS && reg1 >= NV_PRAMDAC_VPLL_COEFF) {
 		uint32_t ramdac580 = NVReadRAMDAC(dev, 0, NV_PRAMDAC_580);
 
 		/* check whether vpll has been forced into single stage mode */
@@ -222,6 +222,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t mpllP;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 3), 0x6c, &mpllP);
+		mpllP = (mpllP >> 8) & 0xf;
 		if (!mpllP)
 			mpllP = 4;
 
@@ -232,7 +233,7 @@ nouveau_hw_get_clock(struct drm_device *dev, enum nvbios_pll_type plltype)
 		uint32_t clock;
 
 		pci_read_config_dword(pci_get_bus_and_slot(0, 5), 0x4c, &clock);
-		return clock;
+		return clock / 1000;
 	}
 
 	ret = nouveau_hw_get_pllvals(dev, plltype, &pllvals);
@@ -252,7 +253,7 @@ nouveau_hw_fix_bad_vpll(struct drm_device *dev, int head)
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_clk *clk = nvxx_clk(device);
 	struct nvkm_bios *bios = nvxx_bios(device);
 	struct nvbios_pll pll_lim;
@@ -391,21 +392,21 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		regp->nv10_cursync = NVReadRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC);
 
 	nouveau_hw_get_pllvals(dev, head ? PLL_VPLL1 : PLL_VPLL0, &regp->pllvals);
 	state->pllsel = NVReadRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT);
 	if (nv_two_heads(dev))
 		state->sel_clk = NVReadRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		regp->dither = NVReadRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11);
 
 	regp->ramdac_gen_ctrl = NVReadRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL);
 
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_630 = NVReadRAMDAC(dev, head, NV_PRAMDAC_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		regp->ramdac_634 = NVReadRAMDAC(dev, head, NV_PRAMDAC_634);
 
 	regp->tv_setup = NVReadRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP);
@@ -447,7 +448,7 @@ nv_save_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		regp->ramdac_8c0 = NVReadRAMDAC(dev, head, NV_PRAMDAC_8C0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		regp->ramdac_a20 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A20);
 		regp->ramdac_a24 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A24);
 		regp->ramdac_a34 = NVReadRAMDAC(dev, head, NV_PRAMDAC_A34);
@@ -463,26 +464,26 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 		     struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_clk *clk = nvxx_clk(&drm->device);
+	struct nvkm_clk *clk = nvxx_clk(&drm->client.device);
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t pllreg = head ? NV_RAMDAC_VPLL2 : NV_PRAMDAC_VPLL_COEFF;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_NV10_CURSYNC, regp->nv10_cursync);
 
 	clk->pll_prog(clk, pllreg, &regp->pllvals);
 	NVWriteRAMDAC(dev, 0, NV_PRAMDAC_PLL_COEFF_SELECT, state->pllsel);
 	if (nv_two_heads(dev))
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_SEL_CLK, state->sel_clk);
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		NVWriteRAMDAC(dev, head, NV_RAMDAC_DITHER_NV11, regp->dither);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_GENERAL_CONTROL, regp->ramdac_gen_ctrl);
 
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_630, regp->ramdac_630);
-	if (drm->device.info.chipset >= 0x30)
+	if (drm->client.device.info.chipset >= 0x30)
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_634, regp->ramdac_634);
 
 	NVWriteRAMDAC(dev, head, NV_PRAMDAC_TV_SETUP, regp->tv_setup);
@@ -519,7 +520,7 @@ nv_load_state_ramdac(struct drm_device *dev, int head,
 	if (nv_gf4_disp_arch(dev))
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_8C0, regp->ramdac_8c0);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A20, regp->ramdac_a20);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A24, regp->ramdac_a24);
 		NVWriteRAMDAC(dev, head, NV_PRAMDAC_A34, regp->ramdac_a34);
@@ -600,10 +601,10 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_21);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		rd_cio_state(dev, head, regp, 0x9f);
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_49);
@@ -612,14 +613,14 @@ nv_save_state_ext(struct drm_device *dev, int head,
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		regp->crtc_830 = NVReadCRTC(dev, head, NV_PCRTC_830);
 		regp->crtc_834 = NVReadCRTC(dev, head, NV_PCRTC_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			regp->gpio_ext = NVReadCRTC(dev, head, NV_PCRTC_GPIO_EXT);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 			regp->crtc_850 = NVReadCRTC(dev, head, NV_PCRTC_850);
 
 		if (nv_two_heads(dev))
@@ -631,7 +632,7 @@ nv_save_state_ext(struct drm_device *dev, int head,
 
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	rd_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		rd_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -660,12 +661,12 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		  struct nv04_mode_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nv04_crtc_reg *regp = &state->crtc_reg[head];
 	uint32_t reg900;
 	int i;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		if (nv_two_heads(dev))
 			/* setting ENGINE_CTRL (EC) *must* come before
 			 * CIO_CRE_LCD, as writing CRE_LCD sets bits 16 & 17 in
@@ -677,20 +678,20 @@ nv_load_state_ext(struct drm_device *dev, int head,
 		nvif_wr32(device, NV_PVIDEO_INTR_EN, 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(0), 0);
 		nvif_wr32(device, NV_PVIDEO_OFFSET_BUFF(1), 0);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->device.info.ram_size - 1);
-		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_LIMIT(1), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(0), drm->client.device.info.ram_size - 1);
+		nvif_wr32(device, NV_PVIDEO_UVPLANE_LIMIT(1), drm->client.device.info.ram_size - 1);
 		nvif_wr32(device, NV_PBUS_POWERCTRL_2, 0);
 
 		NVWriteCRTC(dev, head, NV_PCRTC_CURSOR_CONFIG, regp->cursor_cfg);
 		NVWriteCRTC(dev, head, NV_PCRTC_830, regp->crtc_830);
 		NVWriteCRTC(dev, head, NV_PCRTC_834, regp->crtc_834);
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 			NVWriteCRTC(dev, head, NV_PCRTC_GPIO_EXT, regp->gpio_ext);
 
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE) {
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE) {
 			NVWriteCRTC(dev, head, NV_PCRTC_850, regp->crtc_850);
 
 			reg900 = NVReadRAMDAC(dev, head, NV_PRAMDAC_900);
@@ -713,23 +714,23 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FF_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_FFLWM__INDEX);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KELVIN)
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_47);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE)
 		wr_cio_state(dev, head, regp, 0x9f);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_49);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR0_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR1_INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_HCUR_ADDR2_INDEX);
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_ILACE__INDEX);
 
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH3__INDEX);
 	wr_cio_state(dev, head, regp, NV_CIO_CRE_SCRATCH4__INDEX);
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_EBR_INDEX);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_CSB);
 		wr_cio_state(dev, head, regp, NV_CIO_CRE_4B);
@@ -737,14 +738,14 @@ nv_load_state_ext(struct drm_device *dev, int head,
 	}
 	/* NV11 and NV20 stop at 0x52. */
 	if (nv_gf4_disp_arch(dev)) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_KELVIN) {
 			/* Not waiting for vertical retrace before modifying
 			   CRE_53/CRE_54 causes lockups. */
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if ( (nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
-			nvif_msec(&drm->device, 650,
+			nvif_msec(&drm->client.device, 650,
 				if (!(nvif_rd32(device, NV_PRMCIO_INP0__COLOR) & 8))
 					break;
 			);
@@ -770,7 +771,7 @@ static void
 nv_save_state_palette(struct drm_device *dev, int head,
 		      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -789,7 +790,7 @@ void
 nouveau_hw_load_state_palette(struct drm_device *dev, int head,
 			      struct nv04_mode_state *state)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	int head_offset = head * NV_PRMDIO_SIZE, i;
 
 	nvif_wr08(device, NV_PRMDIO_PIXEL_MASK + head_offset,
@@ -809,7 +810,7 @@ void nouveau_hw_save_state(struct drm_device *dev, int head,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		/* NB: no attempt is made to restore the bad pll later on */
 		nouveau_hw_fix_bad_vpll(dev, head);
 	nv_save_state_ramdac(dev, head, state);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/hw.h b/drivers/gpu/drm/nouveau/dispnv04/hw.h
index 3bded60c5596..3a2be47fb4f1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/hw.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/hw.h
@@ -60,7 +60,7 @@ extern void nouveau_calc_arb(struct drm_device *, int vclk, int bpp,
 static inline uint32_t NVReadCRTC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
@@ -71,7 +71,7 @@ static inline uint32_t NVReadCRTC(struct drm_device *dev,
 static inline void NVWriteCRTC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PCRTC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -80,7 +80,7 @@ static inline void NVWriteCRTC(struct drm_device *dev,
 static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint32_t val;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
@@ -91,7 +91,7 @@ static inline uint32_t NVReadRAMDAC(struct drm_device *dev,
 static inline void NVWriteRAMDAC(struct drm_device *dev,
 					int head, uint32_t reg, uint32_t val)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (head)
 		reg += NV_PRAMDAC0_SIZE;
 	nvif_wr32(device, reg, val);
@@ -120,7 +120,7 @@ static inline void nv_write_tmds(struct drm_device *dev,
 static inline void NVWriteVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	nvif_wr08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE, value);
 }
@@ -128,7 +128,7 @@ static inline void NVWriteVgaCrtc(struct drm_device *dev,
 static inline uint8_t NVReadVgaCrtc(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	nvif_wr08(device, NV_PRMCIO_CRX__COLOR + head * NV_PRMCIO_SIZE, index);
 	val = nvif_rd08(device, NV_PRMCIO_CR__COLOR + head * NV_PRMCIO_SIZE);
@@ -165,13 +165,13 @@ static inline uint8_t NVReadVgaCrtc5758(struct drm_device *dev, int head, uint8_
 static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 					int head, uint32_t reg)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	uint8_t val;
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	val = nvif_rd08(device, reg);
@@ -181,12 +181,12 @@ static inline uint8_t NVReadPRMVIO(struct drm_device *dev,
 static inline void NVWritePRMVIO(struct drm_device *dev,
 					int head, uint32_t reg, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	/* Only NV4x have two pvio ranges; other twoHeads cards MUST call
 	 * NVSetOwner for the relevant head to be programmed */
-	if (head && drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (head && drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		reg += NV_PRMVIO_SIZE;
 
 	nvif_wr08(device, reg, value);
@@ -194,14 +194,14 @@ static inline void NVWritePRMVIO(struct drm_device *dev,
 
 static inline void NVSetEnablePalette(struct drm_device *dev, int head, bool enable)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	nvif_wr08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE, enable ? 0 : 0x20);
 }
 
 static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	nvif_rd08(device, NV_PRMCIO_INP0__COLOR + head * NV_PRMCIO_SIZE);
 	return !(nvif_rd08(device, NV_PRMCIO_ARX + head * NV_PRMCIO_SIZE) & 0x20);
 }
@@ -209,7 +209,7 @@ static inline bool NVGetEnablePalette(struct drm_device *dev, int head)
 static inline void NVWriteVgaAttr(struct drm_device *dev,
 					int head, uint8_t index, uint8_t value)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
 	else
@@ -223,7 +223,7 @@ static inline void NVWriteVgaAttr(struct drm_device *dev,
 static inline uint8_t NVReadVgaAttr(struct drm_device *dev,
 					int head, uint8_t index)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	uint8_t val;
 	if (NVGetEnablePalette(dev, head))
 		index &= ~0x20;
@@ -259,10 +259,10 @@ static inline void NVVgaProtect(struct drm_device *dev, int head, bool protect)
 static inline bool
 nv_heads_tied(struct drm_device *dev)
 {
-	struct nvif_object *device = &nouveau_drm(dev)->device.object;
+	struct nvif_object *device = &nouveau_drm(dev)->client.device.object;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	if (drm->device.info.chipset == 0x11)
+	if (drm->client.device.info.chipset == 0x11)
 		return !!(nvif_rd32(device, NV_PBUS_DEBUG_1) & (1 << 28));
 
 	return NVReadVgaCrtc(dev, 0, NV_CIO_CRE_44) & 0x4;
@@ -318,7 +318,7 @@ NVLockVgaCrtcs(struct drm_device *dev, bool lock)
 	NVWriteVgaCrtc(dev, 0, NV_CIO_SR_LOCK_INDEX,
 		       lock ? NV_CIO_SR_LOCK_VALUE : NV_CIO_SR_UNLOCK_RW_VALUE);
 	/* NV11 has independently lockable extended crtcs, except when tied */
-	if (drm->device.info.chipset == 0x11 && !nv_heads_tied(dev))
+	if (drm->client.device.info.chipset == 0x11 && !nv_heads_tied(dev))
 		NVWriteVgaCrtc(dev, 1, NV_CIO_SR_LOCK_INDEX,
 			       lock ? NV_CIO_SR_LOCK_VALUE :
 				      NV_CIO_SR_UNLOCK_RW_VALUE);
@@ -335,7 +335,7 @@ static inline int nv_cursor_width(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
-	return drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
+	return drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS ? NV10_CURSOR_SIZE : NV04_CURSOR_SIZE;
 }
 
 static inline void
@@ -357,7 +357,7 @@ nv_set_crtc_base(struct drm_device *dev, int head, uint32_t offset)
 
 	NVWriteCRTC(dev, head, NV_PCRTC_START, offset);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT) {
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT) {
 		/*
 		 * Hilarious, the 24th bit doesn't want to stick to
 		 * PCRTC_START...
@@ -382,7 +382,7 @@ nv_show_cursor(struct drm_device *dev, int head, bool show)
 		*curctl1 &= ~MASK(NV_CIO_CRE_HCUR_ADDR1_ENABLE);
 	NVWriteVgaCrtc(dev, head, NV_CIO_CRE_HCUR_ADDR1_INDEX, *curctl1);
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		nv_fix_nv40_hw_cursor(dev, head);
 }
 
@@ -398,7 +398,7 @@ nv_pitch_align(struct drm_device *dev, uint32_t width, int bpp)
 		bpp = 8;
 
 	/* Alignment requirements taken from the Haiku driver */
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_TNT)
 		mask = 128 / bpp - 1;
 	else
 		mask = 512 / bpp - 1;
diff --git a/drivers/gpu/drm/nouveau/dispnv04/overlay.c b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
index ec444eac6258..5319f2a7f24d 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/overlay.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/overlay.c
@@ -33,7 +33,7 @@
 #include "nouveau_connector.h"
 #include "nouveau_display.h"
 #include "nvreg.h"
-
+#include "disp.h"
 
 struct nouveau_plane {
 	struct drm_plane base;
@@ -97,7 +97,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_w, uint32_t src_h)
 {
 	struct nouveau_drm *drm = nouveau_drm(plane->dev);
-	struct nvif_object *dev = &drm->device.object;
+	struct nvif_object *dev = &drm->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -119,7 +119,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	if (format > 0xffff)
 		return -ERANGE;
 
-	if (drm->device.info.chipset >= 0x30) {
+	if (drm->client.device.info.chipset >= 0x30) {
 		if (crtc_w < (src_w >> 1) || crtc_h < (src_h >> 1))
 			return -ERANGE;
 	} else {
@@ -145,16 +145,16 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 	nvif_wr32(dev, NV_PVIDEO_POINT_OUT(flip), crtc_y << 16 | crtc_x);
 	nvif_wr32(dev, NV_PVIDEO_SIZE_OUT(flip), crtc_h << 16 | crtc_w);
 
-	if (fb->pixel_format != DRM_FORMAT_UYVY)
+	if (fb->format->format != DRM_FORMAT_UYVY)
 		format |= NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8;
-	if (fb->pixel_format == DRM_FORMAT_NV12)
+	if (fb->format->format == DRM_FORMAT_NV12)
 		format |= NV_PVIDEO_FORMAT_PLANAR;
 	if (nv_plane->iturbt_709)
 		format |= NV_PVIDEO_FORMAT_MATRIX_ITURBT709;
 	if (nv_plane->colorkey & (1 << 24))
 		format |= NV_PVIDEO_FORMAT_DISPLAY_COLOR_KEY;
 
-	if (fb->pixel_format == DRM_FORMAT_NV12) {
+	if (fb->format->format == DRM_FORMAT_NV12) {
 		nvif_wr32(dev, NV_PVIDEO_UVPLANE_BASE(flip), 0);
 		nvif_wr32(dev, NV_PVIDEO_UVPLANE_OFFSET_BUFF(flip),
 			nv_fb->nvbo->bo.offset + fb->offsets[1]);
@@ -174,7 +174,7 @@ nv10_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv10_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -198,7 +198,7 @@ nv_destroy_plane(struct drm_plane *plane)
 static void
 nv10_set_params(struct nouveau_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->base.dev)->client.device.object;
 	u32 luma = (plane->brightness - 512) << 16 | plane->contrast;
 	u32 chroma = ((sin_mul(plane->hue, plane->saturation) & 0xffff) << 16) |
 		(cos_mul(plane->hue, plane->saturation) & 0xffff);
@@ -268,7 +268,7 @@ nv10_overlay_init(struct drm_device *device)
 	if (!plane)
 		return;
 
-	switch (drm->device.info.chipset) {
+	switch (drm->client.device.info.chipset) {
 	case 0x10:
 	case 0x11:
 	case 0x15:
@@ -347,7 +347,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 		  uint32_t src_x, uint32_t src_y,
 		  uint32_t src_w, uint32_t src_h)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
@@ -411,7 +411,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 
 	if (nv_plane->colorkey & (1 << 24))
 		overlay |= 0x10;
-	if (fb->pixel_format == DRM_FORMAT_YUYV)
+	if (fb->format->format == DRM_FORMAT_YUYV)
 		overlay |= 0x100;
 
 	nvif_wr32(dev, NV_PVIDEO_OVERLAY, overlay);
@@ -427,7 +427,7 @@ nv04_update_plane(struct drm_plane *plane, struct drm_crtc *crtc,
 static int
 nv04_disable_plane(struct drm_plane *plane)
 {
-	struct nvif_object *dev = &nouveau_drm(plane->dev)->device.object;
+	struct nvif_object *dev = &nouveau_drm(plane->dev)->client.device.object;
 	struct nouveau_plane *nv_plane =
 		container_of(plane, struct nouveau_plane, base);
 
@@ -495,7 +495,7 @@ err:
 void
 nouveau_overlay_init(struct drm_device *device)
 {
-	struct nvif_device *dev = &nouveau_drm(device)->device;
+	struct nvif_device *dev = &nouveau_drm(device)->client.device;
 	if (dev->info.chipset < 0x10)
 		nv04_overlay_init(device);
 	else if (dev->info.chipset <= 0x40)
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
index 477a8d072af4..01664357d3e1 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv04.c
@@ -54,7 +54,7 @@ static struct nvkm_i2c_bus_probe nv04_tv_encoder_info[] = {
 int nv04_tv_identify(struct drm_device *dev, int i2c_index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, i2c_index);
 	if (bus) {
 		return nvkm_i2c_bus_probe(bus, "TV encoder",
@@ -206,7 +206,7 @@ nv04_tv_create(struct drm_connector *connector, struct dcb_output *entry)
 	struct drm_encoder *encoder;
 	struct drm_device *dev = connector->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = nvkm_i2c_bus_find(i2c, entry->i2c_index);
 	int type, ret;
 
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
index 434d1e29f279..6d99f11fee4e 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.c
@@ -46,7 +46,7 @@ static uint32_t nv42_tv_sample_load(struct drm_encoder *encoder)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	uint32_t testval, regoffset = nv04_dac_output_offset(encoder);
 	uint32_t gpio0, gpio1, fp_htotal, fp_hsync_start, fp_hsync_end,
 		fp_control, test_ctrl, dacclk, ctv_14, ctv_1c, ctv_6c;
@@ -130,7 +130,7 @@ static bool
 get_tv_detect_quirks(struct drm_device *dev, uint32_t *pin_mask)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	if (device->quirk && device->quirk->tv_pin_mask) {
 		*pin_mask = device->quirk->tv_pin_mask;
@@ -154,8 +154,8 @@ nv17_tv_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 		return connector_status_disconnected;
 
 	if (reliable) {
-		if (drm->device.info.chipset == 0x42 ||
-		    drm->device.info.chipset == 0x43)
+		if (drm->client.device.info.chipset == 0x42 ||
+		    drm->client.device.info.chipset == 0x43)
 			tv_enc->pin_mask =
 				nv42_tv_sample_load(encoder) >> 28 & 0xe;
 		else
@@ -362,7 +362,7 @@ static void  nv17_tv_dpms(struct drm_encoder *encoder, int mode)
 {
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nv17_tv_state *regs = &to_tv_enc(encoder)->state;
 	struct nv17_tv_norm_params *tv_norm = get_tv_norm(encoder);
 
@@ -435,7 +435,7 @@ static void nv17_tv_prepare(struct drm_encoder *encoder)
 	/* Set the DACCLK register */
 	dacclk = (NVReadRAMDAC(dev, 0, dacclk_off) & ~0x30) | 0x1;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE)
 		dacclk |= 0x1a << 16;
 
 	if (tv_norm->kind == CTV_ENC_MODE) {
@@ -492,7 +492,7 @@ static void nv17_tv_mode_set(struct drm_encoder *encoder,
 			tv_regs->ptv_614 = 0x13;
 		}
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_RANKINE) {
 			tv_regs->ptv_500 = 0xe8e0;
 			tv_regs->ptv_504 = 0x1710;
 			tv_regs->ptv_604 = 0x0;
@@ -587,7 +587,7 @@ static void nv17_tv_commit(struct drm_encoder *encoder)
 	nv17_tv_state_load(dev, &to_tv_enc(encoder)->state);
 
 	/* This could use refinement for flatpanels, but it should work */
-	if (drm->device.info.chipset < 0x44)
+	if (drm->client.device.info.chipset < 0x44)
 		NVWriteRAMDAC(dev, 0, NV_PRAMDAC_TEST_CONTROL +
 					nv04_dac_output_offset(encoder),
 					0xf0000000);
diff --git a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
index 1b07521cde0d..29773b325bd9 100644
--- a/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
+++ b/drivers/gpu/drm/nouveau/dispnv04/tvnv17.h
@@ -130,13 +130,13 @@ void nv17_ctv_update_rescaler(struct drm_encoder *encoder);
 static inline void nv_write_ptv(struct drm_device *dev, uint32_t reg,
 				uint32_t val)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	nvif_wr32(&device->object, reg, val);
 }
 
 static inline uint32_t nv_read_ptv(struct drm_device *dev, uint32_t reg)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	return nvif_rd32(&device->object, reg);
 }
 
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h
index d15c296b5f33..ae49dfd1f97b 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl5070.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl5070.h
@@ -34,6 +34,8 @@ struct nv50_disp_mthd_v1 {
 #define NV50_DISP_MTHD_V1_SOR_HDMI_PWR                                     0x22
 #define NV50_DISP_MTHD_V1_SOR_LVDS_SCRIPT                                  0x23
 #define NV50_DISP_MTHD_V1_SOR_DP_PWR                                       0x24
+#define NV50_DISP_MTHD_V1_SOR_DP_MST_LINK                                  0x25
+#define NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI                                  0x26
 #define NV50_DISP_MTHD_V1_PIOR_PWR                                         0x30
 	__u8  method;
 	__u16 hasht;
@@ -90,6 +92,21 @@ struct nv50_disp_sor_dp_pwr_v0 {
 	__u8  pad02[6];
 };
 
+struct nv50_disp_sor_dp_mst_link_v0 {
+	__u8  version;
+	__u8  state;
+	__u8  pad02[6];
+};
+
+struct nv50_disp_sor_dp_mst_vcpi_v0 {
+	__u8  version;
+	__u8  pad01[1];
+	__u8  start_slot;
+	__u8  num_slots;
+	__u16 pbn;
+	__u16 aligned_pbn;
+};
+
 struct nv50_disp_pior_pwr_v0 {
 	__u8  version;
 	__u8  state;
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
index 05e6ef7cd190..91e33db21a2f 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826e.h
@@ -10,5 +10,5 @@ struct g82_channel_dma_v0 {
 	__u64 offset;
 };
 
-#define G82_CHANNEL_DMA_V0_NTFY_UEVENT                                     0x00
+#define NV826E_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
index cecafcb1e954..e34efd4ec537 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl826f.h
@@ -11,5 +11,5 @@ struct g82_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define G82_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                  0x00
+#define NV826F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
index 2caf0838fcfd..a2d5410a491b 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cl906f.h
@@ -10,5 +10,6 @@ struct fermi_channel_gpfifo_v0 {
 	__u64 vm;
 };
 
-#define FERMI_CHANNEL_GPFIFO_V0_NTFY_UEVENT                                0x00
+#define NV906F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NV906F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
index 46301ec018ce..2efa3d048bb9 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/cla06f.h
@@ -25,5 +25,6 @@ struct kepler_channel_gpfifo_a_v0 {
 	__u64 vm;
 };
 
-#define NVA06F_V0_NTFY_UEVENT                                              0x00
+#define NVA06F_V0_NTFY_NON_STALL_INTERRUPT                                 0x00
+#define NVA06F_V0_NTFY_KILLED                                              0x01
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/class.h b/drivers/gpu/drm/nouveau/include/nvif/class.h
index e6e9537537cf..3a2c0137d4b4 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/class.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/class.h
@@ -2,23 +2,31 @@
 #define __NVIF_CLASS_H__
 
 /* these class numbers are made up by us, and not nvidia-assigned */
-#define NVIF_CLASS_CONTROL                                    /* if0001.h */ -1
-#define NVIF_CLASS_PERFMON                                    /* if0002.h */ -2
-#define NVIF_CLASS_PERFDOM                                    /* if0003.h */ -3
-#define NVIF_CLASS_SW_NV04                                    /* if0004.h */ -4
-#define NVIF_CLASS_SW_NV10                                    /* if0005.h */ -5
-#define NVIF_CLASS_SW_NV50                                    /* if0005.h */ -6
-#define NVIF_CLASS_SW_GF100                                   /* if0005.h */ -7
+#define NVIF_CLASS_CLIENT                            /* if0000.h */ -0x00000000
+
+#define NVIF_CLASS_CONTROL                           /* if0001.h */ -0x00000001
+
+#define NVIF_CLASS_PERFMON                           /* if0002.h */ -0x00000002
+#define NVIF_CLASS_PERFDOM                           /* if0003.h */ -0x00000003
+
+#define NVIF_CLASS_SW_NV04                           /* if0004.h */ -0x00000004
+#define NVIF_CLASS_SW_NV10                           /* if0005.h */ -0x00000005
+#define NVIF_CLASS_SW_NV50                           /* if0005.h */ -0x00000006
+#define NVIF_CLASS_SW_GF100                          /* if0005.h */ -0x00000007
 
 /* the below match nvidia-assigned (either in hw, or sw) class numbers */
+#define NV_NULL_CLASS                                                0x00000030
+
 #define NV_DEVICE                                     /* cl0080.h */ 0x00000080
 
 #define NV_DMA_FROM_MEMORY                            /* cl0002.h */ 0x00000002
 #define NV_DMA_TO_MEMORY                              /* cl0002.h */ 0x00000003
 #define NV_DMA_IN_MEMORY                              /* cl0002.h */ 0x0000003d
 
+#define NV50_TWOD                                                    0x0000502d
 #define FERMI_TWOD_A                                                 0x0000902d
 
+#define NV50_MEMORY_TO_MEMORY_FORMAT                                 0x00005039
 #define FERMI_MEMORY_TO_MEMORY_FORMAT_A                              0x00009039
 
 #define KEPLER_INLINE_TO_MEMORY_A                                    0x0000a040
@@ -52,7 +60,7 @@
 #define GM107_DISP                                    /* cl5070.h */ 0x00009470
 #define GM200_DISP                                    /* cl5070.h */ 0x00009570
 #define GP100_DISP                                    /* cl5070.h */ 0x00009770
-#define GP104_DISP                                    /* cl5070.h */ 0x00009870
+#define GP102_DISP                                    /* cl5070.h */ 0x00009870
 
 #define NV31_MPEG                                                    0x00003174
 #define G82_MPEG                                                     0x00008274
@@ -90,7 +98,7 @@
 #define GM107_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000947d
 #define GM200_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000957d
 #define GP100_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000977d
-#define GP104_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
+#define GP102_DISP_CORE_CHANNEL_DMA                   /* cl507d.h */ 0x0000987d
 
 #define NV50_DISP_OVERLAY_CHANNEL_DMA                 /* cl507e.h */ 0x0000507e
 #define G82_DISP_OVERLAY_CHANNEL_DMA                  /* cl507e.h */ 0x0000827e
@@ -99,6 +107,12 @@
 #define GF110_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000907e
 #define GK104_DISP_OVERLAY_CONTROL_DMA                /* cl507e.h */ 0x0000917e
 
+#define NV50_TESLA                                                   0x00005097
+#define G82_TESLA                                                    0x00008297
+#define GT200_TESLA                                                  0x00008397
+#define GT214_TESLA                                                  0x00008597
+#define GT21A_TESLA                                                  0x00008697
+
 #define FERMI_A                                       /* cl9097.h */ 0x00009097
 #define FERMI_B                                       /* cl9097.h */ 0x00009197
 #define FERMI_C                                       /* cl9097.h */ 0x00009297
@@ -140,6 +154,8 @@
 
 #define FERMI_DECOMPRESS                                             0x000090b8
 
+#define NV50_COMPUTE                                                 0x000050c0
+#define GT214_COMPUTE                                                0x000085c0
 #define FERMI_COMPUTE_A                                              0x000090c0
 #define FERMI_COMPUTE_B                                              0x000091c0
 #define KEPLER_COMPUTE_A                                             0x0000a0c0
diff --git a/drivers/gpu/drm/nouveau/include/nvif/client.h b/drivers/gpu/drm/nouveau/include/nvif/client.h
index 4a7f6f7b836d..b52a8eadce01 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/client.h
@@ -11,8 +11,7 @@ struct nvif_client {
 	bool super;
 };
 
-int  nvif_client_init(const char *drv, const char *name, u64 device,
-		      const char *cfg, const char *dbg,
+int  nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
 		      struct nvif_client *);
 void nvif_client_fini(struct nvif_client *);
 int  nvif_client_ioctl(struct nvif_client *, void *, u32);
diff --git a/drivers/gpu/drm/nouveau/include/nvif/driver.h b/drivers/gpu/drm/nouveau/include/nvif/driver.h
index 8bd39e69229c..0c6f48d8140a 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/driver.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/driver.h
@@ -1,5 +1,7 @@
 #ifndef __NVIF_DRIVER_H__
 #define __NVIF_DRIVER_H__
+#include <nvif/os.h>
+struct nvif_client;
 
 struct nvif_driver {
 	const char *name;
@@ -14,9 +16,11 @@ struct nvif_driver {
 	bool keep;
 };
 
+int nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		     const char *name, u64 device, struct nvif_client *);
+
 extern const struct nvif_driver nvif_driver_nvkm;
 extern const struct nvif_driver nvif_driver_drm;
 extern const struct nvif_driver nvif_driver_lib;
 extern const struct nvif_driver nvif_driver_null;
-
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvif/if0000.h b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
index 85c44e8a1201..c2c0fc41e017 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/if0000.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/if0000.h
@@ -1,9 +1,16 @@
 #ifndef __NVIF_IF0000_H__
 #define __NVIF_IF0000_H__
 
-#define NV_CLIENT_DEVLIST                                                  0x00
+struct nvif_client_v0 {
+	__u8  version;
+	__u8  pad01[7];
+	__u64 device;
+	char  name[32];
+};
+
+#define NVIF_CLIENT_V0_DEVLIST                                             0x00
 
-struct nv_client_devlist_v0 {
+struct nvif_client_devlist_v0 {
 	__u8  version;
 	__u8  count;
 	__u8  pad02[6];
diff --git a/drivers/gpu/drm/nouveau/include/nvif/object.h b/drivers/gpu/drm/nouveau/include/nvif/object.h
index 8d815967767f..9e58b305b020 100644
--- a/drivers/gpu/drm/nouveau/include/nvif/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvif/object.h
@@ -66,6 +66,35 @@ void nvif_object_unmap(struct nvif_object *);
 
 #define nvif_mthd(a,b,c,d) nvif_object_mthd((a), (b), (c), (d))
 
+struct nvif_mclass {
+	s32 oclass;
+	int version;
+};
+
+#define nvif_mclass(o,m) ({                                                    \
+	struct nvif_object *object = (o);                                      \
+	struct nvif_sclass *sclass;                                            \
+	const typeof(m[0]) *mclass = (m);                                      \
+	int ret = -ENODEV;                                                     \
+	int cnt, i, j;                                                         \
+                                                                               \
+	cnt = nvif_object_sclass_get(object, &sclass);                         \
+	if (cnt >= 0) {                                                        \
+		for (i = 0; ret < 0 && mclass[i].oclass; i++) {                \
+			for (j = 0; j < cnt; j++) {                            \
+				if (mclass[i].oclass  == sclass[j].oclass &&   \
+				    mclass[i].version >= sclass[j].minver &&   \
+				    mclass[i].version <= sclass[j].maxver) {   \
+					ret = i;                               \
+					break;                                 \
+				}                                              \
+			}                                                      \
+		}                                                              \
+		nvif_object_sclass_put(&sclass);                               \
+	}                                                                      \
+	ret;                                                                   \
+})
+
 /*XXX*/
 #include <core/object.h>
 #define nvxx_object(a) ({                                                      \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
index eaf5905a87a3..e876634da10a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/client.h
@@ -1,5 +1,6 @@
 #ifndef __NVKM_CLIENT_H__
 #define __NVKM_CLIENT_H__
+#define nvkm_client(p) container_of((p), struct nvkm_client, object)
 #include <core/object.h>
 
 struct nvkm_client {
@@ -8,9 +9,8 @@ struct nvkm_client {
 	u64 device;
 	u32 debug;
 
-	struct nvkm_client_notify *notify[16];
+	struct nvkm_client_notify *notify[32];
 	struct rb_root objroot;
-	struct rb_root dmaroot;
 
 	bool super;
 	void *data;
@@ -19,15 +19,11 @@ struct nvkm_client {
 	struct nvkm_vm *vm;
 };
 
-bool nvkm_client_insert(struct nvkm_client *, struct nvkm_object *);
-void nvkm_client_remove(struct nvkm_client *, struct nvkm_object *);
-struct nvkm_object *nvkm_client_search(struct nvkm_client *, u64 object);
-
 int  nvkm_client_new(const char *name, u64 device, const char *cfg,
-		     const char *dbg, struct nvkm_client **);
-void nvkm_client_del(struct nvkm_client **);
-int  nvkm_client_init(struct nvkm_client *);
-int  nvkm_client_fini(struct nvkm_client *, bool suspend);
+		     const char *dbg,
+		     int (*)(const void *, u32, const void *, u32),
+		     struct nvkm_client **);
+struct nvkm_client *nvkm_client_search(struct nvkm_client *, u64 handle);
 
 int nvkm_client_notify_new(struct nvkm_object *, struct nvkm_event *,
 			   void *data, u32 size);
@@ -37,8 +33,8 @@ int nvkm_client_notify_put(struct nvkm_client *, int index);
 
 /* logging for client-facing objects */
 #define nvif_printk(o,l,p,f,a...) do {                                         \
-	struct nvkm_object *_object = (o);                                     \
-	struct nvkm_client *_client = _object->client;                         \
+	const struct nvkm_object *_object = (o);                               \
+	const struct nvkm_client *_client = _object->client;                   \
 	if (_client->debug >= NV_DBG_##l)                                      \
 		printk(KERN_##p "nouveau: %s:%08x:%08x: "f, _client->name,     \
 		       _object->handle, _object->oclass, ##a);                 \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
index 6bc712f32c8b..d426b86e2712 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/device.h
@@ -262,7 +262,7 @@ extern const struct nvkm_sclass nvkm_udevice_sclass;
 
 /* device logging */
 #define nvdev_printk_(d,l,p,f,a...) do {                                       \
-	struct nvkm_device *_device = (d);                                     \
+	const struct nvkm_device *_device = (d);                               \
 	if (_device->debug >= (l))                                             \
 		dev_##p(_device->dev, f, ##a);                                 \
 } while(0)
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
index 9ebfd8782366..d4cd2fbfde88 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/engine.h
@@ -20,6 +20,7 @@ struct nvkm_engine_func {
 	int (*fini)(struct nvkm_engine *, bool suspend);
 	void (*intr)(struct nvkm_engine *);
 	void (*tile)(struct nvkm_engine *, int region, struct nvkm_fb_tile *);
+	bool (*chsw_load)(struct nvkm_engine *);
 
 	struct {
 		int (*sclass)(struct nvkm_oclass *, int index,
@@ -44,4 +45,5 @@ int nvkm_engine_new_(const struct nvkm_engine_func *, struct nvkm_device *,
 struct nvkm_engine *nvkm_engine_ref(struct nvkm_engine *);
 void nvkm_engine_unref(struct nvkm_engine **);
 void nvkm_engine_tile(struct nvkm_engine *, int region);
+bool nvkm_engine_chsw_load(struct nvkm_engine *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
index 9363b839a9da..33ca6769266a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/memory.h
@@ -6,9 +6,10 @@ struct nvkm_vma;
 struct nvkm_vm;
 
 enum nvkm_memory_target {
-	NVKM_MEM_TARGET_INST,
-	NVKM_MEM_TARGET_VRAM,
-	NVKM_MEM_TARGET_HOST,
+	NVKM_MEM_TARGET_INST, /* instance memory */
+	NVKM_MEM_TARGET_VRAM, /* video memory */
+	NVKM_MEM_TARGET_HOST, /* coherent system memory */
+	NVKM_MEM_TARGET_NCOH, /* non-coherent system memory */
 };
 
 struct nvkm_memory {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
index d92fd41e4056..7bd4897a8a2a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/mm.h
@@ -5,7 +5,7 @@
 struct nvkm_mm_node {
 	struct list_head nl_entry;
 	struct list_head fl_entry;
-	struct list_head rl_entry;
+	struct nvkm_mm_node *next;
 
 #define NVKM_MM_HEAP_ANY 0x00
 	u8  heap;
@@ -38,4 +38,10 @@ int  nvkm_mm_tail(struct nvkm_mm *, u8 heap, u8 type, u32 size_max,
 		  u32 size_min, u32 align, struct nvkm_mm_node **);
 void nvkm_mm_free(struct nvkm_mm *, struct nvkm_mm_node **);
 void nvkm_mm_dump(struct nvkm_mm *, const char *);
+
+static inline bool
+nvkm_mm_contiguous(struct nvkm_mm_node *node)
+{
+	return !node->next;
+}
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
index dcd048b91fac..96dda350ada3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/object.h
@@ -62,6 +62,11 @@ int nvkm_object_wr32(struct nvkm_object *, u64 addr, u32  data);
 int nvkm_object_bind(struct nvkm_object *, struct nvkm_gpuobj *, int align,
 		     struct nvkm_gpuobj **);
 
+bool nvkm_object_insert(struct nvkm_object *);
+void nvkm_object_remove(struct nvkm_object *);
+struct nvkm_object *nvkm_object_search(struct nvkm_client *, u64 object,
+				       const struct nvkm_object_func *);
+
 struct nvkm_sclass {
 	int minver;
 	int maxver;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
index 57adefa8b08e..ca9ed3d68f44 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/core/subdev.h
@@ -32,7 +32,7 @@ void nvkm_subdev_intr(struct nvkm_subdev *);
 
 /* subdev logging */
 #define nvkm_printk_(s,l,p,f,a...) do {                                        \
-	struct nvkm_subdev *_subdev = (s);                                     \
+	const struct nvkm_subdev *_subdev = (s);                               \
 	if (_subdev->debug >= (l)) {                                           \
 		dev_##p(_subdev->device->dev, "%s: "f,                         \
 			nvkm_subdev_name[_subdev->index], ##a);                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
index d3d26a1e215d..b93f4c1a95e5 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/ce.h
@@ -8,5 +8,5 @@ int gk104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm107_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gm200_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 int gp100_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
-int gp104_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
+int gp102_ce_new(struct nvkm_device *, int, struct nvkm_engine **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
index e82049667ce4..970ae753968a 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/disp.h
@@ -33,5 +33,5 @@ int gk110_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm107_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gm200_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 int gp100_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
-int gp104_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
+int gp102_disp_new(struct nvkm_device *, int, struct nvkm_disp **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
index 114bfb737a81..d2a6532ce3b9 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/dma.h
@@ -12,9 +12,6 @@ struct nvkm_dmaobj {
 	u32 access;
 	u64 start;
 	u64 limit;
-
-	struct rb_node rb;
-	u64 handle; /*XXX HANDLE MERGE */
 };
 
 struct nvkm_dma {
@@ -22,8 +19,7 @@ struct nvkm_dma {
 	struct nvkm_engine engine;
 };
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *, struct nvkm_client *, u64 object);
+struct nvkm_dmaobj *nvkm_dmaobj_search(struct nvkm_client *, u64 object);
 
 int nv04_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
 int nv50_dma_new(struct nvkm_device *, int, struct nvkm_dma **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
index e6baf039c269..7e498e65b1e8 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/falcon.h
@@ -4,13 +4,26 @@
 #include <core/engine.h>
 struct nvkm_fifo_chan;
 
+enum nvkm_falcon_dmaidx {
+	FALCON_DMAIDX_UCODE		= 0,
+	FALCON_DMAIDX_VIRT		= 1,
+	FALCON_DMAIDX_PHYS_VID		= 2,
+	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
+	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
+};
+
 struct nvkm_falcon {
 	const struct nvkm_falcon_func *func;
-	struct nvkm_engine engine;
-
+	const struct nvkm_subdev *owner;
+	const char *name;
 	u32 addr;
-	u8  version;
-	u8  secret;
+
+	struct mutex mutex;
+	const struct nvkm_subdev *user;
+
+	u8 version;
+	u8 secret;
+	bool debug;
 
 	struct nvkm_memory *core;
 	bool external;
@@ -19,15 +32,25 @@ struct nvkm_falcon {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} code;
 
 	struct {
 		u32 limit;
 		u32 *data;
 		u32  size;
+		u8 ports;
 	} data;
+
+	struct nvkm_engine engine;
 };
 
+int nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		       struct nvkm_falcon **);
+void nvkm_falcon_del(struct nvkm_falcon **);
+int nvkm_falcon_get(struct nvkm_falcon *, const struct nvkm_subdev *);
+void nvkm_falcon_put(struct nvkm_falcon *, const struct nvkm_subdev *);
+
 int nvkm_falcon_new_(const struct nvkm_falcon_func *, struct nvkm_device *,
 		     int index, bool enable, u32 addr, struct nvkm_engine **);
 
@@ -42,6 +65,51 @@ struct nvkm_falcon_func {
 	} data;
 	void (*init)(struct nvkm_falcon *);
 	void (*intr)(struct nvkm_falcon *, struct nvkm_fifo_chan *);
+	void (*load_imem)(struct nvkm_falcon *, void *, u32, u32, u16, u8, bool);
+	void (*load_dmem)(struct nvkm_falcon *, void *, u32, u32, u8);
+	void (*read_dmem)(struct nvkm_falcon *, u32, u32, u8, void *);
+	void (*bind_context)(struct nvkm_falcon *, struct nvkm_gpuobj *);
+	int (*wait_for_halt)(struct nvkm_falcon *, u32);
+	int (*clear_interrupt)(struct nvkm_falcon *, u32);
+	void (*set_start_addr)(struct nvkm_falcon *, u32 start_addr);
+	void (*start)(struct nvkm_falcon *);
+	int (*enable)(struct nvkm_falcon *falcon);
+	void (*disable)(struct nvkm_falcon *falcon);
+
 	struct nvkm_sclass sclass[];
 };
+
+static inline u32
+nvkm_falcon_rd32(struct nvkm_falcon *falcon, u32 addr)
+{
+	return nvkm_rd32(falcon->owner->device, falcon->addr + addr);
+}
+
+static inline void
+nvkm_falcon_wr32(struct nvkm_falcon *falcon, u32 addr, u32 data)
+{
+	nvkm_wr32(falcon->owner->device, falcon->addr + addr, data);
+}
+
+static inline u32
+nvkm_falcon_mask(struct nvkm_falcon *falcon, u32 addr, u32 mask, u32 val)
+{
+	struct nvkm_device *device = falcon->owner->device;
+
+	return nvkm_mask(device, falcon->addr + addr, mask, val);
+}
+
+void nvkm_falcon_load_imem(struct nvkm_falcon *, void *, u32, u32, u16, u8,
+			   bool);
+void nvkm_falcon_load_dmem(struct nvkm_falcon *, void *, u32, u32, u8);
+void nvkm_falcon_read_dmem(struct nvkm_falcon *, u32, u32, u8, void *);
+void nvkm_falcon_bind_context(struct nvkm_falcon *, struct nvkm_gpuobj *);
+void nvkm_falcon_set_start_addr(struct nvkm_falcon *, u32);
+void nvkm_falcon_start(struct nvkm_falcon *);
+int nvkm_falcon_wait_for_halt(struct nvkm_falcon *, u32);
+int nvkm_falcon_clear_interrupt(struct nvkm_falcon *, u32);
+int nvkm_falcon_enable(struct nvkm_falcon *);
+void nvkm_falcon_disable(struct nvkm_falcon *);
+int nvkm_falcon_reset(struct nvkm_falcon *);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
index ed92fec5292c..24efa900d8ca 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/engine/fifo.h
@@ -40,6 +40,7 @@ struct nvkm_fifo {
 
 	struct nvkm_event uevent; /* async user trigger */
 	struct nvkm_event cevent; /* channel creation event */
+	struct nvkm_event kevent; /* channel killed */
 };
 
 void nvkm_fifo_pause(struct nvkm_fifo *, unsigned long *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h
index 934b0ae5521d..2ff64a20c0ec 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/boost.h
@@ -1,6 +1,6 @@
 #ifndef __NVBIOS_BOOST_H__
 #define __NVBIOS_BOOST_H__
-u16 nvbios_boostTe(struct nvkm_bios *, u8 *, u8 *, u8 *, u8 *, u8 *, u8 *);
+u32 nvbios_boostTe(struct nvkm_bios *, u8 *, u8 *, u8 *, u8 *, u8 *, u8 *);
 
 struct nvbios_boostE {
 	u8  pstate;
@@ -8,10 +8,10 @@ struct nvbios_boostE {
 	u32 max;
 };
 
-u16 nvbios_boostEe(struct nvkm_bios *, int idx, u8 *, u8 *, u8 *, u8 *);
-u16 nvbios_boostEp(struct nvkm_bios *, int idx, u8 *, u8 *, u8 *, u8 *,
+u32 nvbios_boostEe(struct nvkm_bios *, int idx, u8 *, u8 *, u8 *, u8 *);
+u32 nvbios_boostEp(struct nvkm_bios *, int idx, u8 *, u8 *, u8 *, u8 *,
 		   struct nvbios_boostE *);
-u16 nvbios_boostEm(struct nvkm_bios *, u8, u8 *, u8 *, u8 *, u8 *,
+u32 nvbios_boostEm(struct nvkm_bios *, u8, u8 *, u8 *, u8 *, u8 *,
 		   struct nvbios_boostE *);
 
 struct nvbios_boostS {
@@ -21,7 +21,7 @@ struct nvbios_boostS {
 	u32 max;
 };
 
-u16 nvbios_boostSe(struct nvkm_bios *, int, u16, u8 *, u8 *, u8, u8);
-u16 nvbios_boostSp(struct nvkm_bios *, int, u16, u8 *, u8 *, u8, u8,
+u32 nvbios_boostSe(struct nvkm_bios *, int, u32, u8 *, u8 *, u8, u8);
+u32 nvbios_boostSp(struct nvkm_bios *, int, u32, u8 *, u8 *, u8, u8,
 		   struct nvbios_boostS *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h
index 2f0e0c8e83be..76fe7d50a1ce 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/cstep.h
@@ -1,6 +1,6 @@
 #ifndef __NVBIOS_CSTEP_H__
 #define __NVBIOS_CSTEP_H__
-u16 nvbios_cstepTe(struct nvkm_bios *,
+u32 nvbios_cstepTe(struct nvkm_bios *,
 		   u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *xnr, u8 *xsz);
 
 struct nvbios_cstepE {
@@ -8,10 +8,10 @@ struct nvbios_cstepE {
 	u8  index;
 };
 
-u16 nvbios_cstepEe(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr);
-u16 nvbios_cstepEp(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr,
+u32 nvbios_cstepEe(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr);
+u32 nvbios_cstepEp(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr,
 		   struct nvbios_cstepE *);
-u16 nvbios_cstepEm(struct nvkm_bios *, u8 pstate, u8 *ver, u8 *hdr,
+u32 nvbios_cstepEm(struct nvkm_bios *, u8 pstate, u8 *ver, u8 *hdr,
 		   struct nvbios_cstepE *);
 
 struct nvbios_cstepX {
@@ -20,7 +20,7 @@ struct nvbios_cstepX {
 	u8  voltage;
 };
 
-u16 nvbios_cstepXe(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr);
-u16 nvbios_cstepXp(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr,
+u32 nvbios_cstepXe(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr);
+u32 nvbios_cstepXp(struct nvkm_bios *, int idx, u8 *ver, u8 *hdr,
 		   struct nvbios_cstepX *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h
index 693ea7d9ec43..a7513e8406a3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/fan.h
@@ -2,5 +2,5 @@
 #define __NVBIOS_FAN_H__
 #include <subdev/bios/therm.h>
 
-u16 nvbios_fan_parse(struct nvkm_bios *bios, struct nvbios_therm_fan *fan);
+u32 nvbios_fan_parse(struct nvkm_bios *bios, struct nvbios_therm_fan *fan);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
index a47d46dda704..b7a54e605469 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/gpio.h
@@ -6,6 +6,7 @@ enum dcb_gpio_func_name {
 	DCB_GPIO_TVDAC1 = 0x2d,
 	DCB_GPIO_FAN = 0x09,
 	DCB_GPIO_FAN_SENSE = 0x3d,
+	DCB_GPIO_LOGO_LED_PWM = 0x84,
 	DCB_GPIO_UNUSED = 0xff,
 	DCB_GPIO_VID0 = 0x04,
 	DCB_GPIO_VID1 = 0x05,
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h
index 9cb97477248b..e933d3eede70 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/iccsense.h
@@ -1,10 +1,16 @@
 #ifndef __NVBIOS_ICCSENSE_H__
 #define __NVBIOS_ICCSENSE_H__
+struct pwr_rail_resistor_t {
+	u8 mohm;
+	bool enabled;
+};
+
 struct pwr_rail_t {
 	u8 mode;
 	u8 extdev_id;
-	u8 resistor_mohm;
-	u8 rail;
+	u8 resistor_count;
+	struct pwr_rail_resistor_t resistors[3];
+	u16 config;
 };
 
 struct nvbios_iccsense {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h
index d3bd250103d5..478b1c0d2089 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/perf.h
@@ -1,6 +1,6 @@
 #ifndef __NVBIOS_PERF_H__
 #define __NVBIOS_PERF_H__
-u16 nvbios_perf_table(struct nvkm_bios *, u8 *ver, u8 *hdr,
+u32 nvbios_perf_table(struct nvkm_bios *, u8 *ver, u8 *hdr,
 		      u8 *cnt, u8 *len, u8 *snr, u8 *ssz);
 
 struct nvbios_perfE {
@@ -17,9 +17,9 @@ struct nvbios_perfE {
 	u8  pcie_width;
 };
 
-u16 nvbios_perf_entry(struct nvkm_bios *, int idx,
+u32 nvbios_perf_entry(struct nvkm_bios *, int idx,
 		      u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 nvbios_perfEp(struct nvkm_bios *, int idx,
+u32 nvbios_perfEp(struct nvkm_bios *, int idx,
 		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_perfE *);
 
 struct nvbios_perfS {
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
new file mode 100644
index 000000000000..f5f4a14c4030
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/power_budget.h
@@ -0,0 +1,26 @@
+#ifndef __NVBIOS_POWER_BUDGET_H__
+#define __NVBIOS_POWER_BUDGET_H__
+
+#include <nvkm/subdev/bios.h>
+
+struct nvbios_power_budget_entry {
+	u32 min_w;
+	u32 avg_w;
+	u32 max_w;
+};
+
+struct nvbios_power_budget {
+	u32 offset;
+	u8  ver;
+	u8  hlen;
+	u8  elen;
+	u8  ecount;
+	u8  cap_entry;
+};
+
+int nvbios_power_budget_header(struct nvkm_bios *,
+                               struct nvbios_power_budget *);
+int nvbios_power_budget_entry(struct nvkm_bios *, struct nvbios_power_budget *,
+                              u8 idx, struct nvbios_power_budget_entry *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h
index 339a826aa176..38188d4c9ab5 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/timing.h
@@ -2,10 +2,10 @@
 #define __NVBIOS_TIMING_H__
 #include <subdev/bios/ramcfg.h>
 
-u16 nvbios_timingTe(struct nvkm_bios *,
+u32 nvbios_timingTe(struct nvkm_bios *,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz);
-u16 nvbios_timingEe(struct nvkm_bios *, int idx,
+u32 nvbios_timingEe(struct nvkm_bios *, int idx,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 nvbios_timingEp(struct nvkm_bios *, int idx,
+u32 nvbios_timingEp(struct nvkm_bios *, int idx,
 		    u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ramcfg *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h
index 6633c6db9281..bea31cdd1dd1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vmap.h
@@ -1,21 +1,24 @@
 #ifndef __NVBIOS_VMAP_H__
 #define __NVBIOS_VMAP_H__
 struct nvbios_vmap {
+	u8  max0;
+	u8  max1;
+	u8  max2;
 };
 
-u16 nvbios_vmap_table(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 nvbios_vmap_parse(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+u32 nvbios_vmap_table(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_vmap_parse(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		      struct nvbios_vmap *);
 
 struct nvbios_vmap_entry {
-	u8  unk0;
+	u8  mode;
 	u8  link;
 	u32 min;
 	u32 max;
 	s32 arg[6];
 };
 
-u16 nvbios_vmap_entry(struct nvkm_bios *, int idx, u8 *ver, u8 *len);
-u16 nvbios_vmap_entry_parse(struct nvkm_bios *, int idx, u8 *ver, u8 *len,
+u32 nvbios_vmap_entry(struct nvkm_bios *, int idx, u8 *ver, u8 *len);
+u32 nvbios_vmap_entry_parse(struct nvkm_bios *, int idx, u8 *ver, u8 *len,
 			    struct nvbios_vmap_entry *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
index b0df610cec2b..f0baa2c7de09 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/volt.h
@@ -13,16 +13,17 @@ struct nvbios_volt {
 	u32 base;
 
 	/* GPIO mode */
-	u8  vidmask;
-	s16 step;
+	bool ranged;
+	u8   vidmask;
+	s16  step;
 
 	/* PWM mode */
 	u32 pwm_freq;
 	u32 pwm_range;
 };
 
-u16 nvbios_volt_table(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
-u16 nvbios_volt_parse(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
+u32 nvbios_volt_table(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len);
+u32 nvbios_volt_parse(struct nvkm_bios *, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		      struct nvbios_volt *);
 
 struct nvbios_volt_entry {
@@ -30,7 +31,7 @@ struct nvbios_volt_entry {
 	u8  vid;
 };
 
-u16 nvbios_volt_entry(struct nvkm_bios *, int idx, u8 *ver, u8 *len);
-u16 nvbios_volt_entry_parse(struct nvkm_bios *, int idx, u8 *ver, u8 *len,
+u32 nvbios_volt_entry(struct nvkm_bios *, int idx, u8 *ver, u8 *len);
+u32 nvbios_volt_entry_parse(struct nvkm_bios *, int idx, u8 *ver, u8 *len,
 			    struct nvbios_volt_entry *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h
new file mode 100644
index 000000000000..87f804fc3a88
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/bios/vpstate.h
@@ -0,0 +1,24 @@
+#ifndef __NVBIOS_VPSTATE_H__
+#define __NVBIOS_VPSTATE_H__
+struct nvbios_vpstate_header {
+	u32 offset;
+
+	u8 version;
+	u8 hlen;
+	u8 ecount;
+	u8 elen;
+	u8 scount;
+	u8 slen;
+
+	u8 base_id;
+	u8 boost_id;
+	u8 tdp_id;
+};
+struct nvbios_vpstate_entry {
+	u8  pstate;
+	u16 clock_mhz;
+};
+int nvbios_vpstate_parse(struct nvkm_bios *, struct nvbios_vpstate_header *);
+int nvbios_vpstate_entry(struct nvkm_bios *, struct nvbios_vpstate_header *,
+			 u8 idx, struct nvbios_vpstate_entry *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
index fb54417bc458..e5275f742977 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/clk.h
@@ -6,6 +6,10 @@
 struct nvbios_pll;
 struct nvkm_pll_vals;
 
+#define NVKM_CLK_CSTATE_DEFAULT -1 /* POSTed default */
+#define NVKM_CLK_CSTATE_BASE    -2 /* pstate base */
+#define NVKM_CLK_CSTATE_HIGHEST -3 /* highest possible */
+
 enum nv_clk_src {
 	nv_clk_src_crystal,
 	nv_clk_src_href,
@@ -52,6 +56,7 @@ struct nvkm_cstate {
 	struct list_head head;
 	u8  voltage;
 	u32 domain[nv_clk_src_max];
+	u8  id;
 };
 
 struct nvkm_pstate {
@@ -67,7 +72,8 @@ struct nvkm_pstate {
 struct nvkm_domain {
 	enum nv_clk_src name;
 	u8 bios; /* 0xff for none */
-#define NVKM_CLK_DOM_FLAG_CORE 0x01
+#define NVKM_CLK_DOM_FLAG_CORE    0x01
+#define NVKM_CLK_DOM_FLAG_VPSTATE 0x02
 	u8 flags;
 	const char *mname;
 	int mdiv;
@@ -93,10 +99,16 @@ struct nvkm_clk {
 	int ustate_ac; /* user-requested (-1 disabled, -2 perfmon) */
 	int ustate_dc; /* user-requested (-1 disabled, -2 perfmon) */
 	int astate; /* perfmon adjustment (base) */
-	int tstate; /* thermal adjustment (max-) */
 	int dstate; /* display adjustment (min+) */
+	u8  temp;
 
 	bool allow_reclock;
+#define NVKM_CLK_BOOST_NONE 0x0
+#define NVKM_CLK_BOOST_BIOS 0x1
+#define NVKM_CLK_BOOST_FULL 0x2
+	u8  boost_mode;
+	u32 base_khz;
+	u32 boost_khz;
 
 	/*XXX: die, these are here *only* to support the completely
 	 *     bat-shit insane what-was-nouveau_hw.c code
@@ -110,7 +122,7 @@ int nvkm_clk_read(struct nvkm_clk *, enum nv_clk_src);
 int nvkm_clk_ustate(struct nvkm_clk *, int req, int pwr);
 int nvkm_clk_astate(struct nvkm_clk *, int req, int rel, bool wait);
 int nvkm_clk_dstate(struct nvkm_clk *, int req, int rel);
-int nvkm_clk_tstate(struct nvkm_clk *, int req, int rel);
+int nvkm_clk_tstate(struct nvkm_clk *, u8 temperature);
 
 int nv04_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
 int nv40_clk_new(struct nvkm_device *, int, struct nvkm_clk **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
index 3a410275fa71..0b26a4c860ec 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/fb.h
@@ -29,7 +29,7 @@ struct nvkm_mem {
 	u8  page_shift;
 
 	struct nvkm_mm_node *tag;
-	struct list_head regions;
+	struct nvkm_mm_node *mem;
 	dma_addr_t *pages;
 	u32 memtype;
 	u64 offset;
@@ -93,8 +93,9 @@ int gk104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gk20a_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm107_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gm200_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gm20b_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 int gp100_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
-int gp104_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
+int gp102_fb_new(struct nvkm_device *, int, struct nvkm_fb **);
 
 #include <subdev/bios.h>
 #include <subdev/bios/ramcfg.h>
@@ -156,4 +157,6 @@ struct nvkm_ram_func {
 	int (*prog)(struct nvkm_ram *);
 	void (*tidy)(struct nvkm_ram *);
 };
+
+extern const u8 gf100_pte_storage_type_map[256];
 #endif
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
index 3c2ddd975273..b7a9b041e130 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/iccsense.h
@@ -8,6 +8,9 @@ struct nvkm_iccsense {
 	bool data_valid;
 	struct list_head sensors;
 	struct list_head rails;
+
+	u32 power_w_max;
+	u32 power_w_crit;
 };
 
 int gf100_iccsense_new(struct nvkm_device *, int index, struct nvkm_iccsense **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
index 27d25b18d85c..e68ba636741b 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/mc.h
@@ -9,6 +9,7 @@ struct nvkm_mc {
 
 void nvkm_mc_enable(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_disable(struct nvkm_device *, enum nvkm_devidx);
+bool nvkm_mc_enabled(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_reset(struct nvkm_device *, enum nvkm_devidx);
 void nvkm_mc_intr(struct nvkm_device *, bool *handled);
 void nvkm_mc_intr_unarm(struct nvkm_device *);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
index e6523e2cea9f..ac2a695963c1 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pci.h
@@ -43,6 +43,7 @@ int nv40_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv46_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int nv4c_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g84_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
+int g92_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int g94_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf100_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
 int gf106_pci_new(struct nvkm_device *, int, struct nvkm_pci **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
index e61923d5e49c..179b6ed3f595 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/pmu.h
@@ -1,10 +1,12 @@
 #ifndef __NVKM_PMU_H__
 #define __NVKM_PMU_H__
 #include <core/subdev.h>
+#include <engine/falcon.h>
 
 struct nvkm_pmu {
 	const struct nvkm_pmu_func *func;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *falcon;
 
 	struct {
 		u32 base;
@@ -35,6 +37,9 @@ int gk110_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk208_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gk20a_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 int gm107_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
+int gm20b_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
+int gp100_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
+int gp102_pmu_new(struct nvkm_device *, int, struct nvkm_pmu **);
 
 /* interface to MEMX process running on PMU */
 struct nvkm_memx;
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
index b04c38c07761..5dbd8aa4f8c2 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/secboot.h
@@ -26,7 +26,7 @@
 #include <core/subdev.h>
 
 enum nvkm_secboot_falcon {
-	NVKM_SECBOOT_FALCON_PMU	= 0,
+	NVKM_SECBOOT_FALCON_PMU = 0,
 	NVKM_SECBOOT_FALCON_RESERVED = 1,
 	NVKM_SECBOOT_FALCON_FECS = 2,
 	NVKM_SECBOOT_FALCON_GPCCS = 3,
@@ -35,22 +35,23 @@ enum nvkm_secboot_falcon {
 };
 
 /**
- * @base:		base IO address of the falcon performing secure boot
- * @irq_mask:		IRQ mask of the falcon performing secure boot
- * @enable_mask:	enable mask of the falcon performing secure boot
+ * @wpr_set: whether the WPR region is currently set
 */
 struct nvkm_secboot {
 	const struct nvkm_secboot_func *func;
+	struct nvkm_acr *acr;
 	struct nvkm_subdev subdev;
+	struct nvkm_falcon *boot_falcon;
 
-	enum nvkm_devidx devidx;
-	u32 base;
+	u64 wpr_addr;
+	u32 wpr_size;
+
+	bool wpr_set;
 };
 #define nvkm_secboot(p) container_of((p), struct nvkm_secboot, subdev)
 
 bool nvkm_secboot_is_managed(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-int nvkm_secboot_reset(struct nvkm_secboot *, u32 falcon);
-int nvkm_secboot_start(struct nvkm_secboot *, u32 falcon);
+int nvkm_secboot_reset(struct nvkm_secboot *, enum nvkm_secboot_falcon);
 
 int gm200_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
 int gm20b_secboot_new(struct nvkm_device *, int, struct nvkm_secboot **);
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
index 82d3e28918fd..6a567fe347b3 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/timer.h
@@ -48,10 +48,8 @@ void nvkm_timer_alarm_cancel(struct nvkm_timer *, struct nvkm_alarm *);
 	} while (_taken = nvkm_timer_read(_tmr) - _time0, _taken < _nsecs);    \
                                                                                \
 	if (_taken >= _nsecs) {                                                \
-		if (_warn) {                                                   \
-			dev_warn(_device->dev, "timeout at %s:%d/%s()!\n",     \
-				 __FILE__, __LINE__, __func__);                \
-		}                                                              \
+		if (_warn)                                                     \
+			dev_WARN(_device->dev, "timeout\n");                   \
 		_taken = -ETIMEDOUT;                                           \
 	}                                                                      \
 	_taken;                                                                \
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
index 71ebbfd4484f..d23209b62c25 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/top.h
@@ -11,6 +11,7 @@ struct nvkm_top {
 u32 nvkm_top_reset(struct nvkm_device *, enum nvkm_devidx);
 u32 nvkm_top_intr(struct nvkm_device *, u32 intr, u64 *subdevs);
 u32 nvkm_top_intr_mask(struct nvkm_device *, enum nvkm_devidx);
+int nvkm_top_fault_id(struct nvkm_device *, enum nvkm_devidx);
 enum nvkm_devidx nvkm_top_fault(struct nvkm_device *, int fault);
 enum nvkm_devidx nvkm_top_engine(struct nvkm_device *, int, int *runl, int *engn);
 
diff --git a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
index b765f4ffcde6..08ef9983c643 100644
--- a/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
+++ b/drivers/gpu/drm/nouveau/include/nvkm/subdev/volt.h
@@ -15,12 +15,28 @@ struct nvkm_volt {
 
 	u32 max_uv;
 	u32 min_uv;
+
+	/*
+	 * These are fully functional map entries creating a sw ceiling for
+	 * the voltage. These all can describe different kind of curves, so
+	 * that for any given temperature a different one can return the lowest
+	 * value of all three.
+	 */
+	u8 max0_id;
+	u8 max1_id;
+	u8 max2_id;
+
+	int speedo;
 };
 
+int nvkm_volt_map(struct nvkm_volt *volt, u8 id, u8 temperature);
+int nvkm_volt_map_min(struct nvkm_volt *volt, u8 id);
 int nvkm_volt_get(struct nvkm_volt *);
-int nvkm_volt_set_id(struct nvkm_volt *, u8 id, int condition);
+int nvkm_volt_set_id(struct nvkm_volt *, u8 id, u8 min_id, u8 temp,
+		     int condition);
 
 int nv40_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
+int gf100_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk104_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gk20a_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
 int gm20b_volt_new(struct nvkm_device *, int, struct nvkm_volt **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_abi16.c b/drivers/gpu/drm/nouveau/nouveau_abi16.c
index 7bd4683216d0..f98f800cc011 100644
--- a/drivers/gpu/drm/nouveau/nouveau_abi16.c
+++ b/drivers/gpu/drm/nouveau/nouveau_abi16.c
@@ -87,7 +87,7 @@ nouveau_abi16_put(struct nouveau_abi16 *abi16, int ret)
 s32
 nouveau_abi16_swclass(struct nouveau_drm *drm)
 {
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 		return NVIF_CLASS_SW_NV04;
 	case NV_DEVICE_INFO_V0_CELSIUS:
@@ -175,7 +175,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvkm_gr *gr = nvxx_gr(device);
 	struct drm_nouveau_getparam *getparam = data;
 
@@ -199,7 +199,7 @@ nouveau_abi16_ioctl_getparam(ABI16_IOCTL_ARGS)
 		if (!nvxx_device(device)->func->pci)
 			getparam->value = 3;
 		else
-		if (drm_pci_device_is_agp(dev))
+		if (pci_find_capability(dev->pdev, PCI_CAP_ID_AGP))
 			getparam->value = 0;
 		else
 		if (!pci_is_pcie(dev->pdev))
@@ -321,7 +321,7 @@ nouveau_abi16_ioctl_channel_alloc(ABI16_IOCTL_ARGS)
 	}
 
 	/* Named memory object area */
-	ret = nouveau_gem_new(dev, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
+	ret = nouveau_gem_new(cli, PAGE_SIZE, 0, NOUVEAU_GEM_DOMAIN_GART,
 			      0, 0, &chan->ntfy);
 	if (ret == 0)
 		ret = nouveau_bo_pin(chan->ntfy, TTM_PL_FLAG_TT, false);
diff --git a/drivers/gpu/drm/nouveau/nouveau_backlight.c b/drivers/gpu/drm/nouveau/nouveau_backlight.c
index f5101be806cb..380f340204e8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_backlight.c
+++ b/drivers/gpu/drm/nouveau/nouveau_backlight.c
@@ -30,17 +30,42 @@
  * Register locations derived from NVClock by Roderick Colenbrander
  */
 
+#include <linux/apple-gmux.h>
 #include <linux/backlight.h>
+#include <linux/idr.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_reg.h"
 #include "nouveau_encoder.h"
 
+static struct ida bl_ida;
+#define BL_NAME_SIZE 15 // 12 for name + 2 for digits + 1 for '\0'
+
+struct backlight_connector {
+	struct list_head head;
+	int id;
+};
+
+static bool
+nouveau_get_backlight_name(char backlight_name[BL_NAME_SIZE], struct backlight_connector
+		*connector)
+{
+	const int nb = ida_simple_get(&bl_ida, 0, 0, GFP_KERNEL);
+	if (nb < 0 || nb >= 100)
+		return false;
+	if (nb > 0)
+		snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight%d", nb);
+	else
+		snprintf(backlight_name, BL_NAME_SIZE, "nv_backlight");
+	connector->id = nb;
+	return true;
+}
+
 static int
 nv40_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = (nvif_rd32(device, NV40_PMC_BACKLIGHT) &
 				   NV40_PMC_BACKLIGHT_MASK) >> 16;
 
@@ -51,7 +76,7 @@ static int
 nv40_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_drm *drm = bl_get_data(bd);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int val = bd->props.brightness;
 	int reg = nvif_rd32(device, NV40_PMC_BACKLIGHT);
 
@@ -71,9 +96,11 @@ static int
 nv40_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct backlight_properties props;
 	struct backlight_device *bd;
+	struct backlight_connector bl_connector;
+	char backlight_name[BL_NAME_SIZE];
 
 	if (!(nvif_rd32(device, NV40_PMC_BACKLIGHT) & NV40_PMC_BACKLIGHT_MASK))
 		return 0;
@@ -81,10 +108,19 @@ nv40_backlight_init(struct drm_connector *connector)
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_RAW;
 	props.max_brightness = 31;
-	bd = backlight_device_register("nv_backlight", connector->kdev, drm,
+	if (!nouveau_get_backlight_name(backlight_name, &bl_connector)) {
+		NV_ERROR(drm, "Failed to retrieve a unique name for the backlight interface\n");
+		return 0;
+	}
+	bd = backlight_device_register(backlight_name , connector->kdev, drm,
 				       &nv40_bl_ops, &props);
-	if (IS_ERR(bd))
+
+	if (IS_ERR(bd)) {
+		if (bl_connector.id > 0)
+			ida_simple_remove(&bl_ida, bl_connector.id);
 		return PTR_ERR(bd);
+	}
+	list_add(&bl_connector.head, &drm->bl_connectors);
 	drm->backlight = bd;
 	bd->props.brightness = nv40_get_intensity(bd);
 	backlight_update_status(bd);
@@ -97,7 +133,7 @@ nv50_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val;
@@ -112,7 +148,7 @@ nv50_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div = 1025;
 	u32 val = (bd->props.brightness * div) / 100;
@@ -133,7 +169,7 @@ nva3_get_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -151,7 +187,7 @@ nva3_set_intensity(struct backlight_device *bd)
 {
 	struct nouveau_encoder *nv_encoder = bl_get_data(bd);
 	struct nouveau_drm *drm = nouveau_drm(nv_encoder->base.base.dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	int or = nv_encoder->or;
 	u32 div, val;
 
@@ -177,11 +213,13 @@ static int
 nv50_backlight_init(struct drm_connector *connector)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nouveau_encoder *nv_encoder;
 	struct backlight_properties props;
 	struct backlight_device *bd;
 	const struct backlight_ops *ops;
+	struct backlight_connector bl_connector;
+	char backlight_name[BL_NAME_SIZE];
 
 	nv_encoder = find_encoder(connector, DCB_OUTPUT_LVDS);
 	if (!nv_encoder) {
@@ -193,9 +231,9 @@ nv50_backlight_init(struct drm_connector *connector)
 	if (!nvif_rd32(device, NV50_PDISP_SOR_PWM_CTL(nv_encoder->or)))
 		return 0;
 
-	if (drm->device.info.chipset <= 0xa0 ||
-	    drm->device.info.chipset == 0xaa ||
-	    drm->device.info.chipset == 0xac)
+	if (drm->client.device.info.chipset <= 0xa0 ||
+	    drm->client.device.info.chipset == 0xaa ||
+	    drm->client.device.info.chipset == 0xac)
 		ops = &nv50_bl_ops;
 	else
 		ops = &nva3_bl_ops;
@@ -203,11 +241,20 @@ nv50_backlight_init(struct drm_connector *connector)
 	memset(&props, 0, sizeof(struct backlight_properties));
 	props.type = BACKLIGHT_RAW;
 	props.max_brightness = 100;
-	bd = backlight_device_register("nv_backlight", connector->kdev,
+	if (!nouveau_get_backlight_name(backlight_name, &bl_connector)) {
+		NV_ERROR(drm, "Failed to retrieve a unique name for the backlight interface\n");
+		return 0;
+	}
+	bd = backlight_device_register(backlight_name , connector->kdev,
 				       nv_encoder, ops, &props);
-	if (IS_ERR(bd))
+
+	if (IS_ERR(bd)) {
+		if (bl_connector.id > 0)
+			ida_simple_remove(&bl_ida, bl_connector.id);
 		return PTR_ERR(bd);
+	}
 
+	list_add(&bl_connector.head, &drm->bl_connectors);
 	drm->backlight = bd;
 	bd->props.brightness = bd->ops->get_brightness(bd);
 	backlight_update_status(bd);
@@ -218,9 +265,16 @@ int
 nouveau_backlight_init(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct drm_connector *connector;
 
+	if (apple_gmux_present()) {
+		NV_INFO(drm, "Apple GMUX detected: not registering Nouveau backlight interface\n");
+		return 0;
+	}
+
+	INIT_LIST_HEAD(&drm->bl_connectors);
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		if (connector->connector_type != DRM_MODE_CONNECTOR_LVDS &&
 		    connector->connector_type != DRM_MODE_CONNECTOR_eDP)
@@ -232,6 +286,7 @@ nouveau_backlight_init(struct drm_device *dev)
 		case NV_DEVICE_INFO_V0_TESLA:
 		case NV_DEVICE_INFO_V0_FERMI:
 		case NV_DEVICE_INFO_V0_KEPLER:
+		case NV_DEVICE_INFO_V0_MAXWELL:
 			return nv50_backlight_init(connector);
 		default:
 			break;
@@ -246,9 +301,27 @@ void
 nouveau_backlight_exit(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct backlight_connector *connector;
+
+	list_for_each_entry(connector, &drm->bl_connectors, head) {
+		if (connector->id >= 0)
+			ida_simple_remove(&bl_ida, connector->id);
+	}
 
 	if (drm->backlight) {
 		backlight_device_unregister(drm->backlight);
 		drm->backlight = NULL;
 	}
 }
+
+void
+nouveau_backlight_ctor(void)
+{
+	ida_init(&bl_ida);
+}
+
+void
+nouveau_backlight_dtor(void)
+{
+	ida_destroy(&bl_ida);
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.c b/drivers/gpu/drm/nouveau/nouveau_bios.c
index a1570b109434..9a0772ad495a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.c
@@ -215,7 +215,7 @@ int call_lvds_script(struct drm_device *dev, struct dcb_output *dcbent, int head
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	uint8_t lvds_ver = bios->data[bios->fp.lvdsmanufacturerpointer];
 	uint32_t sel_clk_binding, sel_clk;
@@ -319,7 +319,7 @@ static int
 get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
 	/*
 	 * The fp strap is normally dictated by the "User Strap" in
@@ -333,7 +333,10 @@ get_fp_strap(struct drm_device *dev, struct nvbios *bios)
 	if (bios->major_version < 5 && bios->data[0x48] & 0x4)
 		return NVReadVgaCrtc5758(dev, 0, 0xf) & 0xf;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_MAXWELL)
+		return nvif_rd32(device, 0x001800) & 0x0000000f;
+	else
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 24) & 0xf;
 	else
 		return (nvif_rd32(device, NV_PEXTDEV_BOOT_0) >> 16) & 0xf;
@@ -635,7 +638,7 @@ int run_tmds_table(struct drm_device *dev, struct dcb_output *dcbent, int head,
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	struct nvbios *bios = &drm->vbios;
 	int cv = bios->chip_version;
 	uint16_t clktable = 0, scriptptr;
@@ -1252,7 +1255,7 @@ olddcb_table(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	u8 *dcb = NULL;
 
-	if (drm->device.info.family > NV_DEVICE_INFO_V0_TNT)
+	if (drm->client.device.info.family > NV_DEVICE_INFO_V0_TNT)
 		dcb = ROMPTR(dev, drm->vbios.data[0x36]);
 	if (!dcb) {
 		NV_WARN(drm, "No DCB data found in VBIOS\n");
@@ -1915,7 +1918,7 @@ static int load_nv17_hwsq_ucode_entry(struct drm_device *dev, struct nvbios *bio
 	 */
 
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 	uint8_t bytes_to_write;
 	uint16_t hwsq_entry_offset;
 	int i;
@@ -2009,7 +2012,7 @@ uint8_t *nouveau_bios_embedded_edid(struct drm_device *dev)
 static bool NVInitVBIOS(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_bios *bios = nvxx_bios(&drm->device);
+	struct nvkm_bios *bios = nvxx_bios(&drm->client.device);
 	struct nvbios *legacy = &drm->vbios;
 
 	memset(legacy, 0, sizeof(struct nvbios));
@@ -2061,7 +2064,7 @@ nouveau_bios_posted(struct drm_device *dev)
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	unsigned htotal;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		return true;
 
 	htotal  = NVReadVgaCrtc(dev, 0, 0x06);
diff --git a/drivers/gpu/drm/nouveau/nouveau_bios.h b/drivers/gpu/drm/nouveau/nouveau_bios.h
index 0067586eb015..18eb061ccafb 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bios.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bios.h
@@ -31,10 +31,8 @@
 
 #define DCB_LOC_ON_CHIP 0
 
-#define ROM16(x) le16_to_cpu(*(u16 *)&(x))
-#define ROM32(x) le32_to_cpu(*(u32 *)&(x))
-#define ROM48(x) ({ u8 *p = &(x); (u64)ROM16(p[4]) << 32 | ROM32(p[0]); })
-#define ROM64(x) le64_to_cpu(*(u64 *)&(x))
+#define ROM16(x) get_unaligned_le16(&(x))
+#define ROM32(x) get_unaligned_le32(&(x))
 #define ROMPTR(d,x) ({            \
 	struct nouveau_drm *drm = nouveau_drm((d)); \
 	ROM16(x) ? &drm->vbios.data[ROM16(x)] : NULL; \
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.c b/drivers/gpu/drm/nouveau/nouveau_bo.c
index 343b8659472c..548f36d33924 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.c
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.c
@@ -48,7 +48,7 @@ nv10_bo_update_tile_region(struct drm_device *dev, struct nouveau_drm_tile *reg,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	int i = reg - drm->tile.reg;
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_fb *fb = device->fb;
 	struct nvkm_fb_tile *tile = &fb->tile.region[i];
 
@@ -83,13 +83,13 @@ nv10_bo_get_tile_region(struct drm_device *dev, int i)
 
 static void
 nv10_bo_put_tile_region(struct drm_device *dev, struct nouveau_drm_tile *tile,
-			struct fence *fence)
+			struct dma_fence *fence)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 
 	if (tile) {
 		spin_lock(&drm->tile.lock);
-		tile->fence = (struct nouveau_fence *)fence_get(fence);
+		tile->fence = (struct nouveau_fence *)dma_fence_get(fence);
 		tile->used = false;
 		spin_unlock(&drm->tile.lock);
 	}
@@ -100,7 +100,7 @@ nv10_bo_set_tiling(struct drm_device *dev, u32 addr,
 		   u32 size, u32 pitch, u32 flags)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct nouveau_drm_tile *tile, *found = NULL;
 	int i;
 
@@ -139,60 +139,62 @@ nouveau_bo_del_ttm(struct ttm_buffer_object *bo)
 	kfree(nvbo);
 }
 
+static inline u64
+roundup_64(u64 x, u32 y)
+{
+	x += y - 1;
+	do_div(x, y);
+	return x * y;
+}
+
 static void
 nouveau_bo_fixup_align(struct nouveau_bo *nvbo, u32 flags,
-		       int *align, int *size)
+		       int *align, u64 *size)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 
 	if (device->info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (nvbo->tile_mode) {
 			if (device->info.chipset >= 0x40) {
 				*align = 65536;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x30) {
 				*align = 32768;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x20) {
 				*align = 16384;
-				*size = roundup(*size, 64 * nvbo->tile_mode);
+				*size = roundup_64(*size, 64 * nvbo->tile_mode);
 
 			} else if (device->info.chipset >= 0x10) {
 				*align = 16384;
-				*size = roundup(*size, 32 * nvbo->tile_mode);
+				*size = roundup_64(*size, 32 * nvbo->tile_mode);
 			}
 		}
 	} else {
-		*size = roundup(*size, (1 << nvbo->page_shift));
+		*size = roundup_64(*size, (1 << nvbo->page_shift));
 		*align = max((1 <<  nvbo->page_shift), *align);
 	}
 
-	*size = roundup(*size, PAGE_SIZE);
+	*size = roundup_64(*size, PAGE_SIZE);
 }
 
 int
-nouveau_bo_new(struct drm_device *dev, int size, int align,
+nouveau_bo_new(struct nouveau_cli *cli, u64 size, int align,
 	       uint32_t flags, uint32_t tile_mode, uint32_t tile_flags,
 	       struct sg_table *sg, struct reservation_object *robj,
 	       struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	size_t acc_size;
 	int ret;
 	int type = ttm_bo_type_device;
-	int lpg_shift = 12;
-	int max_size;
-
-	if (drm->client.vm)
-		lpg_shift = drm->client.vm->mmu->lpg_shift;
-	max_size = INT_MAX & ~((1 << lpg_shift) - 1);
 
-	if (size <= 0 || size > max_size) {
-		NV_WARN(drm, "skipped size %x\n", (u32)size);
+	if (!size) {
+		NV_WARN(drm, "skipped size %016llx\n", size);
 		return -EINVAL;
 	}
 
@@ -208,8 +210,9 @@ nouveau_bo_new(struct drm_device *dev, int size, int align,
 	nvbo->tile_mode = tile_mode;
 	nvbo->tile_flags = tile_flags;
 	nvbo->bo.bdev = &drm->ttm.bdev;
+	nvbo->cli = cli;
 
-	if (!nvxx_device(&drm->device)->func->cpu_coherent)
+	if (!nvxx_device(&drm->client.device)->func->cpu_coherent)
 		nvbo->force_coherent = flags & TTM_PL_FLAG_UNCACHED;
 
 	nvbo->page_shift = 12;
@@ -255,10 +258,10 @@ static void
 set_placement_range(struct nouveau_bo *nvbo, uint32_t type)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	u32 vram_pages = drm->device.info.ram_size >> PAGE_SHIFT;
+	u32 vram_pages = drm->client.device.info.ram_size >> PAGE_SHIFT;
 	unsigned i, fpfn, lpfn;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 	    nvbo->tile_mode && (type & TTM_PL_FLAG_VRAM) &&
 	    nvbo->bo.mem.num_pages < vram_pages / 4) {
 		/*
@@ -316,12 +319,12 @@ nouveau_bo_pin(struct nouveau_bo *nvbo, uint32_t memtype, bool contig)
 	if (ret)
 		return ret;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA &&
 	    memtype == TTM_PL_FLAG_VRAM && contig) {
 		if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG) {
 			if (bo->mem.mem_type == TTM_PL_VRAM) {
 				struct nvkm_mem *mem = bo->mem.mm_node;
-				if (!list_is_singular(&mem->regions))
+				if (!nvkm_mm_contiguous(mem->mem))
 					evict = true;
 			}
 			nvbo->tile_flags &= ~NOUVEAU_GEM_TILE_NONCONTIG;
@@ -443,7 +446,7 @@ void
 nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -463,7 +466,7 @@ void
 nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(nvbo->bo.bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct ttm_dma_tt *ttm_dma = (struct ttm_dma_tt *)nvbo->bo.ttm;
 	int i;
 
@@ -579,9 +582,9 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 					 TTM_PL_FLAG_WC;
 		man->default_caching = TTM_PL_FLAG_WC;
 
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 			/* Some BARs do not support being ioremapped WC */
-			if (nvxx_bar(&drm->device)->iomap_uncached) {
+			if (nvxx_bar(&drm->client.device)->iomap_uncached) {
 				man->available_caching = TTM_PL_FLAG_UNCACHED;
 				man->default_caching = TTM_PL_FLAG_UNCACHED;
 			}
@@ -594,7 +597,7 @@ nouveau_bo_init_mem_type(struct ttm_bo_device *bdev, uint32_t type,
 		}
 		break;
 	case TTM_PL_TT:
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 			man->func = &nouveau_gart_manager;
 		else
 		if (!drm->agp.bridge)
@@ -654,20 +657,20 @@ nve0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nve0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 10);
 	if (ret == 0) {
 		BEGIN_NVC0(chan, NvSubCopy, 0x0400, 8);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
 		OUT_RING  (chan, PAGE_SIZE);
-		OUT_RING  (chan, new_mem->num_pages);
+		OUT_RING  (chan, new_reg->num_pages);
 		BEGIN_IMC0(chan, NvSubCopy, 0x0300, 0x0386);
 	}
 	return ret;
@@ -686,15 +689,15 @@ nvc0_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -724,15 +727,15 @@ nvc0_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -763,15 +766,15 @@ nvc0_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	u32 page_count = new_mem->num_pages;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 8191) ? 8191 : page_count;
 
@@ -801,35 +804,35 @@ nva3_bo_move_copy(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nv98_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0320, 6);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* COPY */);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
 	}
 	return ret;
 }
 
 static int
 nv84_bo_move_exec(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
+	struct nvkm_mem *mem = old_reg->mm_node;
 	int ret = RING_SPACE(chan, 7);
 	if (ret == 0) {
 		BEGIN_NV04(chan, NvSubCopy, 0x0304, 6);
-		OUT_RING  (chan, new_mem->num_pages << PAGE_SHIFT);
-		OUT_RING  (chan, upper_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[0].offset));
-		OUT_RING  (chan, upper_32_bits(node->vma[1].offset));
-		OUT_RING  (chan, lower_32_bits(node->vma[1].offset));
+		OUT_RING  (chan, new_reg->num_pages << PAGE_SHIFT);
+		OUT_RING  (chan, upper_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[0].offset));
+		OUT_RING  (chan, upper_32_bits(mem->vma[1].offset));
+		OUT_RING  (chan, lower_32_bits(mem->vma[1].offset));
 		OUT_RING  (chan, 0x00000000 /* MODE_COPY, QUERY_NONE */);
 	}
 	return ret;
@@ -853,14 +856,14 @@ nv50_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static int
 nv50_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	struct nvkm_mem *node = old_mem->mm_node;
-	u64 length = (new_mem->num_pages << PAGE_SHIFT);
-	u64 src_offset = node->vma[0].offset;
-	u64 dst_offset = node->vma[1].offset;
-	int src_tiled = !!node->memtype;
-	int dst_tiled = !!((struct nvkm_mem *)new_mem->mm_node)->memtype;
+	struct nvkm_mem *mem = old_reg->mm_node;
+	u64 length = (new_reg->num_pages << PAGE_SHIFT);
+	u64 src_offset = mem->vma[0].offset;
+	u64 dst_offset = mem->vma[1].offset;
+	int src_tiled = !!mem->memtype;
+	int dst_tiled = !!((struct nvkm_mem *)new_reg->mm_node)->memtype;
 	int ret;
 
 	while (length) {
@@ -940,20 +943,20 @@ nv04_bo_move_init(struct nouveau_channel *chan, u32 handle)
 
 static inline uint32_t
 nouveau_bo_mem_ctxdma(struct ttm_buffer_object *bo,
-		      struct nouveau_channel *chan, struct ttm_mem_reg *mem)
+		      struct nouveau_channel *chan, struct ttm_mem_reg *reg)
 {
-	if (mem->mem_type == TTM_PL_TT)
+	if (reg->mem_type == TTM_PL_TT)
 		return NvDmaTT;
 	return chan->vram.handle;
 }
 
 static int
 nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
-		  struct ttm_mem_reg *old_mem, struct ttm_mem_reg *new_mem)
+		  struct ttm_mem_reg *old_reg, struct ttm_mem_reg *new_reg)
 {
-	u32 src_offset = old_mem->start << PAGE_SHIFT;
-	u32 dst_offset = new_mem->start << PAGE_SHIFT;
-	u32 page_count = new_mem->num_pages;
+	u32 src_offset = old_reg->start << PAGE_SHIFT;
+	u32 dst_offset = new_reg->start << PAGE_SHIFT;
+	u32 page_count = new_reg->num_pages;
 	int ret;
 
 	ret = RING_SPACE(chan, 3);
@@ -961,10 +964,10 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 		return ret;
 
 	BEGIN_NV04(chan, NvSubCopy, NV_MEMORY_TO_MEMORY_FORMAT_DMA_SOURCE, 2);
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_mem));
-	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_mem));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, old_reg));
+	OUT_RING  (chan, nouveau_bo_mem_ctxdma(bo, chan, new_reg));
 
-	page_count = new_mem->num_pages;
+	page_count = new_reg->num_pages;
 	while (page_count) {
 		int line_count = (page_count > 2047) ? 2047 : page_count;
 
@@ -995,33 +998,33 @@ nv04_bo_move_m2mf(struct nouveau_channel *chan, struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move_prep(struct nouveau_drm *drm, struct ttm_buffer_object *bo,
-		     struct ttm_mem_reg *mem)
+		     struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *old_node = bo->mem.mm_node;
-	struct nvkm_mem *new_node = mem->mm_node;
-	u64 size = (u64)mem->num_pages << PAGE_SHIFT;
+	struct nvkm_mem *old_mem = bo->mem.mm_node;
+	struct nvkm_mem *new_mem = reg->mm_node;
+	u64 size = (u64)reg->num_pages << PAGE_SHIFT;
 	int ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, old_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[0]);
+	ret = nvkm_vm_get(drm->client.vm, size, old_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[0]);
 	if (ret)
 		return ret;
 
-	ret = nvkm_vm_get(drm->client.vm, size, new_node->page_shift,
-			  NV_MEM_ACCESS_RW, &old_node->vma[1]);
+	ret = nvkm_vm_get(drm->client.vm, size, new_mem->page_shift,
+			  NV_MEM_ACCESS_RW, &old_mem->vma[1]);
 	if (ret) {
-		nvkm_vm_put(&old_node->vma[0]);
+		nvkm_vm_put(&old_mem->vma[0]);
 		return ret;
 	}
 
-	nvkm_vm_map(&old_node->vma[0], old_node);
-	nvkm_vm_map(&old_node->vma[1], new_node);
+	nvkm_vm_map(&old_mem->vma[0], old_mem);
+	nvkm_vm_map(&old_mem->vma[1], new_mem);
 	return 0;
 }
 
 static int
 nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
-		     bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		     bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_channel *chan = drm->ttm.chan;
@@ -1033,8 +1036,8 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	 * old nvkm_mem node, these will get cleaned up after ttm has
 	 * destroyed the ttm_mem_reg
 	 */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_move_prep(drm, bo, new_mem);
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_move_prep(drm, bo, new_reg);
 		if (ret)
 			return ret;
 	}
@@ -1042,14 +1045,14 @@ nouveau_bo_move_m2mf(struct ttm_buffer_object *bo, int evict, bool intr,
 	mutex_lock_nested(&cli->mutex, SINGLE_DEPTH_NESTING);
 	ret = nouveau_fence_sync(nouveau_bo(bo), chan, true, intr);
 	if (ret == 0) {
-		ret = drm->ttm.move(chan, bo, &bo->mem, new_mem);
+		ret = drm->ttm.move(chan, bo, &bo->mem, new_reg);
 		if (ret == 0) {
 			ret = nouveau_fence_new(chan, false, &fence);
 			if (ret == 0) {
 				ret = ttm_bo_move_accel_cleanup(bo,
 								&fence->base,
 								evict,
-								new_mem);
+								new_reg);
 				nouveau_fence_unref(&fence);
 			}
 		}
@@ -1124,7 +1127,7 @@ nouveau_bo_move_init(struct nouveau_drm *drm)
 
 static int
 nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1132,35 +1135,35 @@ nouveau_bo_move_flipd(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_tt_bind(bo->ttm, &tmp_mem);
+	ret = ttm_tt_bind(bo->ttm, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, new_reg);
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static int
 nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
-		      bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		      bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct ttm_place placement_memtype = {
 		.fpfn = 0,
@@ -1168,33 +1171,34 @@ nouveau_bo_move_flips(struct ttm_buffer_object *bo, bool evict, bool intr,
 		.flags = TTM_PL_FLAG_TT | TTM_PL_MASK_CACHING
 	};
 	struct ttm_placement placement;
-	struct ttm_mem_reg tmp_mem;
+	struct ttm_mem_reg tmp_reg;
 	int ret;
 
 	placement.num_placement = placement.num_busy_placement = 1;
 	placement.placement = placement.busy_placement = &placement_memtype;
 
-	tmp_mem = *new_mem;
-	tmp_mem.mm_node = NULL;
-	ret = ttm_bo_mem_space(bo, &placement, &tmp_mem, intr, no_wait_gpu);
+	tmp_reg = *new_reg;
+	tmp_reg.mm_node = NULL;
+	ret = ttm_bo_mem_space(bo, &placement, &tmp_reg, intr, no_wait_gpu);
 	if (ret)
 		return ret;
 
-	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_mem);
+	ret = ttm_bo_move_ttm(bo, intr, no_wait_gpu, &tmp_reg);
 	if (ret)
 		goto out;
 
-	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_mem);
+	ret = nouveau_bo_move_m2mf(bo, true, intr, no_wait_gpu, new_reg);
 	if (ret)
 		goto out;
 
 out:
-	ttm_bo_mem_put(bo, &tmp_mem);
+	ttm_bo_mem_put(bo, &tmp_reg);
 	return ret;
 }
 
 static void
-nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
+nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, bool evict,
+		     struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_vma *vma;
@@ -1204,31 +1208,32 @@ nouveau_bo_move_ntfy(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem)
 		return;
 
 	list_for_each_entry(vma, &nvbo->vma_list, head) {
-		if (new_mem && new_mem->mem_type != TTM_PL_SYSTEM &&
-			      (new_mem->mem_type == TTM_PL_VRAM ||
+		if (new_reg && new_reg->mem_type != TTM_PL_SYSTEM &&
+			      (new_reg->mem_type == TTM_PL_VRAM ||
 			       nvbo->page_shift != vma->vm->mmu->lpg_shift)) {
-			nvkm_vm_map(vma, new_mem->mm_node);
+			nvkm_vm_map(vma, new_reg->mm_node);
 		} else {
+			WARN_ON(ttm_bo_wait(bo, false, false));
 			nvkm_vm_unmap(vma);
 		}
 	}
 }
 
 static int
-nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_mem,
+nouveau_bo_vm_bind(struct ttm_buffer_object *bo, struct ttm_mem_reg *new_reg,
 		   struct nouveau_drm_tile **new_tile)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	u64 offset = new_mem->start << PAGE_SHIFT;
+	u64 offset = new_reg->start << PAGE_SHIFT;
 
 	*new_tile = NULL;
-	if (new_mem->mem_type != TTM_PL_VRAM)
+	if (new_reg->mem_type != TTM_PL_VRAM)
 		return 0;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
-		*new_tile = nv10_bo_set_tiling(dev, offset, new_mem->size,
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_CELSIUS) {
+		*new_tile = nv10_bo_set_tiling(dev, offset, new_reg->size,
 						nvbo->tile_mode,
 						nvbo->tile_flags);
 	}
@@ -1243,7 +1248,7 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct drm_device *dev = drm->dev;
-	struct fence *fence = reservation_object_get_excl(bo->resv);
+	struct dma_fence *fence = reservation_object_get_excl(bo->resv);
 
 	nv10_bo_put_tile_region(dev, *old_tile, fence);
 	*old_tile = new_tile;
@@ -1251,11 +1256,11 @@ nouveau_bo_vm_cleanup(struct ttm_buffer_object *bo,
 
 static int
 nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
-		bool no_wait_gpu, struct ttm_mem_reg *new_mem)
+		bool no_wait_gpu, struct ttm_mem_reg *new_reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct ttm_mem_reg *old_mem = &bo->mem;
+	struct ttm_mem_reg *old_reg = &bo->mem;
 	struct nouveau_drm_tile *new_tile = NULL;
 	int ret = 0;
 
@@ -1266,31 +1271,31 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	if (nvbo->pin_refcnt)
 		NV_WARN(drm, "Moving pinned object %p!\n", nvbo);
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_vm_bind(bo, new_mem, &new_tile);
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		ret = nouveau_bo_vm_bind(bo, new_reg, &new_tile);
 		if (ret)
 			return ret;
 	}
 
 	/* Fake bo copy. */
-	if (old_mem->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
+	if (old_reg->mem_type == TTM_PL_SYSTEM && !bo->ttm) {
 		BUG_ON(bo->mem.mm_node != NULL);
-		bo->mem = *new_mem;
-		new_mem->mm_node = NULL;
+		bo->mem = *new_reg;
+		new_reg->mm_node = NULL;
 		goto out;
 	}
 
 	/* Hardware assisted copy. */
 	if (drm->ttm.move) {
-		if (new_mem->mem_type == TTM_PL_SYSTEM)
+		if (new_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flipd(bo, evict, intr,
-						    no_wait_gpu, new_mem);
-		else if (old_mem->mem_type == TTM_PL_SYSTEM)
+						    no_wait_gpu, new_reg);
+		else if (old_reg->mem_type == TTM_PL_SYSTEM)
 			ret = nouveau_bo_move_flips(bo, evict, intr,
-						    no_wait_gpu, new_mem);
+						    no_wait_gpu, new_reg);
 		else
 			ret = nouveau_bo_move_m2mf(bo, evict, intr,
-						   no_wait_gpu, new_mem);
+						   no_wait_gpu, new_reg);
 		if (!ret)
 			goto out;
 	}
@@ -1298,10 +1303,10 @@ nouveau_bo_move(struct ttm_buffer_object *bo, bool evict, bool intr,
 	/* Fallback to software copy. */
 	ret = ttm_bo_wait(bo, intr, no_wait_gpu);
 	if (ret == 0)
-		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_mem);
+		ret = ttm_bo_move_memcpy(bo, intr, no_wait_gpu, new_reg);
 
 out:
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		if (ret)
 			nouveau_bo_vm_cleanup(bo, NULL, &new_tile);
 		else
@@ -1321,54 +1326,54 @@ nouveau_bo_verify_access(struct ttm_buffer_object *bo, struct file *filp)
 }
 
 static int
-nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct ttm_mem_type_manager *man = &bdev->man[mem->mem_type];
+	struct ttm_mem_type_manager *man = &bdev->man[reg->mem_type];
 	struct nouveau_drm *drm = nouveau_bdev(bdev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
+	struct nvkm_mem *mem = reg->mm_node;
 	int ret;
 
-	mem->bus.addr = NULL;
-	mem->bus.offset = 0;
-	mem->bus.size = mem->num_pages << PAGE_SHIFT;
-	mem->bus.base = 0;
-	mem->bus.is_iomem = false;
+	reg->bus.addr = NULL;
+	reg->bus.offset = 0;
+	reg->bus.size = reg->num_pages << PAGE_SHIFT;
+	reg->bus.base = 0;
+	reg->bus.is_iomem = false;
 	if (!(man->flags & TTM_MEMTYPE_FLAG_MAPPABLE))
 		return -EINVAL;
-	switch (mem->mem_type) {
+	switch (reg->mem_type) {
 	case TTM_PL_SYSTEM:
 		/* System memory */
 		return 0;
 	case TTM_PL_TT:
 #if IS_ENABLED(CONFIG_AGP)
 		if (drm->agp.bridge) {
-			mem->bus.offset = mem->start << PAGE_SHIFT;
-			mem->bus.base = drm->agp.base;
-			mem->bus.is_iomem = !drm->agp.cma;
+			reg->bus.offset = reg->start << PAGE_SHIFT;
+			reg->bus.base = drm->agp.base;
+			reg->bus.is_iomem = !drm->agp.cma;
 		}
 #endif
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA || !node->memtype)
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA || !mem->memtype)
 			/* untiled */
 			break;
 		/* fallthrough, tiled memory */
 	case TTM_PL_VRAM:
-		mem->bus.offset = mem->start << PAGE_SHIFT;
-		mem->bus.base = device->func->resource_addr(device, 1);
-		mem->bus.is_iomem = true;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-			struct nvkm_bar *bar = nvxx_bar(&drm->device);
+		reg->bus.offset = reg->start << PAGE_SHIFT;
+		reg->bus.base = device->func->resource_addr(device, 1);
+		reg->bus.is_iomem = true;
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+			struct nvkm_bar *bar = nvxx_bar(&drm->client.device);
 			int page_shift = 12;
-			if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
-				page_shift = node->page_shift;
+			if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+				page_shift = mem->page_shift;
 
-			ret = nvkm_bar_umap(bar, node->size << 12, page_shift,
-					    &node->bar_vma);
+			ret = nvkm_bar_umap(bar, mem->size << 12, page_shift,
+					    &mem->bar_vma);
 			if (ret)
 				return ret;
 
-			nvkm_vm_map(&node->bar_vma, node);
-			mem->bus.offset = node->bar_vma.offset;
+			nvkm_vm_map(&mem->bar_vma, mem);
+			reg->bus.offset = mem->bar_vma.offset;
 		}
 		break;
 	default:
@@ -1378,15 +1383,15 @@ nouveau_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
 }
 
 static void
-nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
+nouveau_ttm_io_mem_free(struct ttm_bo_device *bdev, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *mem = reg->mm_node;
 
-	if (!node->bar_vma.node)
+	if (!mem->bar_vma.node)
 		return;
 
-	nvkm_vm_unmap(&node->bar_vma);
-	nvkm_vm_put(&node->bar_vma);
+	nvkm_vm_unmap(&mem->bar_vma);
+	nvkm_vm_put(&mem->bar_vma);
 }
 
 static int
@@ -1394,7 +1399,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	u32 mappable = device->func->resource_size(device, 1) >> PAGE_SHIFT;
 	int i, ret;
 
@@ -1402,7 +1407,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	 * nothing to do here.
 	 */
 	if (bo->mem.mem_type != TTM_PL_VRAM) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA ||
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA ||
 		    !nouveau_bo_tile_layout(nvbo))
 			return 0;
 
@@ -1417,7 +1422,7 @@ nouveau_ttm_fault_reserve_notify(struct ttm_buffer_object *bo)
 	}
 
 	/* make sure bo is in mappable vram */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA ||
 	    bo->mem.start + bo->mem.num_pages < mappable)
 		return 0;
 
@@ -1459,7 +1464,7 @@ nouveau_ttm_tt_populate(struct ttm_tt *ttm)
 	}
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
@@ -1516,7 +1521,7 @@ nouveau_ttm_tt_unpopulate(struct ttm_tt *ttm)
 		return;
 
 	drm = nouveau_bdev(ttm->bdev);
-	device = nvxx_device(&drm->device);
+	device = nvxx_device(&drm->client.device);
 	dev = drm->dev;
 	pdev = device->dev;
 
@@ -1561,6 +1566,7 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.ttm_tt_unpopulate = &nouveau_ttm_tt_unpopulate,
 	.invalidate_caches = nouveau_bo_invalidate_caches,
 	.init_mem_type = nouveau_bo_init_mem_type,
+	.eviction_valuable = ttm_bo_eviction_valuable,
 	.evict_flags = nouveau_bo_evict_flags,
 	.move_notify = nouveau_bo_move_ntfy,
 	.move = nouveau_bo_move,
@@ -1568,8 +1574,6 @@ struct ttm_bo_driver nouveau_bo_driver = {
 	.fault_reserve_notify = &nouveau_ttm_fault_reserve_notify,
 	.io_mem_reserve = &nouveau_ttm_io_mem_reserve,
 	.io_mem_free = &nouveau_ttm_io_mem_free,
-	.lru_tail = &ttm_bo_default_lru_tail,
-	.swap_lru_tail = &ttm_bo_default_swap_lru_tail,
 };
 
 struct nvkm_vma *
diff --git a/drivers/gpu/drm/nouveau/nouveau_bo.h b/drivers/gpu/drm/nouveau/nouveau_bo.h
index e42360983229..b06a5385d6dd 100644
--- a/drivers/gpu/drm/nouveau/nouveau_bo.h
+++ b/drivers/gpu/drm/nouveau/nouveau_bo.h
@@ -26,6 +26,8 @@ struct nouveau_bo {
 	struct list_head vma_list;
 	unsigned page_shift;
 
+	struct nouveau_cli *cli;
+
 	u32 tile_mode;
 	u32 tile_flags;
 	struct nouveau_drm_tile *tile;
@@ -69,7 +71,7 @@ nouveau_bo_ref(struct nouveau_bo *ref, struct nouveau_bo **pnvbo)
 extern struct ttm_bo_driver nouveau_bo_driver;
 
 void nouveau_bo_move_init(struct nouveau_drm *);
-int  nouveau_bo_new(struct drm_device *, int size, int align, u32 flags,
+int  nouveau_bo_new(struct nouveau_cli *, u64 size, int align, u32 flags,
 		    u32 tile_mode, u32 tile_flags, struct sg_table *sg,
 		    struct reservation_object *robj,
 		    struct nouveau_bo **);
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.c b/drivers/gpu/drm/nouveau/nouveau_chan.c
index f9b3c811187e..dbc41fa86ee8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.c
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.c
@@ -45,10 +45,20 @@ MODULE_PARM_DESC(vram_pushbuf, "Create DMA push buffers in VRAM");
 int nouveau_vram_pushbuf;
 module_param_named(vram_pushbuf, nouveau_vram_pushbuf, int, 0400);
 
+static int
+nouveau_channel_killed(struct nvif_notify *ntfy)
+{
+	struct nouveau_channel *chan = container_of(ntfy, typeof(*chan), kill);
+	struct nouveau_cli *cli = (void *)chan->user.client;
+	NV_PRINTK(warn, cli, "channel %d killed!\n", chan->chid);
+	atomic_set(&chan->killed, 1);
+	return NVIF_NOTIFY_DROP;
+}
+
 int
 nouveau_channel_idle(struct nouveau_channel *chan)
 {
-	if (likely(chan && chan->fence)) {
+	if (likely(chan && chan->fence && !atomic_read(&chan->killed))) {
 		struct nouveau_cli *cli = (void *)chan->user.client;
 		struct nouveau_fence *fence = NULL;
 		int ret;
@@ -78,6 +88,7 @@ nouveau_channel_del(struct nouveau_channel **pchan)
 		nvif_object_fini(&chan->nvsw);
 		nvif_object_fini(&chan->gart);
 		nvif_object_fini(&chan->vram);
+		nvif_notify_fini(&chan->kill);
 		nvif_object_fini(&chan->user);
 		nvif_object_fini(&chan->push.ctxdma);
 		nouveau_bo_vma_del(chan->push.buffer, &chan->push.vma);
@@ -107,13 +118,14 @@ nouveau_channel_prep(struct nouveau_drm *drm, struct nvif_device *device,
 
 	chan->device = device;
 	chan->drm = drm;
+	atomic_set(&chan->killed, 0);
 
 	/* allocate memory for dma push buffer */
 	target = TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
 	if (nouveau_vram_pushbuf)
 		target = TTM_PL_FLAG_VRAM;
 
-	ret = nouveau_bo_new(drm->dev, size, 0, target, 0, 0, NULL, NULL,
+	ret = nouveau_bo_new(cli, size, 0, target, 0, 0, NULL, NULL,
 			    &chan->push.buffer);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(chan->push.buffer, target, false);
@@ -301,12 +313,26 @@ nouveau_channel_init(struct nouveau_channel *chan, u32 vram, u32 gart)
 {
 	struct nvif_device *device = chan->device;
 	struct nouveau_cli *cli = (void *)chan->user.client;
+	struct nouveau_drm *drm = chan->drm;
 	struct nvkm_mmu *mmu = nvxx_mmu(device);
 	struct nv_dma_v0 args = {};
 	int ret, i;
 
 	nvif_object_map(&chan->user);
 
+	if (chan->user.oclass >= FERMI_CHANNEL_GPFIFO) {
+		ret = nvif_notify_init(&chan->user, nouveau_channel_killed,
+				       true, NV906F_V0_NTFY_KILLED,
+				       NULL, 0, 0, &chan->kill);
+		if (ret == 0)
+			ret = nvif_notify_get(&chan->kill);
+		if (ret) {
+			NV_ERROR(drm, "Failed to request channel kill "
+				      "notification: %d\n", ret);
+			return ret;
+		}
+	}
+
 	/* allocate dma objects to cover all allowed vram, and gart */
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
 		if (device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
diff --git a/drivers/gpu/drm/nouveau/nouveau_chan.h b/drivers/gpu/drm/nouveau/nouveau_chan.h
index 48062c94f36d..46b947ba1cf4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_chan.h
+++ b/drivers/gpu/drm/nouveau/nouveau_chan.h
@@ -1,7 +1,7 @@
 #ifndef __NOUVEAU_CHAN_H__
 #define __NOUVEAU_CHAN_H__
-
 #include <nvif/object.h>
+#include <nvif/notify.h>
 struct nvif_device;
 
 struct nouveau_channel {
@@ -38,6 +38,9 @@ struct nouveau_channel {
 	u32 user_put;
 
 	struct nvif_object user;
+
+	struct nvif_notify kill;
+	atomic_t killed;
 };
 
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.c b/drivers/gpu/drm/nouveau/nouveau_connector.c
index c1084088f9e4..f5add64c093f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.c
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.c
@@ -30,8 +30,10 @@
 #include <linux/vga_switcheroo.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_edid.h>
 #include <drm/drm_crtc_helper.h>
+#include <drm/drm_atomic.h>
 
 #include "nouveau_reg.h"
 #include "nouveau_drv.h"
@@ -47,6 +49,301 @@
 #include <nvif/cl0046.h>
 #include <nvif/event.h>
 
+struct drm_display_mode *
+nouveau_conn_native_mode(struct drm_connector *connector)
+{
+	const struct drm_connector_helper_funcs *helper = connector->helper_private;
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct drm_device *dev = connector->dev;
+	struct drm_display_mode *mode, *largest = NULL;
+	int high_w = 0, high_h = 0, high_v = 0;
+
+	list_for_each_entry(mode, &connector->probed_modes, head) {
+		mode->vrefresh = drm_mode_vrefresh(mode);
+		if (helper->mode_valid(connector, mode) != MODE_OK ||
+		    (mode->flags & DRM_MODE_FLAG_INTERLACE))
+			continue;
+
+		/* Use preferred mode if there is one.. */
+		if (mode->type & DRM_MODE_TYPE_PREFERRED) {
+			NV_DEBUG(drm, "native mode from preferred\n");
+			return drm_mode_duplicate(dev, mode);
+		}
+
+		/* Otherwise, take the resolution with the largest width, then
+		 * height, then vertical refresh
+		 */
+		if (mode->hdisplay < high_w)
+			continue;
+
+		if (mode->hdisplay == high_w && mode->vdisplay < high_h)
+			continue;
+
+		if (mode->hdisplay == high_w && mode->vdisplay == high_h &&
+		    mode->vrefresh < high_v)
+			continue;
+
+		high_w = mode->hdisplay;
+		high_h = mode->vdisplay;
+		high_v = mode->vrefresh;
+		largest = mode;
+	}
+
+	NV_DEBUG(drm, "native mode from largest: %dx%d@%d\n",
+		      high_w, high_h, high_v);
+	return largest ? drm_mode_duplicate(dev, largest) : NULL;
+}
+
+int
+nouveau_conn_atomic_get_property(struct drm_connector *connector,
+				 const struct drm_connector_state *state,
+				 struct drm_property *property, u64 *val)
+{
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(state);
+	struct nouveau_display *disp = nouveau_display(connector->dev);
+	struct drm_device *dev = connector->dev;
+
+	if (property == dev->mode_config.scaling_mode_property)
+		*val = asyc->scaler.mode;
+	else if (property == disp->underscan_property)
+		*val = asyc->scaler.underscan.mode;
+	else if (property == disp->underscan_hborder_property)
+		*val = asyc->scaler.underscan.hborder;
+	else if (property == disp->underscan_vborder_property)
+		*val = asyc->scaler.underscan.vborder;
+	else if (property == disp->dithering_mode)
+		*val = asyc->dither.mode;
+	else if (property == disp->dithering_depth)
+		*val = asyc->dither.depth;
+	else if (property == disp->vibrant_hue_property)
+		*val = asyc->procamp.vibrant_hue;
+	else if (property == disp->color_vibrance_property)
+		*val = asyc->procamp.color_vibrance;
+	else
+		return -EINVAL;
+
+	return 0;
+}
+
+int
+nouveau_conn_atomic_set_property(struct drm_connector *connector,
+				 struct drm_connector_state *state,
+				 struct drm_property *property, u64 val)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(state);
+	struct nouveau_display *disp = nouveau_display(dev);
+
+	if (property == dev->mode_config.scaling_mode_property) {
+		switch (val) {
+		case DRM_MODE_SCALE_NONE:
+			/* We allow 'None' for EDID modes, even on a fixed
+			 * panel (some exist with support for lower refresh
+			 * rates, which people might want to use for power-
+			 * saving purposes).
+			 *
+			 * Non-EDID modes will force the use of GPU scaling
+			 * to the native mode regardless of this setting.
+			 */
+			switch (connector->connector_type) {
+			case DRM_MODE_CONNECTOR_LVDS:
+			case DRM_MODE_CONNECTOR_eDP:
+				/* ... except prior to G80, where the code
+				 * doesn't support such things.
+				 */
+				if (disp->disp.oclass < NV50_DISP)
+					return -EINVAL;
+				break;
+			default:
+				break;
+			}
+		case DRM_MODE_SCALE_FULLSCREEN:
+		case DRM_MODE_SCALE_CENTER:
+		case DRM_MODE_SCALE_ASPECT:
+			break;
+		default:
+			return -EINVAL;
+		}
+
+		if (asyc->scaler.mode != val) {
+			asyc->scaler.mode = val;
+			asyc->set.scaler = true;
+		}
+	} else
+	if (property == disp->underscan_property) {
+		if (asyc->scaler.underscan.mode != val) {
+			asyc->scaler.underscan.mode = val;
+			asyc->set.scaler = true;
+		}
+	} else
+	if (property == disp->underscan_hborder_property) {
+		if (asyc->scaler.underscan.hborder != val) {
+			asyc->scaler.underscan.hborder = val;
+			asyc->set.scaler = true;
+		}
+	} else
+	if (property == disp->underscan_vborder_property) {
+		if (asyc->scaler.underscan.vborder != val) {
+			asyc->scaler.underscan.vborder = val;
+			asyc->set.scaler = true;
+		}
+	} else
+	if (property == disp->dithering_mode) {
+		if (asyc->dither.mode != val) {
+			asyc->dither.mode = val;
+			asyc->set.dither = true;
+		}
+	} else
+	if (property == disp->dithering_depth) {
+		if (asyc->dither.mode != val) {
+			asyc->dither.depth = val;
+			asyc->set.dither = true;
+		}
+	} else
+	if (property == disp->vibrant_hue_property) {
+		if (asyc->procamp.vibrant_hue != val) {
+			asyc->procamp.vibrant_hue = val;
+			asyc->set.procamp = true;
+		}
+	} else
+	if (property == disp->color_vibrance_property) {
+		if (asyc->procamp.color_vibrance != val) {
+			asyc->procamp.color_vibrance = val;
+			asyc->set.procamp = true;
+		}
+	} else {
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void
+nouveau_conn_atomic_destroy_state(struct drm_connector *connector,
+				  struct drm_connector_state *state)
+{
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(state);
+	__drm_atomic_helper_connector_destroy_state(&asyc->state);
+	kfree(asyc);
+}
+
+struct drm_connector_state *
+nouveau_conn_atomic_duplicate_state(struct drm_connector *connector)
+{
+	struct nouveau_conn_atom *armc = nouveau_conn_atom(connector->state);
+	struct nouveau_conn_atom *asyc;
+	if (!(asyc = kmalloc(sizeof(*asyc), GFP_KERNEL)))
+		return NULL;
+	__drm_atomic_helper_connector_duplicate_state(connector, &asyc->state);
+	asyc->dither = armc->dither;
+	asyc->scaler = armc->scaler;
+	asyc->procamp = armc->procamp;
+	asyc->set.mask = 0;
+	return &asyc->state;
+}
+
+void
+nouveau_conn_reset(struct drm_connector *connector)
+{
+	struct nouveau_conn_atom *asyc;
+
+	if (WARN_ON(!(asyc = kzalloc(sizeof(*asyc), GFP_KERNEL))))
+		return;
+
+	if (connector->state)
+		__drm_atomic_helper_connector_destroy_state(connector->state);
+	__drm_atomic_helper_connector_reset(connector, &asyc->state);
+	asyc->dither.mode = DITHERING_MODE_AUTO;
+	asyc->dither.depth = DITHERING_DEPTH_AUTO;
+	asyc->scaler.mode = DRM_MODE_SCALE_NONE;
+	asyc->scaler.underscan.mode = UNDERSCAN_OFF;
+	asyc->procamp.color_vibrance = 150;
+	asyc->procamp.vibrant_hue = 90;
+
+	if (nouveau_display(connector->dev)->disp.oclass < NV50_DISP) {
+		switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_LVDS:
+			/* See note in nouveau_conn_atomic_set_property(). */
+			asyc->scaler.mode = DRM_MODE_SCALE_FULLSCREEN;
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+void
+nouveau_conn_attach_properties(struct drm_connector *connector)
+{
+	struct drm_device *dev = connector->dev;
+	struct nouveau_conn_atom *armc = nouveau_conn_atom(connector->state);
+	struct nouveau_display *disp = nouveau_display(dev);
+
+	/* Init DVI-I specific properties. */
+	if (connector->connector_type == DRM_MODE_CONNECTOR_DVII)
+		drm_object_attach_property(&connector->base, dev->mode_config.
+					   dvi_i_subconnector_property, 0);
+
+	/* Add overscan compensation options to digital outputs. */
+	if (disp->underscan_property &&
+	    (connector->connector_type == DRM_MODE_CONNECTOR_DVID ||
+	     connector->connector_type == DRM_MODE_CONNECTOR_DVII ||
+	     connector->connector_type == DRM_MODE_CONNECTOR_HDMIA ||
+	     connector->connector_type == DRM_MODE_CONNECTOR_DisplayPort)) {
+		drm_object_attach_property(&connector->base,
+					   disp->underscan_property,
+					   UNDERSCAN_OFF);
+		drm_object_attach_property(&connector->base,
+					   disp->underscan_hborder_property, 0);
+		drm_object_attach_property(&connector->base,
+					   disp->underscan_vborder_property, 0);
+	}
+
+	/* Add hue and saturation options. */
+	if (disp->vibrant_hue_property)
+		drm_object_attach_property(&connector->base,
+					   disp->vibrant_hue_property,
+					   armc->procamp.vibrant_hue);
+	if (disp->color_vibrance_property)
+		drm_object_attach_property(&connector->base,
+					   disp->color_vibrance_property,
+					   armc->procamp.color_vibrance);
+
+	/* Scaling mode property. */
+	switch (connector->connector_type) {
+	case DRM_MODE_CONNECTOR_TV:
+		break;
+	case DRM_MODE_CONNECTOR_VGA:
+		if (disp->disp.oclass < NV50_DISP)
+			break; /* Can only scale on DFPs. */
+		/* Fall-through. */
+	default:
+		drm_object_attach_property(&connector->base, dev->mode_config.
+					   scaling_mode_property,
+					   armc->scaler.mode);
+		break;
+	}
+
+	/* Dithering properties. */
+	switch (connector->connector_type) {
+	case DRM_MODE_CONNECTOR_TV:
+	case DRM_MODE_CONNECTOR_VGA:
+		break;
+	default:
+		if (disp->dithering_mode) {
+			drm_object_attach_property(&connector->base,
+						   disp->dithering_mode,
+						   armc->dither.mode);
+		}
+		if (disp->dithering_depth) {
+			drm_object_attach_property(&connector->base,
+						   disp->dithering_depth,
+						   armc->dither.depth);
+		}
+		break;
+	}
+}
+
 MODULE_PARM_DESC(tv_disable, "Disable TV-out detection");
 int nouveau_tv_disable = 0;
 module_param_named(tv_disable, nouveau_tv_disable, int, 0400);
@@ -122,7 +419,7 @@ nouveau_connector_ddc_detect(struct drm_connector *connector)
 	struct drm_device *dev = connector->dev;
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_gpio *gpio = nvxx_gpio(&drm->device);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
 	int i, panel = -ENODEV;
@@ -151,7 +448,9 @@ nouveau_connector_ddc_detect(struct drm_connector *connector)
 
 		if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
 			int ret = nouveau_dp_detect(nv_encoder);
-			if (ret == 0)
+			if (ret == NOUVEAU_DP_MST)
+				return NULL;
+			if (ret == NOUVEAU_DP_SST)
 				break;
 		} else
 		if ((vga_switcheroo_handler_flags() &
@@ -222,7 +521,7 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		return;
 	nv_connector->detected_encoder = nv_encoder;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
 		connector->interlace_allowed = true;
 		connector->doublescan_allowed = true;
 	} else
@@ -232,8 +531,8 @@ nouveau_connector_set_encoder(struct drm_connector *connector,
 		connector->interlace_allowed = false;
 	} else {
 		connector->doublescan_allowed = true;
-		if (drm->device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
-		    (drm->device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
+		if (drm->client.device.info.family == NV_DEVICE_INFO_V0_KELVIN ||
+		    (drm->client.device.info.family == NV_DEVICE_INFO_V0_CELSIUS &&
 		     (dev->pdev->device & 0x0ff0) != 0x0100 &&
 		     (dev->pdev->device & 0x0ff0) != 0x0150))
 			/* HW is broken */
@@ -465,199 +764,39 @@ static int
 nouveau_connector_set_property(struct drm_connector *connector,
 			       struct drm_property *property, uint64_t value)
 {
-	struct nouveau_display *disp = nouveau_display(connector->dev);
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(connector->state);
 	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_encoder *nv_encoder = nv_connector->detected_encoder;
 	struct drm_encoder *encoder = to_drm_encoder(nv_encoder);
-	struct drm_device *dev = connector->dev;
-	struct nouveau_crtc *nv_crtc;
 	int ret;
 
-	nv_crtc = NULL;
-	if (connector->encoder && connector->encoder->crtc)
-		nv_crtc = nouveau_crtc(connector->encoder->crtc);
-
-	/* Scaling mode */
-	if (property == dev->mode_config.scaling_mode_property) {
-		bool modeset = false;
-
-		switch (value) {
-		case DRM_MODE_SCALE_NONE:
-			/* We allow 'None' for EDID modes, even on a fixed
-			 * panel (some exist with support for lower refresh
-			 * rates, which people might want to use for power
-			 * saving purposes).
-			 *
-			 * Non-EDID modes will force the use of GPU scaling
-			 * to the native mode regardless of this setting.
-			 */
-			switch (nv_connector->type) {
-			case DCB_CONNECTOR_LVDS:
-			case DCB_CONNECTOR_LVDS_SPWG:
-			case DCB_CONNECTOR_eDP:
-				/* ... except prior to G80, where the code
-				 * doesn't support such things.
-				 */
-				if (disp->disp.oclass < NV50_DISP)
-					return -EINVAL;
-				break;
-			default:
-				break;
-			}
-			break;
-		case DRM_MODE_SCALE_FULLSCREEN:
-		case DRM_MODE_SCALE_CENTER:
-		case DRM_MODE_SCALE_ASPECT:
-			break;
-		default:
-			return -EINVAL;
-		}
-
-		/* Changing between GPU and panel scaling requires a full
-		 * modeset
-		 */
-		if ((nv_connector->scaling_mode == DRM_MODE_SCALE_NONE) ||
-		    (value == DRM_MODE_SCALE_NONE))
-			modeset = true;
-		nv_connector->scaling_mode = value;
-
-		if (!nv_crtc)
-			return 0;
-
-		if (modeset || !nv_crtc->set_scale) {
-			ret = drm_crtc_helper_set_mode(&nv_crtc->base,
-							&nv_crtc->base.mode,
-							nv_crtc->base.x,
-							nv_crtc->base.y, NULL);
-			if (!ret)
-				return -EINVAL;
-		} else {
-			ret = nv_crtc->set_scale(nv_crtc, true);
-			if (ret)
-				return ret;
-		}
-
-		return 0;
-	}
-
-	/* Underscan */
-	if (property == disp->underscan_property) {
-		if (nv_connector->underscan != value) {
-			nv_connector->underscan = value;
-			if (!nv_crtc || !nv_crtc->set_scale)
-				return 0;
-
-			return nv_crtc->set_scale(nv_crtc, true);
-		}
-
-		return 0;
-	}
-
-	if (property == disp->underscan_hborder_property) {
-		if (nv_connector->underscan_hborder != value) {
-			nv_connector->underscan_hborder = value;
-			if (!nv_crtc || !nv_crtc->set_scale)
-				return 0;
-
-			return nv_crtc->set_scale(nv_crtc, true);
-		}
-
-		return 0;
-	}
-
-	if (property == disp->underscan_vborder_property) {
-		if (nv_connector->underscan_vborder != value) {
-			nv_connector->underscan_vborder = value;
-			if (!nv_crtc || !nv_crtc->set_scale)
-				return 0;
-
-			return nv_crtc->set_scale(nv_crtc, true);
-		}
-
-		return 0;
-	}
+	if (drm_drv_uses_atomic_modeset(connector->dev))
+		return drm_atomic_helper_connector_set_property(connector, property, value);
 
-	/* Dithering */
-	if (property == disp->dithering_mode) {
-		nv_connector->dithering_mode = value;
-		if (!nv_crtc || !nv_crtc->set_dither)
-			return 0;
-
-		return nv_crtc->set_dither(nv_crtc, true);
-	}
-
-	if (property == disp->dithering_depth) {
-		nv_connector->dithering_depth = value;
-		if (!nv_crtc || !nv_crtc->set_dither)
-			return 0;
-
-		return nv_crtc->set_dither(nv_crtc, true);
-	}
-
-	if (nv_crtc && nv_crtc->set_color_vibrance) {
-		/* Hue */
-		if (property == disp->vibrant_hue_property) {
-			nv_crtc->vibrant_hue = value - 90;
-			return nv_crtc->set_color_vibrance(nv_crtc, true);
-		}
-		/* Saturation */
-		if (property == disp->color_vibrance_property) {
-			nv_crtc->color_vibrance = value - 100;
-			return nv_crtc->set_color_vibrance(nv_crtc, true);
-		}
+	ret = connector->funcs->atomic_set_property(&nv_connector->base,
+						    &asyc->state,
+						    property, value);
+	if (ret) {
+		if (nv_encoder && nv_encoder->dcb->type == DCB_OUTPUT_TV)
+			return get_slave_funcs(encoder)->set_property(
+				encoder, connector, property, value);
+		return ret;
 	}
 
-	if (nv_encoder && nv_encoder->dcb->type == DCB_OUTPUT_TV)
-		return get_slave_funcs(encoder)->set_property(
-			encoder, connector, property, value);
-
-	return -EINVAL;
-}
-
-static struct drm_display_mode *
-nouveau_connector_native_mode(struct drm_connector *connector)
-{
-	const struct drm_connector_helper_funcs *helper = connector->helper_private;
-	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nouveau_connector *nv_connector = nouveau_connector(connector);
-	struct drm_device *dev = connector->dev;
-	struct drm_display_mode *mode, *largest = NULL;
-	int high_w = 0, high_h = 0, high_v = 0;
+	nv_connector->scaling_mode = asyc->scaler.mode;
+	nv_connector->dithering_mode = asyc->dither.mode;
 
-	list_for_each_entry(mode, &nv_connector->base.probed_modes, head) {
-		mode->vrefresh = drm_mode_vrefresh(mode);
-		if (helper->mode_valid(connector, mode) != MODE_OK ||
-		    (mode->flags & DRM_MODE_FLAG_INTERLACE))
-			continue;
-
-		/* Use preferred mode if there is one.. */
-		if (mode->type & DRM_MODE_TYPE_PREFERRED) {
-			NV_DEBUG(drm, "native mode from preferred\n");
-			return drm_mode_duplicate(dev, mode);
-		}
-
-		/* Otherwise, take the resolution with the largest width, then
-		 * height, then vertical refresh
-		 */
-		if (mode->hdisplay < high_w)
-			continue;
-
-		if (mode->hdisplay == high_w && mode->vdisplay < high_h)
-			continue;
-
-		if (mode->hdisplay == high_w && mode->vdisplay == high_h &&
-		    mode->vrefresh < high_v)
-			continue;
-
-		high_w = mode->hdisplay;
-		high_h = mode->vdisplay;
-		high_v = mode->vrefresh;
-		largest = mode;
+	if (connector->encoder && connector->encoder->crtc) {
+		ret = drm_crtc_helper_set_mode(connector->encoder->crtc,
+					      &connector->encoder->crtc->mode,
+					       connector->encoder->crtc->x,
+					       connector->encoder->crtc->y,
+					       NULL);
+		if (!ret)
+			return -EINVAL;
 	}
 
-	NV_DEBUG(drm, "native mode from largest: %dx%d@%d\n",
-		      high_w, high_h, high_v);
-	return largest ? drm_mode_duplicate(dev, largest) : NULL;
+	return 0;
 }
 
 struct moderec {
@@ -805,8 +944,7 @@ nouveau_connector_get_modes(struct drm_connector *connector)
 	 * the list of modes.
 	 */
 	if (!nv_connector->native_mode)
-		nv_connector->native_mode =
-			nouveau_connector_native_mode(connector);
+		nv_connector->native_mode = nouveau_conn_native_mode(connector);
 	if (ret == 0 && nv_connector->native_mode) {
 		struct drm_display_mode *mode;
 
@@ -846,17 +984,17 @@ get_tmds_link_bandwidth(struct drm_connector *connector, bool hdmi)
 		/* Note: these limits are conservative, some Fermi's
 		 * can do 297 MHz. Unclear how this can be determined.
 		 */
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_KEPLER)
 			return 297000;
-		if (drm->device.info.family >= NV_DEVICE_INFO_V0_FERMI)
+		if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_FERMI)
 			return 225000;
 	}
 	if (dcb->location != DCB_LOC_ON_CHIP ||
-	    drm->device.info.chipset >= 0x46)
+	    drm->client.device.info.chipset >= 0x46)
 		return 165000;
-	else if (drm->device.info.chipset >= 0x40)
+	else if (drm->client.device.info.chipset >= 0x40)
 		return 155000;
-	else if (drm->device.info.chipset >= 0x18)
+	else if (drm->client.device.info.chipset >= 0x18)
 		return 135000;
 	else
 		return 112000;
@@ -903,7 +1041,7 @@ nouveau_connector_mode_valid(struct drm_connector *connector,
 		clock = clock * (connector->display_info.bpc * 3) / 10;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		return MODE_BAD;
 	}
 
@@ -934,56 +1072,42 @@ nouveau_connector_helper_funcs = {
 	.best_encoder = nouveau_connector_best_encoder,
 };
 
+static int
+nouveau_connector_dpms(struct drm_connector *connector, int mode)
+{
+	if (drm_drv_uses_atomic_modeset(connector->dev))
+		return drm_atomic_helper_connector_dpms(connector, mode);
+	return drm_helper_connector_dpms(connector, mode);
+}
+
 static const struct drm_connector_funcs
 nouveau_connector_funcs = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = nouveau_connector_dpms,
+	.reset = nouveau_conn_reset,
 	.detect = nouveau_connector_detect,
-	.destroy = nouveau_connector_destroy,
+	.force = nouveau_connector_force,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = nouveau_connector_set_property,
-	.force = nouveau_connector_force
+	.destroy = nouveau_connector_destroy,
+	.atomic_duplicate_state = nouveau_conn_atomic_duplicate_state,
+	.atomic_destroy_state = nouveau_conn_atomic_destroy_state,
+	.atomic_set_property = nouveau_conn_atomic_set_property,
+	.atomic_get_property = nouveau_conn_atomic_get_property,
 };
 
 static const struct drm_connector_funcs
 nouveau_connector_funcs_lvds = {
-	.dpms = drm_helper_connector_dpms,
+	.dpms = nouveau_connector_dpms,
+	.reset = nouveau_conn_reset,
 	.detect = nouveau_connector_detect_lvds,
-	.destroy = nouveau_connector_destroy,
+	.force = nouveau_connector_force,
 	.fill_modes = drm_helper_probe_single_connector_modes,
 	.set_property = nouveau_connector_set_property,
-	.force = nouveau_connector_force
-};
-
-static int
-nouveau_connector_dp_dpms(struct drm_connector *connector, int mode)
-{
-	struct nouveau_encoder *nv_encoder = NULL;
-
-	if (connector->encoder)
-		nv_encoder = nouveau_encoder(connector->encoder);
-	if (nv_encoder && nv_encoder->dcb &&
-	    nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		if (mode == DRM_MODE_DPMS_ON) {
-			u8 data = DP_SET_POWER_D0;
-			nvkm_wraux(nv_encoder->aux, DP_SET_POWER, &data, 1);
-			usleep_range(1000, 2000);
-		} else {
-			u8 data = DP_SET_POWER_D3;
-			nvkm_wraux(nv_encoder->aux, DP_SET_POWER, &data, 1);
-		}
-	}
-
-	return drm_helper_connector_dpms(connector, mode);
-}
-
-static const struct drm_connector_funcs
-nouveau_connector_funcs_dp = {
-	.dpms = nouveau_connector_dp_dpms,
-	.detect = nouveau_connector_detect,
 	.destroy = nouveau_connector_destroy,
-	.fill_modes = drm_helper_probe_single_connector_modes,
-	.set_property = nouveau_connector_set_property,
-	.force = nouveau_connector_force
+	.atomic_duplicate_state = nouveau_conn_atomic_duplicate_state,
+	.atomic_destroy_state = nouveau_conn_atomic_destroy_state,
+	.atomic_set_property = nouveau_conn_atomic_set_property,
+	.atomic_get_property = nouveau_conn_atomic_get_property,
 };
 
 static int
@@ -995,19 +1119,20 @@ nouveau_connector_hotplug(struct nvif_notify *notify)
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
 	const struct nvif_notify_conn_rep_v0 *rep = notify->data;
 	const char *name = connector->name;
+	struct nouveau_encoder *nv_encoder;
 
 	if (rep->mask & NVIF_NOTIFY_CONN_V0_IRQ) {
+		NV_DEBUG(drm, "service %s\n", name);
+		if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP)))
+			nv50_mstm_service(nv_encoder->dp.mstm);
 	} else {
 		bool plugged = (rep->mask != NVIF_NOTIFY_CONN_V0_UNPLUG);
 
 		NV_DEBUG(drm, "%splugged %s\n", plugged ? "" : "un", name);
-
-		mutex_lock(&drm->dev->mode_config.mutex);
-		if (plugged)
-			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_ON);
-		else
-			drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
-		mutex_unlock(&drm->dev->mode_config.mutex);
+		if ((nv_encoder = find_encoder(connector, DCB_OUTPUT_DP))) {
+			if (!plugged)
+				nv50_mstm_remove(nv_encoder->dp.mstm);
+		}
 
 		drm_helper_hpd_irq_event(connector->dev);
 	}
@@ -1188,7 +1313,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 			return ERR_PTR(ret);
 		}
 
-		funcs = &nouveau_connector_funcs_dp;
+		funcs = &nouveau_connector_funcs;
 		break;
 	default:
 		funcs = &nouveau_connector_funcs;
@@ -1202,38 +1327,10 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	drm_connector_init(dev, connector, funcs, type);
 	drm_connector_helper_add(connector, &nouveau_connector_helper_funcs);
 
-	/* Init DVI-I specific properties */
-	if (nv_connector->type == DCB_CONNECTOR_DVI_I)
-		drm_object_attach_property(&connector->base, dev->mode_config.dvi_i_subconnector_property, 0);
+	connector->funcs->reset(connector);
+	nouveau_conn_attach_properties(connector);
 
-	/* Add overscan compensation options to digital outputs */
-	if (disp->underscan_property &&
-	    (type == DRM_MODE_CONNECTOR_DVID ||
-	     type == DRM_MODE_CONNECTOR_DVII ||
-	     type == DRM_MODE_CONNECTOR_HDMIA ||
-	     type == DRM_MODE_CONNECTOR_DisplayPort)) {
-		drm_object_attach_property(&connector->base,
-					      disp->underscan_property,
-					      UNDERSCAN_OFF);
-		drm_object_attach_property(&connector->base,
-					      disp->underscan_hborder_property,
-					      0);
-		drm_object_attach_property(&connector->base,
-					      disp->underscan_vborder_property,
-					      0);
-	}
-
-	/* Add hue and saturation options */
-	if (disp->vibrant_hue_property)
-		drm_object_attach_property(&connector->base,
-					      disp->vibrant_hue_property,
-					      90);
-	if (disp->color_vibrance_property)
-		drm_object_attach_property(&connector->base,
-					      disp->color_vibrance_property,
-					      150);
-
-	/* default scaling mode */
+	/* Default scaling mode */
 	switch (nv_connector->type) {
 	case DCB_CONNECTOR_LVDS:
 	case DCB_CONNECTOR_LVDS_SPWG:
@@ -1250,23 +1347,6 @@ nouveau_connector_create(struct drm_device *dev, int index)
 		break;
 	}
 
-	/* scaling mode property */
-	switch (nv_connector->type) {
-	case DCB_CONNECTOR_TV_0:
-	case DCB_CONNECTOR_TV_1:
-	case DCB_CONNECTOR_TV_3:
-		break;
-	case DCB_CONNECTOR_VGA:
-		if (disp->disp.oclass < NV50_DISP)
-			break; /* can only scale on DFPs */
-		/* fall-through */
-	default:
-		drm_object_attach_property(&connector->base, dev->mode_config.
-					   scaling_mode_property,
-					   nv_connector->scaling_mode);
-		break;
-	}
-
 	/* dithering properties */
 	switch (nv_connector->type) {
 	case DCB_CONNECTOR_TV_0:
@@ -1275,20 +1355,7 @@ nouveau_connector_create(struct drm_device *dev, int index)
 	case DCB_CONNECTOR_VGA:
 		break;
 	default:
-		if (disp->dithering_mode) {
-			nv_connector->dithering_mode = DITHERING_MODE_AUTO;
-			drm_object_attach_property(&connector->base,
-						   disp->dithering_mode,
-						   nv_connector->
-						   dithering_mode);
-		}
-		if (disp->dithering_depth) {
-			nv_connector->dithering_depth = DITHERING_DEPTH_AUTO;
-			drm_object_attach_property(&connector->base,
-						   disp->dithering_depth,
-						   nv_connector->
-						   dithering_depth);
-		}
+		nv_connector->dithering_mode = DITHERING_MODE_AUTO;
 		break;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_connector.h b/drivers/gpu/drm/nouveau/nouveau_connector.h
index 7446ee66ea04..a4d1a059bd3d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_connector.h
+++ b/drivers/gpu/drm/nouveau/nouveau_connector.h
@@ -30,35 +30,12 @@
 #include <nvif/notify.h>
 
 #include <drm/drm_edid.h>
+#include <drm/drm_encoder.h>
 #include <drm/drm_dp_helper.h>
 #include "nouveau_crtc.h"
 
 struct nvkm_i2c_port;
 
-enum nouveau_underscan_type {
-	UNDERSCAN_OFF,
-	UNDERSCAN_ON,
-	UNDERSCAN_AUTO,
-};
-
-/* the enum values specifically defined here match nv50/nvd0 hw values, and
- * the code relies on this
- */
-enum nouveau_dithering_mode {
-	DITHERING_MODE_OFF = 0x00,
-	DITHERING_MODE_ON = 0x01,
-	DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON,
-	DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON,
-	DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON,
-	DITHERING_MODE_AUTO
-};
-
-enum nouveau_dithering_depth {
-	DITHERING_DEPTH_6BPC = 0x00,
-	DITHERING_DEPTH_8BPC = 0x02,
-	DITHERING_DEPTH_AUTO
-};
-
 struct nouveau_connector {
 	struct drm_connector base;
 	enum dcb_connector_type type;
@@ -70,12 +47,7 @@ struct nouveau_connector {
 	struct drm_dp_aux aux;
 
 	int dithering_mode;
-	int dithering_depth;
 	int scaling_mode;
-	bool scaling_full;
-	enum nouveau_underscan_type underscan;
-	u32 underscan_hborder;
-	u32 underscan_vborder;
 
 	struct nouveau_encoder *detected_encoder;
 	struct edid *edid;
@@ -109,5 +81,74 @@ nouveau_connector_create(struct drm_device *, int index);
 extern int nouveau_tv_disable;
 extern int nouveau_ignorelid;
 extern int nouveau_duallink;
+extern int nouveau_hdmimhz;
+
+#include <drm/drm_crtc.h>
+#define nouveau_conn_atom(p)                                                   \
+	container_of((p), struct nouveau_conn_atom, state)
+
+struct nouveau_conn_atom {
+	struct drm_connector_state state;
+
+	struct {
+		/* The enum values specifically defined here match nv50/gf119
+		 * hw values, and the code relies on this.
+		 */
+		enum {
+			DITHERING_MODE_OFF = 0x00,
+			DITHERING_MODE_ON = 0x01,
+			DITHERING_MODE_DYNAMIC2X2 = 0x10 | DITHERING_MODE_ON,
+			DITHERING_MODE_STATIC2X2 = 0x18 | DITHERING_MODE_ON,
+			DITHERING_MODE_TEMPORAL = 0x20 | DITHERING_MODE_ON,
+			DITHERING_MODE_AUTO
+		} mode;
+		enum {
+			DITHERING_DEPTH_6BPC = 0x00,
+			DITHERING_DEPTH_8BPC = 0x02,
+			DITHERING_DEPTH_AUTO
+		} depth;
+	} dither;
+
+	struct {
+		int mode;	/* DRM_MODE_SCALE_* */
+		struct {
+			enum {
+				UNDERSCAN_OFF,
+				UNDERSCAN_ON,
+				UNDERSCAN_AUTO,
+			} mode;
+			u32 hborder;
+			u32 vborder;
+		} underscan;
+		bool full;
+	} scaler;
+
+	struct {
+		int color_vibrance;
+		int vibrant_hue;
+	} procamp;
+
+	union {
+		struct {
+			bool dither:1;
+			bool scaler:1;
+			bool procamp:1;
+		};
+		u8 mask;
+	} set;
+};
 
+void nouveau_conn_attach_properties(struct drm_connector *);
+void nouveau_conn_reset(struct drm_connector *);
+struct drm_connector_state *
+nouveau_conn_atomic_duplicate_state(struct drm_connector *);
+void nouveau_conn_atomic_destroy_state(struct drm_connector *,
+				       struct drm_connector_state *);
+int nouveau_conn_atomic_set_property(struct drm_connector *,
+				     struct drm_connector_state *,
+				     struct drm_property *, u64);
+int nouveau_conn_atomic_get_property(struct drm_connector *,
+				     const struct drm_connector_state *,
+				     struct drm_property *, u64 *);
+struct drm_display_mode *nouveau_conn_native_mode(struct drm_connector *);
 #endif /* __NOUVEAU_CONNECTOR_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_crtc.h b/drivers/gpu/drm/nouveau/nouveau_crtc.h
index 863f10b8d818..050fcf30a0d2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_crtc.h
+++ b/drivers/gpu/drm/nouveau/nouveau_crtc.h
@@ -38,8 +38,6 @@ struct nouveau_crtc {
 	uint32_t dpms_saved_fp_control;
 	uint32_t fp_users;
 	int saturation;
-	int color_vibrance;
-	int vibrant_hue;
 	int sharpness;
 	int last_dpms;
 
@@ -54,7 +52,6 @@ struct nouveau_crtc {
 
 	struct {
 		struct nouveau_bo *nvbo;
-		bool visible;
 		uint32_t offset;
 		void (*set_offset)(struct nouveau_crtc *, uint32_t offset);
 		void (*set_pos)(struct nouveau_crtc *, int x, int y);
@@ -70,10 +67,6 @@ struct nouveau_crtc {
 		int depth;
 	} lut;
 
-	int (*set_dither)(struct nouveau_crtc *crtc, bool update);
-	int (*set_scale)(struct nouveau_crtc *crtc, bool update);
-	int (*set_color_vibrance)(struct nouveau_crtc *crtc, bool update);
-
 	void (*save)(struct drm_crtc *crtc);
 	void (*restore)(struct drm_crtc *crtc);
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_debugfs.c b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
index 411c12cdb249..fd64dfdc7d4f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_debugfs.c
+++ b/drivers/gpu/drm/nouveau/nouveau_debugfs.c
@@ -259,8 +259,9 @@ nouveau_debugfs_init(struct nouveau_drm *drm)
 	if (!drm->debugfs)
 		return -ENOMEM;
 
-	ret = nvif_object_init(&drm->device.object, 0, NVIF_CLASS_CONTROL,
-			       NULL, 0, &drm->debugfs->ctrl);
+	ret = nvif_object_init(&drm->client.device.object, 0,
+			       NVIF_CLASS_CONTROL, NULL, 0,
+			       &drm->debugfs->ctrl);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.c b/drivers/gpu/drm/nouveau/nouveau_display.c
index afbf557b23d4..72fdba1a1c5d 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.c
+++ b/drivers/gpu/drm/nouveau/nouveau_display.c
@@ -24,7 +24,10 @@
  *
  */
 
+#include <acpi/video.h>
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
 
 #include <nvif/class.h>
@@ -55,27 +58,30 @@ int
 nouveau_display_vblank_enable(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_crtc *crtc;
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-		if (nv_crtc->index == pipe) {
-			nvif_notify_get(&nv_crtc->vblank);
-			return 0;
-		}
-	}
-	return -EINVAL;
+	struct nouveau_crtc *nv_crtc;
+
+	crtc = drm_crtc_from_index(dev, pipe);
+	if (!crtc)
+		return -EINVAL;
+
+	nv_crtc = nouveau_crtc(crtc);
+	nvif_notify_get(&nv_crtc->vblank);
+
+	return 0;
 }
 
 void
 nouveau_display_vblank_disable(struct drm_device *dev, unsigned int pipe)
 {
 	struct drm_crtc *crtc;
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-		if (nv_crtc->index == pipe) {
-			nvif_notify_put(&nv_crtc->vblank);
-			return;
-		}
-	}
+	struct nouveau_crtc *nv_crtc;
+
+	crtc = drm_crtc_from_index(dev, pipe);
+	if (!crtc)
+		return;
+
+	nv_crtc = nouveau_crtc(crtc);
+	nvif_notify_put(&nv_crtc->vblank);
 }
 
 static inline int
@@ -92,7 +98,7 @@ calc(int blanks, int blanke, int total, int line)
 	return line;
 }
 
-int
+static int
 nouveau_display_scanoutpos_head(struct drm_crtc *crtc, int *vpos, int *hpos,
 				ktime_t *stime, ktime_t *etime)
 {
@@ -158,9 +164,13 @@ nouveau_display_vblstamp(struct drm_device *dev, unsigned int pipe,
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		if (nouveau_crtc(crtc)->index == pipe) {
+			struct drm_display_mode *mode;
+			if (drm_drv_uses_atomic_modeset(dev))
+				mode = &crtc->state->adjusted_mode;
+			else
+				mode = &crtc->hwmode;
 			return drm_calc_vbltimestamp_from_scanoutpos(dev,
-					pipe, max_error, time, flags,
-					&crtc->hwmode);
+					pipe, max_error, time, flags, mode);
 		}
 	}
 
@@ -217,10 +227,6 @@ static void
 nouveau_user_framebuffer_destroy(struct drm_framebuffer *drm_fb)
 {
 	struct nouveau_framebuffer *fb = nouveau_framebuffer(drm_fb);
-	struct nouveau_display *disp = nouveau_display(drm_fb->dev);
-
-	if (disp->fb_dtor)
-		disp->fb_dtor(drm_fb);
 
 	if (fb->nvbo)
 		drm_gem_object_unreference_unlocked(&fb->nvbo->gem);
@@ -245,57 +251,45 @@ static const struct drm_framebuffer_funcs nouveau_framebuffer_funcs = {
 };
 
 int
-nouveau_framebuffer_init(struct drm_device *dev,
-			 struct nouveau_framebuffer *nv_fb,
-			 const struct drm_mode_fb_cmd2 *mode_cmd,
-			 struct nouveau_bo *nvbo)
+nouveau_framebuffer_new(struct drm_device *dev,
+			const struct drm_mode_fb_cmd2 *mode_cmd,
+			struct nouveau_bo *nvbo,
+			struct nouveau_framebuffer **pfb)
 {
-	struct nouveau_display *disp = nouveau_display(dev);
-	struct drm_framebuffer *fb = &nv_fb->base;
+	struct nouveau_framebuffer *fb;
 	int ret;
 
-	drm_helper_mode_fill_fb_struct(fb, mode_cmd);
-	nv_fb->nvbo = nvbo;
-
-	ret = drm_framebuffer_init(dev, fb, &nouveau_framebuffer_funcs);
-	if (ret)
-		return ret;
+	if (!(fb = *pfb = kzalloc(sizeof(*fb), GFP_KERNEL)))
+		return -ENOMEM;
 
-	if (disp->fb_ctor) {
-		ret = disp->fb_ctor(fb);
-		if (ret)
-			disp->fb_dtor(fb);
-	}
+	drm_helper_mode_fill_fb_struct(dev, &fb->base, mode_cmd);
+	fb->nvbo = nvbo;
 
+	ret = drm_framebuffer_init(dev, &fb->base, &nouveau_framebuffer_funcs);
+	if (ret)
+		kfree(fb);
 	return ret;
 }
 
-static struct drm_framebuffer *
+struct drm_framebuffer *
 nouveau_user_framebuffer_create(struct drm_device *dev,
 				struct drm_file *file_priv,
 				const struct drm_mode_fb_cmd2 *mode_cmd)
 {
-	struct nouveau_framebuffer *nouveau_fb;
+	struct nouveau_framebuffer *fb;
+	struct nouveau_bo *nvbo;
 	struct drm_gem_object *gem;
-	int ret = -ENOMEM;
+	int ret;
 
 	gem = drm_gem_object_lookup(file_priv, mode_cmd->handles[0]);
 	if (!gem)
 		return ERR_PTR(-ENOENT);
+	nvbo = nouveau_gem_object(gem);
 
-	nouveau_fb = kzalloc(sizeof(struct nouveau_framebuffer), GFP_KERNEL);
-	if (!nouveau_fb)
-		goto err_unref;
-
-	ret = nouveau_framebuffer_init(dev, nouveau_fb, mode_cmd, nouveau_gem_object(gem));
-	if (ret)
-		goto err;
-
-	return &nouveau_fb->base;
+	ret = nouveau_framebuffer_new(dev, mode_cmd, nvbo, &fb);
+	if (ret == 0)
+		return &fb->base;
 
-err:
-	kfree(nouveau_fb);
-err_unref:
 	drm_gem_object_unreference_unlocked(gem);
 	return ERR_PTR(ret);
 }
@@ -358,6 +352,55 @@ static struct nouveau_drm_prop_enum_list dither_depth[] = {
 	}                                                                      \
 } while(0)
 
+static void
+nouveau_display_hpd_work(struct work_struct *work)
+{
+	struct nouveau_drm *drm = container_of(work, typeof(*drm), hpd_work);
+
+	pm_runtime_get_sync(drm->dev->dev);
+
+	drm_helper_hpd_irq_event(drm->dev);
+
+	pm_runtime_mark_last_busy(drm->dev->dev);
+	pm_runtime_put_sync(drm->dev->dev);
+}
+
+#ifdef CONFIG_ACPI
+
+/*
+ * Hans de Goede: This define belongs in acpi/video.h, I've submitted a patch
+ * to the acpi subsys to move it there from drivers/acpi/acpi_video.c .
+ * This should be dropped once that is merged.
+ */
+#ifndef ACPI_VIDEO_NOTIFY_PROBE
+#define ACPI_VIDEO_NOTIFY_PROBE			0x81
+#endif
+
+static int
+nouveau_display_acpi_ntfy(struct notifier_block *nb, unsigned long val,
+			  void *data)
+{
+	struct nouveau_drm *drm = container_of(nb, typeof(*drm), acpi_nb);
+	struct acpi_bus_event *info = data;
+
+	if (!strcmp(info->device_class, ACPI_VIDEO_CLASS)) {
+		if (info->type == ACPI_VIDEO_NOTIFY_PROBE) {
+			/*
+			 * This may be the only indication we receive of a
+			 * connector hotplug on a runtime suspended GPU,
+			 * schedule hpd_work to check.
+			 */
+			schedule_work(&drm->hpd_work);
+
+			/* acpi-video should not generate keypresses for this */
+			return NOTIFY_BAD;
+		}
+	}
+
+	return NOTIFY_DONE;
+}
+#endif
+
 int
 nouveau_display_init(struct drm_device *dev)
 {
@@ -371,7 +414,8 @@ nouveau_display_init(struct drm_device *dev)
 		return ret;
 
 	/* enable polling for external displays */
-	drm_kms_helper_poll_enable(dev);
+	if (!dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(dev);
 
 	/* enable hotplug interrupts */
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
@@ -385,16 +429,19 @@ nouveau_display_init(struct drm_device *dev)
 }
 
 void
-nouveau_display_fini(struct drm_device *dev)
+nouveau_display_fini(struct drm_device *dev, bool suspend)
 {
 	struct nouveau_display *disp = nouveau_display(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct drm_connector *connector;
-	int head;
+	struct drm_crtc *crtc;
+
+	if (!suspend)
+		drm_crtc_force_disable_all(dev);
 
 	/* Make sure that drm and hw vblank irqs get properly disabled. */
-	for (head = 0; head < dev->mode_config.num_crtc; head++)
-		drm_vblank_off(dev, head);
+	drm_for_each_crtc(crtc, dev)
+		drm_crtc_vblank_off(crtc);
 
 	/* disable flip completion events */
 	nvif_notify_put(&drm->flip);
@@ -449,7 +496,7 @@ int
 nouveau_display_create(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nouveau_display *disp;
 	int ret;
 
@@ -466,15 +513,15 @@ nouveau_display_create(struct drm_device *dev)
 
 	dev->mode_config.min_width = 0;
 	dev->mode_config.min_height = 0;
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_CELSIUS) {
 		dev->mode_config.max_width = 2048;
 		dev->mode_config.max_height = 2048;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 		dev->mode_config.max_width = 4096;
 		dev->mode_config.max_height = 4096;
 	} else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI) {
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI) {
 		dev->mode_config.max_width = 8192;
 		dev->mode_config.max_height = 8192;
 	} else {
@@ -485,7 +532,7 @@ nouveau_display_create(struct drm_device *dev)
 	dev->mode_config.preferred_depth = 24;
 	dev->mode_config.prefer_shadow = 1;
 
-	if (drm->device.info.chipset < 0x11)
+	if (drm->client.device.info.chipset < 0x11)
 		dev->mode_config.async_page_flip = false;
 	else
 		dev->mode_config.async_page_flip = true;
@@ -495,7 +542,7 @@ nouveau_display_create(struct drm_device *dev)
 
 	if (nouveau_modeset != 2 && drm->vbios.dcb.entries) {
 		static const u16 oclass[] = {
-			GP104_DISP,
+			GP102_DISP,
 			GP100_DISP,
 			GM200_DISP,
 			GM107_DISP,
@@ -512,7 +559,7 @@ nouveau_display_create(struct drm_device *dev)
 		int i;
 
 		for (i = 0, ret = -ENODEV; ret && i < ARRAY_SIZE(oclass); i++) {
-			ret = nvif_object_init(&drm->device.object, 0,
+			ret = nvif_object_init(&drm->client.device.object, 0,
 					       oclass[i], NULL, 0, &disp->disp);
 		}
 
@@ -530,6 +577,8 @@ nouveau_display_create(struct drm_device *dev)
 	if (ret)
 		goto disp_create_err;
 
+	drm_mode_config_reset(dev);
+
 	if (dev->mode_config.num_crtc) {
 		ret = nouveau_display_vblank_init(dev);
 		if (ret)
@@ -537,6 +586,12 @@ nouveau_display_create(struct drm_device *dev)
 	}
 
 	nouveau_backlight_init(dev);
+	INIT_WORK(&drm->hpd_work, nouveau_display_hpd_work);
+#ifdef CONFIG_ACPI
+	drm->acpi_nb.notifier_call = nouveau_display_acpi_ntfy;
+	register_acpi_notifier(&drm->acpi_nb);
+#endif
+
 	return 0;
 
 vblank_err:
@@ -552,11 +607,13 @@ nouveau_display_destroy(struct drm_device *dev)
 {
 	struct nouveau_display *disp = nouveau_display(dev);
 
+#ifdef CONFIG_ACPI
+	unregister_acpi_notifier(&nouveau_drm(dev)->acpi_nb);
+#endif
 	nouveau_backlight_exit(dev);
 	nouveau_display_vblank_fini(dev);
 
 	drm_kms_helper_poll_fini(dev);
-	drm_crtc_force_disable_all(dev);
 	drm_mode_config_cleanup(dev);
 
 	if (disp->dtor)
@@ -568,12 +625,138 @@ nouveau_display_destroy(struct drm_device *dev)
 	kfree(disp);
 }
 
+static int
+nouveau_atomic_disable_connector(struct drm_atomic_state *state,
+				 struct drm_connector *connector)
+{
+	struct drm_connector_state *connector_state;
+	struct drm_crtc *crtc;
+	struct drm_crtc_state *crtc_state;
+	struct drm_plane_state *plane_state;
+	struct drm_plane *plane;
+	int ret;
+
+	if (!(crtc = connector->state->crtc))
+		return 0;
+
+	connector_state = drm_atomic_get_connector_state(state, connector);
+	if (IS_ERR(connector_state))
+		return PTR_ERR(connector_state);
+
+	ret = drm_atomic_set_crtc_for_connector(connector_state, NULL);
+	if (ret)
+		return ret;
+
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state))
+		return PTR_ERR(crtc_state);
+
+	ret = drm_atomic_set_mode_for_crtc(crtc_state, NULL);
+	if (ret)
+		return ret;
+
+	crtc_state->active = false;
+
+	drm_for_each_plane_mask(plane, connector->dev, crtc_state->plane_mask) {
+		plane_state = drm_atomic_get_plane_state(state, plane);
+		if (IS_ERR(plane_state))
+			return PTR_ERR(plane_state);
+
+		ret = drm_atomic_set_crtc_for_plane(plane_state, NULL);
+		if (ret)
+			return ret;
+
+		drm_atomic_set_fb_for_plane(plane_state, NULL);
+	}
+
+	return 0;
+}
+
+static int
+nouveau_atomic_disable(struct drm_device *dev,
+		       struct drm_modeset_acquire_ctx *ctx)
+{
+	struct drm_atomic_state *state;
+	struct drm_connector *connector;
+	int ret;
+
+	state = drm_atomic_state_alloc(dev);
+	if (!state)
+		return -ENOMEM;
+
+	state->acquire_ctx = ctx;
+
+	drm_for_each_connector(connector, dev) {
+		ret = nouveau_atomic_disable_connector(state, connector);
+		if (ret)
+			break;
+	}
+
+	if (ret == 0)
+		ret = drm_atomic_commit(state);
+	drm_atomic_state_put(state);
+	return ret;
+}
+
+static struct drm_atomic_state *
+nouveau_atomic_suspend(struct drm_device *dev)
+{
+	struct drm_modeset_acquire_ctx ctx;
+	struct drm_atomic_state *state;
+	int ret;
+
+	drm_modeset_acquire_init(&ctx, 0);
+
+retry:
+	ret = drm_modeset_lock_all_ctx(dev, &ctx);
+	if (ret < 0) {
+		state = ERR_PTR(ret);
+		goto unlock;
+	}
+
+	state = drm_atomic_helper_duplicate_state(dev, &ctx);
+	if (IS_ERR(state))
+		goto unlock;
+
+	ret = nouveau_atomic_disable(dev, &ctx);
+	if (ret < 0) {
+		drm_atomic_state_put(state);
+		state = ERR_PTR(ret);
+		goto unlock;
+	}
+
+unlock:
+	if (PTR_ERR(state) == -EDEADLK) {
+		drm_modeset_backoff(&ctx);
+		goto retry;
+	}
+
+	drm_modeset_drop_locks(&ctx);
+	drm_modeset_acquire_fini(&ctx);
+	return state;
+}
+
 int
 nouveau_display_suspend(struct drm_device *dev, bool runtime)
 {
+	struct nouveau_display *disp = nouveau_display(dev);
 	struct drm_crtc *crtc;
 
-	nouveau_display_fini(dev);
+	if (drm_drv_uses_atomic_modeset(dev)) {
+		if (!runtime) {
+			disp->suspend = nouveau_atomic_suspend(dev);
+			if (IS_ERR(disp->suspend)) {
+				int ret = PTR_ERR(disp->suspend);
+				disp->suspend = NULL;
+				return ret;
+			}
+		}
+
+		nouveau_display_fini(dev, true);
+		return 0;
+	}
+
+	nouveau_display_fini(dev, true);
 
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_framebuffer *nouveau_fb;
@@ -600,9 +783,19 @@ nouveau_display_suspend(struct drm_device *dev, bool runtime)
 void
 nouveau_display_resume(struct drm_device *dev, bool runtime)
 {
+	struct nouveau_display *disp = nouveau_display(dev);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct drm_crtc *crtc;
-	int ret, head;
+	int ret;
+
+	if (drm_drv_uses_atomic_modeset(dev)) {
+		nouveau_display_init(dev);
+		if (disp->suspend) {
+			drm_atomic_helper_resume(dev, disp->suspend);
+			disp->suspend = NULL;
+		}
+		return;
+	}
 
 	/* re-pin fb/cursors */
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@@ -647,10 +840,6 @@ nouveau_display_resume(struct drm_device *dev, bool runtime)
 
 	drm_helper_resume_force_mode(dev);
 
-	/* Make sure that drm and hw vblank irqs get resumed if needed. */
-	for (head = 0; head < dev->mode_config.num_crtc; head++)
-		drm_vblank_on(dev, head);
-
 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
 		struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 
@@ -692,10 +881,7 @@ nouveau_page_flip_emit(struct nouveau_channel *chan,
 	if (ret)
 		goto fail;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI)
-		BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
-	else
-		BEGIN_NVC0(chan, FermiSw, NV_SW_PAGE_FLIP, 1);
+	BEGIN_NV04(chan, NvSubSw, NV_SW_PAGE_FLIP, 1);
 	OUT_RING  (chan, 0x00000000);
 	FIRE_RING (chan);
 
@@ -724,6 +910,8 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 	struct nouveau_channel *chan;
 	struct nouveau_cli *cli;
 	struct nouveau_fence *fence;
+	struct nv04_display *dispnv04 = nv04_display(dev);
+	int head = nouveau_crtc(crtc)->index;
 	int ret;
 
 	chan = drm->channel;
@@ -763,39 +951,30 @@ nouveau_crtc_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
 
 	/* Initialize a page flip struct */
 	*s = (struct nouveau_page_flip_state)
-		{ { }, event, crtc, fb->bits_per_pixel, fb->pitches[0],
+		{ { }, event, crtc, fb->format->cpp[0] * 8, fb->pitches[0],
 		  new_bo->bo.offset };
 
 	/* Keep vblanks on during flip, for the target crtc of this flip */
 	drm_crtc_vblank_get(crtc);
 
 	/* Emit a page flip */
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nv50_display_flip_next(crtc, fb, chan, swap_interval);
+	if (swap_interval) {
+		ret = RING_SPACE(chan, 8);
 		if (ret)
 			goto fail_unreserve;
-	} else {
-		struct nv04_display *dispnv04 = nv04_display(dev);
-		int head = nouveau_crtc(crtc)->index;
-
-		if (swap_interval) {
-			ret = RING_SPACE(chan, 8);
-			if (ret)
-				goto fail_unreserve;
-
-			BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1);
-			OUT_RING  (chan, 0);
-			BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1);
-			OUT_RING  (chan, head);
-			BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1);
-			OUT_RING  (chan, 0);
-			BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1);
-			OUT_RING  (chan, 0);
-		}
 
-		nouveau_bo_ref(new_bo, &dispnv04->image[head]);
+		BEGIN_NV04(chan, NvSubImageBlit, 0x012c, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_NV04(chan, NvSubImageBlit, 0x0134, 1);
+		OUT_RING  (chan, head);
+		BEGIN_NV04(chan, NvSubImageBlit, 0x0100, 1);
+		OUT_RING  (chan, 0);
+		BEGIN_NV04(chan, NvSubImageBlit, 0x0130, 1);
+		OUT_RING  (chan, 0);
 	}
 
+	nouveau_bo_ref(new_bo, &dispnv04->image[head]);
+
 	ret = nouveau_page_flip_emit(chan, old_bo, new_bo, s, &fence);
 	if (ret)
 		goto fail_unreserve;
@@ -843,16 +1022,8 @@ nouveau_finish_page_flip(struct nouveau_channel *chan,
 
 	s = list_first_entry(&fctx->flip, struct nouveau_page_flip_state, head);
 	if (s->event) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
-			drm_crtc_arm_vblank_event(s->crtc, s->event);
-		} else {
-			drm_crtc_send_vblank_event(s->crtc, s->event);
-
-			/* Give up ownership of vblank for page-flipped crtc */
-			drm_crtc_vblank_put(s->crtc);
-		}
-	}
-	else {
+		drm_crtc_arm_vblank_event(s->crtc, s->event);
+	} else {
 		/* Give up ownership of vblank for page-flipped crtc */
 		drm_crtc_vblank_put(s->crtc);
 	}
@@ -874,12 +1045,10 @@ nouveau_flip_complete(struct nvif_notify *notify)
 	struct nouveau_page_flip_state state;
 
 	if (!nouveau_finish_page_flip(chan, &state)) {
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
-			nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc),
-					 state.offset + state.crtc->y *
-					 state.pitch + state.crtc->x *
-					 state.bpp / 8);
-		}
+		nv_set_crtc_base(drm->dev, drm_crtc_index(state.crtc),
+				 state.offset + state.crtc->y *
+				 state.pitch + state.crtc->x *
+				 state.bpp / 8);
 	}
 
 	return NVIF_NOTIFY_KEEP;
@@ -889,6 +1058,7 @@ int
 nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 			    struct drm_mode_create_dumb *args)
 {
+	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	struct nouveau_bo *bo;
 	uint32_t domain;
 	int ret;
@@ -898,12 +1068,12 @@ nouveau_display_dumb_create(struct drm_file *file_priv, struct drm_device *dev,
 	args->size = roundup(args->size, PAGE_SIZE);
 
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	if (nouveau_drm(dev)->device.info.ram_size != 0)
+	if (nouveau_drm(dev)->client.device.info.ram_size != 0)
 		domain = NOUVEAU_GEM_DOMAIN_VRAM;
 	else
 		domain = NOUVEAU_GEM_DOMAIN_GART;
 
-	ret = nouveau_gem_new(dev, args->size, 0, domain, 0, 0, &bo);
+	ret = nouveau_gem_new(cli, args->size, 0, domain, 0, 0, &bo);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_display.h b/drivers/gpu/drm/nouveau/nouveau_display.h
index 0420ee861ea4..4a75df06c139 100644
--- a/drivers/gpu/drm/nouveau/nouveau_display.h
+++ b/drivers/gpu/drm/nouveau/nouveau_display.h
@@ -22,8 +22,9 @@ nouveau_framebuffer(struct drm_framebuffer *fb)
 	return container_of(fb, struct nouveau_framebuffer, base);
 }
 
-int nouveau_framebuffer_init(struct drm_device *, struct nouveau_framebuffer *,
-			     const struct drm_mode_fb_cmd2 *, struct nouveau_bo *);
+int nouveau_framebuffer_new(struct drm_device *,
+			    const struct drm_mode_fb_cmd2 *,
+			    struct nouveau_bo *, struct nouveau_framebuffer **);
 
 struct nouveau_page_flip_state {
 	struct list_head head;
@@ -39,9 +40,6 @@ struct nouveau_display {
 	int  (*init)(struct drm_device *);
 	void (*fini)(struct drm_device *);
 
-	int  (*fb_ctor)(struct drm_framebuffer *);
-	void (*fb_dtor)(struct drm_framebuffer *);
-
 	struct nvif_object disp;
 
 	struct drm_property *dithering_mode;
@@ -52,6 +50,8 @@ struct nouveau_display {
 	/* not really hue and saturation: */
 	struct drm_property *vibrant_hue_property;
 	struct drm_property *color_vibrance_property;
+
+	struct drm_atomic_state *suspend;
 };
 
 static inline struct nouveau_display *
@@ -63,7 +63,7 @@ nouveau_display(struct drm_device *dev)
 int  nouveau_display_create(struct drm_device *dev);
 void nouveau_display_destroy(struct drm_device *dev);
 int  nouveau_display_init(struct drm_device *dev);
-void nouveau_display_fini(struct drm_device *dev);
+void nouveau_display_fini(struct drm_device *dev, bool suspend);
 int  nouveau_display_suspend(struct drm_device *dev, bool runtime);
 void nouveau_display_resume(struct drm_device *dev, bool runtime);
 int  nouveau_display_vblank_enable(struct drm_device *, unsigned int);
@@ -91,6 +91,8 @@ int nouveau_crtc_set_config(struct drm_mode_set *set);
 #ifdef CONFIG_DRM_NOUVEAU_BACKLIGHT
 extern int nouveau_backlight_init(struct drm_device *);
 extern void nouveau_backlight_exit(struct drm_device *);
+extern void nouveau_backlight_ctor(void);
+extern void nouveau_backlight_dtor(void);
 #else
 static inline int
 nouveau_backlight_init(struct drm_device *dev)
@@ -101,6 +103,17 @@ nouveau_backlight_init(struct drm_device *dev)
 static inline void
 nouveau_backlight_exit(struct drm_device *dev) {
 }
+
+static inline void
+nouveau_backlight_ctor(void) {
+}
+
+static inline void
+nouveau_backlight_dtor(void) {
+}
 #endif
 
+struct drm_framebuffer *
+nouveau_user_framebuffer_create(struct drm_device *, struct drm_file *,
+				const struct drm_mode_fb_cmd2 *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_dp.c b/drivers/gpu/drm/nouveau/nouveau_dp.c
index 87d52d36f4fc..0d052e1660f8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dp.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dp.c
@@ -30,6 +30,13 @@
 #include "nouveau_encoder.h"
 #include "nouveau_crtc.h"
 
+#include <nvif/class.h>
+#include <nvif/cl5070.h>
+
+MODULE_PARM_DESC(mst, "Enable DisplayPort multi-stream (default: enabled)");
+static int nouveau_mst = 1;
+module_param_named(mst, nouveau_mst, int, 0400);
+
 static void
 nouveau_dp_probe_oui(struct drm_device *dev, struct nvkm_i2c_aux *aux, u8 *dpcd)
 {
@@ -55,14 +62,14 @@ nouveau_dp_detect(struct nouveau_encoder *nv_encoder)
 	struct drm_device *dev = nv_encoder->base.base.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nvkm_i2c_aux *aux;
-	u8 *dpcd = nv_encoder->dp.dpcd;
+	u8 dpcd[8];
 	int ret;
 
 	aux = nv_encoder->aux;
 	if (!aux)
 		return -ENODEV;
 
-	ret = nvkm_rdaux(aux, DP_DPCD_REV, dpcd, 8);
+	ret = nvkm_rdaux(aux, DP_DPCD_REV, dpcd, sizeof(dpcd));
 	if (ret)
 		return ret;
 
@@ -84,5 +91,11 @@ nouveau_dp_detect(struct nouveau_encoder *nv_encoder)
 		     nv_encoder->dp.link_nr, nv_encoder->dp.link_bw);
 
 	nouveau_dp_probe_oui(dev, aux, dpcd);
-	return 0;
+
+	ret = nv50_mstm_detect(nv_encoder->dp.mstm, dpcd, nouveau_mst);
+	if (ret == 1)
+		return NOUVEAU_DP_MST;
+	if (ret == 0)
+		return NOUVEAU_DP_SST;
+	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nouveau_drm.c b/drivers/gpu/drm/nouveau/nouveau_drm.c
index 3100fd88a015..468ed1d3bb26 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_drm.c
@@ -37,6 +37,8 @@
 #include <core/pci.h>
 #include <core/tegra.h>
 
+#include <nvif/driver.h>
+
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
 #include <nvif/cla06f.h>
@@ -47,6 +49,7 @@
 #include "nouveau_ttm.h"
 #include "nouveau_gem.h"
 #include "nouveau_vga.h"
+#include "nouveau_led.h"
 #include "nouveau_hwmon.h"
 #include "nouveau_acpi.h"
 #include "nouveau_bios.h"
@@ -108,35 +111,53 @@ nouveau_name(struct drm_device *dev)
 		return nouveau_platform_name(dev->platformdev);
 }
 
+static void
+nouveau_cli_fini(struct nouveau_cli *cli)
+{
+	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
+	usif_client_fini(cli);
+	nvif_device_fini(&cli->device);
+	nvif_client_fini(&cli->base);
+}
+
 static int
-nouveau_cli_create(struct drm_device *dev, const char *sname,
-		   int size, void **pcli)
+nouveau_cli_init(struct nouveau_drm *drm, const char *sname,
+		 struct nouveau_cli *cli)
 {
-	struct nouveau_cli *cli = *pcli = kzalloc(size, GFP_KERNEL);
+	u64 device = nouveau_name(drm->dev);
 	int ret;
-	if (cli) {
-		snprintf(cli->name, sizeof(cli->name), "%s", sname);
-		cli->dev = dev;
 
-		ret = nvif_client_init(NULL, cli->name, nouveau_name(dev),
-				       nouveau_config, nouveau_debug,
+	snprintf(cli->name, sizeof(cli->name), "%s", sname);
+	cli->dev = drm->dev;
+	mutex_init(&cli->mutex);
+	usif_client_init(cli);
+
+	if (cli == &drm->client) {
+		ret = nvif_driver_init(NULL, nouveau_config, nouveau_debug,
+				       cli->name, device, &cli->base);
+	} else {
+		ret = nvif_client_init(&drm->client.base, cli->name, device,
 				       &cli->base);
-		if (ret == 0) {
-			mutex_init(&cli->mutex);
-			usif_client_init(cli);
-		}
-		return ret;
 	}
-	return -ENOMEM;
-}
+	if (ret) {
+		NV_ERROR(drm, "Client allocation failed: %d\n", ret);
+		goto done;
+	}
 
-static void
-nouveau_cli_destroy(struct nouveau_cli *cli)
-{
-	nvkm_vm_ref(NULL, &nvxx_client(&cli->base)->vm, NULL);
-	nvif_client_fini(&cli->base);
-	usif_client_fini(cli);
-	kfree(cli);
+	ret = nvif_device_init(&cli->base.object, 0, NV_DEVICE,
+			       &(struct nv_device_v0) {
+					.device = ~0,
+			       }, sizeof(struct nv_device_v0),
+			       &cli->device);
+	if (ret) {
+		NV_ERROR(drm, "Device allocation failed: %d\n", ret);
+		goto done;
+	}
+
+done:
+	if (ret)
+		nouveau_cli_fini(cli);
+	return ret;
 }
 
 static void
@@ -160,7 +181,7 @@ nouveau_accel_fini(struct nouveau_drm *drm)
 static void
 nouveau_accel_init(struct nouveau_drm *drm)
 {
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nvif_sclass *sclass;
 	u32 arg0, arg1;
 	int ret, i, n;
@@ -214,7 +235,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family >= NV_DEVICE_INFO_V0_KEPLER) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NVA06F_V0_ENGINE_CE0 |
 					  NVA06F_V0_ENGINE_CE1,
 					  0, &drm->cechan);
@@ -227,7 +248,7 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	if (device->info.chipset >= 0xa3 &&
 	    device->info.chipset != 0xaa &&
 	    device->info.chipset != 0xac) {
-		ret = nouveau_channel_new(drm, &drm->device,
+		ret = nouveau_channel_new(drm, &drm->client.device,
 					  NvDmaFB, NvDmaTT, &drm->cechan);
 		if (ret)
 			NV_ERROR(drm, "failed to create ce channel, %d\n", ret);
@@ -239,7 +260,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 		arg1 = NvDmaTT;
 	}
 
-	ret = nouveau_channel_new(drm, &drm->device, arg0, arg1, &drm->channel);
+	ret = nouveau_channel_new(drm, &drm->client.device,
+				  arg0, arg1, &drm->channel);
 	if (ret) {
 		NV_ERROR(drm, "failed to create kernel channel, %d\n", ret);
 		nouveau_accel_fini(drm);
@@ -279,8 +301,8 @@ nouveau_accel_init(struct nouveau_drm *drm)
 	}
 
 	if (device->info.family < NV_DEVICE_INFO_V0_FERMI) {
-		ret = nvkm_gpuobj_new(nvxx_device(&drm->device), 32, 0, false,
-				      NULL, &drm->notify);
+		ret = nvkm_gpuobj_new(nvxx_device(&drm->client.device), 32, 0,
+				      false, NULL, &drm->notify);
 		if (ret) {
 			NV_ERROR(drm, "failed to allocate notifier, %d\n", ret);
 			nouveau_accel_fini(drm);
@@ -406,12 +428,17 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	struct nouveau_drm *drm;
 	int ret;
 
-	ret = nouveau_cli_create(dev, "DRM", sizeof(*drm), (void **)&drm);
+	if (!(drm = kzalloc(sizeof(*drm), GFP_KERNEL)))
+		return -ENOMEM;
+	dev->dev_private = drm;
+	drm->dev = dev;
+
+	ret = nouveau_cli_init(drm, "DRM", &drm->client);
 	if (ret)
 		return ret;
 
-	dev->dev_private = drm;
-	drm->dev = dev;
+	dev->irq_enabled = true;
+
 	nvxx_client(&drm->client.base)->debug =
 		nvkm_dbgopt(nouveau_debug, "DRM");
 
@@ -420,33 +447,24 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 
 	nouveau_get_hdmi_dev(drm);
 
-	ret = nvif_device_init(&drm->client.base.object, 0, NV_DEVICE,
-			       &(struct nv_device_v0) {
-					.device = ~0,
-			       }, sizeof(struct nv_device_v0),
-			       &drm->device);
-	if (ret)
-		goto fail_device;
-
-	dev->irq_enabled = true;
-
 	/* workaround an odd issue on nvc1 by disabling the device's
 	 * nosnoop capability.  hopefully won't cause issues until a
 	 * better fix is found - assuming there is one...
 	 */
-	if (drm->device.info.chipset == 0xc1)
-		nvif_mask(&drm->device.object, 0x00088080, 0x00000800, 0x00000000);
+	if (drm->client.device.info.chipset == 0xc1)
+		nvif_mask(&drm->client.device.object, 0x00088080, 0x00000800, 0x00000000);
 
 	nouveau_vga_init(drm);
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		if (!nvxx_device(&drm->device)->mmu) {
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		if (!nvxx_device(&drm->client.device)->mmu) {
 			ret = -ENOSYS;
 			goto fail_device;
 		}
 
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &drm->client.vm);
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device),
+				  0, (1ULL << 40), 0x1000, NULL,
+				  &drm->client.vm);
 		if (ret)
 			goto fail_device;
 
@@ -475,6 +493,7 @@ nouveau_drm_load(struct drm_device *dev, unsigned long flags)
 	nouveau_hwmon_init(dev);
 	nouveau_accel_init(drm);
 	nouveau_fbcon_init(dev);
+	nouveau_led_init(dev);
 
 	if (nouveau_runtime_pm != 0) {
 		pm_runtime_use_autosuspend(dev->dev);
@@ -495,12 +514,12 @@ fail_bios:
 fail_ttm:
 	nouveau_vga_fini(drm);
 fail_device:
-	nvif_device_fini(&drm->device);
-	nouveau_cli_destroy(&drm->client);
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 	return ret;
 }
 
-static int
+static void
 nouveau_drm_unload(struct drm_device *dev)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
@@ -510,13 +529,14 @@ nouveau_drm_unload(struct drm_device *dev)
 		pm_runtime_forbid(dev->dev);
 	}
 
+	nouveau_led_fini(dev);
 	nouveau_fbcon_fini(dev);
 	nouveau_accel_fini(drm);
 	nouveau_hwmon_fini(dev);
 	nouveau_debugfs_fini(drm);
 
 	if (dev->mode_config.num_crtc)
-		nouveau_display_fini(dev);
+		nouveau_display_fini(dev, false);
 	nouveau_display_destroy(dev);
 
 	nouveau_bios_takedown(dev);
@@ -524,11 +544,10 @@ nouveau_drm_unload(struct drm_device *dev)
 	nouveau_ttm_fini(drm);
 	nouveau_vga_fini(drm);
 
-	nvif_device_fini(&drm->device);
 	if (drm->hdmi_device)
 		pci_dev_put(drm->hdmi_device);
-	nouveau_cli_destroy(&drm->client);
-	return 0;
+	nouveau_cli_fini(&drm->client);
+	kfree(drm);
 }
 
 void
@@ -558,9 +577,10 @@ static int
 nouveau_do_suspend(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 	int ret;
 
+	nouveau_led_suspend(dev);
+
 	if (dev->mode_config.num_crtc) {
 		NV_INFO(drm, "suspending console...\n");
 		nouveau_fbcon_set_suspend(dev, 1);
@@ -586,7 +606,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 			goto fail_display;
 	}
 
-	NV_INFO(drm, "suspending client object trees...\n");
+	NV_INFO(drm, "suspending fence...\n");
 	if (drm->fence && nouveau_fence(drm)->suspend) {
 		if (!nouveau_fence(drm)->suspend(drm)) {
 			ret = -ENOMEM;
@@ -594,13 +614,7 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 		}
 	}
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		ret = nvif_client_suspend(&cli->base);
-		if (ret)
-			goto fail_client;
-	}
-
-	NV_INFO(drm, "suspending kernel object tree...\n");
+	NV_INFO(drm, "suspending object tree...\n");
 	ret = nvif_client_suspend(&drm->client.base);
 	if (ret)
 		goto fail_client;
@@ -608,10 +622,6 @@ nouveau_do_suspend(struct drm_device *dev, bool runtime)
 	return 0;
 
 fail_client:
-	list_for_each_entry_continue_reverse(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
@@ -627,19 +637,14 @@ static int
 nouveau_do_resume(struct drm_device *dev, bool runtime)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nouveau_cli *cli;
 
-	NV_INFO(drm, "resuming kernel object tree...\n");
+	NV_INFO(drm, "resuming object tree...\n");
 	nvif_client_resume(&drm->client.base);
 
-	NV_INFO(drm, "resuming client object trees...\n");
+	NV_INFO(drm, "resuming fence...\n");
 	if (drm->fence && nouveau_fence(drm)->resume)
 		nouveau_fence(drm)->resume(drm);
 
-	list_for_each_entry(cli, &drm->clients, head) {
-		nvif_client_resume(&cli->base);
-	}
-
 	nouveau_run_vbios_init(dev);
 
 	if (dev->mode_config.num_crtc) {
@@ -649,6 +654,8 @@ nouveau_do_resume(struct drm_device *dev, bool runtime)
 		nouveau_fbcon_set_suspend(dev, 0);
 	}
 
+	nouveau_led_resume(dev);
+
 	return 0;
 }
 
@@ -692,7 +699,12 @@ nouveau_pmops_resume(struct device *dev)
 		return ret;
 	pci_set_master(pdev);
 
-	return nouveau_do_resume(drm_dev, false);
+	ret = nouveau_do_resume(drm_dev, false);
+
+	/* Monitors may have been connected / disconnected during suspend */
+	schedule_work(&nouveau_drm(drm_dev)->hpd_work);
+
+	return ret;
 }
 
 static int
@@ -747,7 +759,7 @@ nouveau_pmops_runtime_resume(struct device *dev)
 {
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct drm_device *drm_dev = pci_get_drvdata(pdev);
-	struct nvif_device *device = &nouveau_drm(drm_dev)->device;
+	struct nvif_device *device = &nouveau_drm(drm_dev)->client.device;
 	int ret;
 
 	if (nouveau_runtime_pm == 0)
@@ -761,11 +773,18 @@ nouveau_pmops_runtime_resume(struct device *dev)
 	pci_set_master(pdev);
 
 	ret = nouveau_do_resume(drm_dev, true);
-	drm_kms_helper_poll_enable(drm_dev);
+
+	if (!drm_dev->mode_config.poll_enabled)
+		drm_kms_helper_poll_enable(drm_dev);
+
 	/* do magic */
 	nvif_mask(&device->object, 0x088488, (1 << 25), (1 << 25));
 	vga_switcheroo_set_dynamic_switch(pdev, VGA_SWITCHEROO_ON);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_ON;
+
+	/* Monitors may have been connected / disconnected during suspend */
+	schedule_work(&nouveau_drm(drm_dev)->hpd_work);
+
 	return ret;
 }
 
@@ -826,20 +845,20 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	get_task_comm(tmpname, current);
 	snprintf(name, sizeof(name), "%s[%d]", tmpname, pid_nr(fpriv->pid));
 
-	ret = nouveau_cli_create(dev, name, sizeof(*cli), (void **)&cli);
+	if (!(cli = kzalloc(sizeof(*cli), GFP_KERNEL)))
+		return ret;
 
+	ret = nouveau_cli_init(drm, name, cli);
 	if (ret)
-		goto out_suspend;
+		goto done;
 
 	cli->base.super = false;
 
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nvkm_vm_new(nvxx_device(&drm->device), 0, (1ULL << 40),
-				  0x1000, NULL, &cli->vm);
-		if (ret) {
-			nouveau_cli_destroy(cli);
-			goto out_suspend;
-		}
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA) {
+		ret = nvkm_vm_new(nvxx_device(&drm->client.device), 0,
+				  (1ULL << 40), 0x1000, NULL, &cli->vm);
+		if (ret)
+			goto done;
 
 		nvxx_client(&cli->base)->vm = cli->vm;
 	}
@@ -850,10 +869,14 @@ nouveau_drm_open(struct drm_device *dev, struct drm_file *fpriv)
 	list_add(&cli->head, &drm->clients);
 	mutex_unlock(&drm->client.mutex);
 
-out_suspend:
+done:
+	if (ret && cli) {
+		nouveau_cli_fini(cli);
+		kfree(cli);
+	}
+
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
-
 	return ret;
 }
 
@@ -880,7 +903,8 @@ static void
 nouveau_drm_postclose(struct drm_device *dev, struct drm_file *fpriv)
 {
 	struct nouveau_cli *cli = nouveau_cli(fpriv);
-	nouveau_cli_destroy(cli);
+	nouveau_cli_fini(cli);
+	kfree(cli);
 	pm_runtime_mark_last_busy(dev->dev);
 	pm_runtime_put_autosuspend(dev->dev);
 }
@@ -1030,6 +1054,7 @@ static void nouveau_display_options(void)
 	DRM_DEBUG_DRIVER("... modeset      : %d\n", nouveau_modeset);
 	DRM_DEBUG_DRIVER("... runpm        : %d\n", nouveau_runtime_pm);
 	DRM_DEBUG_DRIVER("... vram_pushbuf : %d\n", nouveau_vram_pushbuf);
+	DRM_DEBUG_DRIVER("... hdmimhz      : %d\n", nouveau_hdmimhz);
 }
 
 static const struct dev_pm_ops nouveau_pm_ops = {
@@ -1105,6 +1130,7 @@ nouveau_drm_init(void)
 #endif
 
 	nouveau_register_dsm_handler();
+	nouveau_backlight_ctor();
 	return drm_pci_init(&driver_pci, &nouveau_drm_pci_driver);
 }
 
@@ -1115,6 +1141,7 @@ nouveau_drm_exit(void)
 		return;
 
 	drm_pci_exit(&driver_pci, &nouveau_drm_pci_driver);
+	nouveau_backlight_dtor();
 	nouveau_unregister_dsm_handler();
 
 #ifdef CONFIG_NOUVEAU_PLATFORM_DRIVER
diff --git a/drivers/gpu/drm/nouveau/nouveau_drv.h b/drivers/gpu/drm/nouveau/nouveau_drv.h
index 822a0212cd48..eadec2f49ad3 100644
--- a/drivers/gpu/drm/nouveau/nouveau_drv.h
+++ b/drivers/gpu/drm/nouveau/nouveau_drv.h
@@ -37,6 +37,8 @@
  *      - implemented limited ABI16/NVIF interop
  */
 
+#include <linux/notifier.h>
+
 #include <nvif/client.h>
 #include <nvif/device.h>
 #include <nvif/ioctl.h>
@@ -84,14 +86,17 @@ enum nouveau_drm_handle {
 
 struct nouveau_cli {
 	struct nvif_client base;
+	struct drm_device *dev;
+	struct mutex mutex;
+
+	struct nvif_device device;
+
 	struct nvkm_vm *vm; /*XXX*/
 	struct list_head head;
-	struct mutex mutex;
 	void *abi16;
 	struct list_head objects;
 	struct list_head notifys;
 	char name[32];
-	struct drm_device *dev;
 };
 
 static inline struct nouveau_cli *
@@ -109,7 +114,6 @@ struct nouveau_drm {
 	struct nouveau_cli client;
 	struct drm_device *dev;
 
-	struct nvif_device device;
 	struct list_head clients;
 
 	struct {
@@ -161,11 +165,21 @@ struct nouveau_drm {
 	struct nvbios vbios;
 	struct nouveau_display *display;
 	struct backlight_device *backlight;
+	struct list_head bl_connectors;
+	struct work_struct hpd_work;
+	struct work_struct fbcon_work;
+	int fbcon_new_state;
+#ifdef CONFIG_ACPI
+	struct notifier_block acpi_nb;
+#endif
 
 	/* power management */
 	struct nouveau_hwmon *hwmon;
 	struct nouveau_debugfs *debugfs;
 
+	/* led management */
+	struct nouveau_led *led;
+
 	/* display power reference */
 	bool have_disp_power_ref;
 
@@ -201,6 +215,10 @@ void nouveau_drm_device_remove(struct drm_device *dev);
 	if (unlikely(drm_debug & DRM_UT_DRIVER))                               \
 		NV_PRINTK(info, &(drm)->client, f, ##a);                       \
 } while(0)
+#define NV_ATOMIC(drm,f,a...) do {                                             \
+	if (unlikely(drm_debug & DRM_UT_ATOMIC))                               \
+		NV_PRINTK(info, &(drm)->client, f, ##a);                       \
+} while(0)
 
 extern int nouveau_modeset;
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_encoder.h b/drivers/gpu/drm/nouveau/nouveau_encoder.h
index ee6a6d3fc80f..198e5f27682f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_encoder.h
+++ b/drivers/gpu/drm/nouveau/nouveau_encoder.h
@@ -30,6 +30,7 @@
 #include <subdev/bios/dcb.h>
 
 #include <drm/drm_encoder_slave.h>
+#include <drm/drm_dp_mst_helper.h>
 #include "dispnv04/disp.h"
 
 #define NV_DPMS_CLEARED 0x80
@@ -57,15 +58,16 @@ struct nouveau_encoder {
 
 	union {
 		struct {
-			u8  dpcd[8];
+			struct nv50_mstm *mstm;
 			int link_nr;
 			int link_bw;
-			u32 datarate;
 		} dp;
 	};
 
 	void (*enc_save)(struct drm_encoder *encoder);
 	void (*enc_restore)(struct drm_encoder *encoder);
+	void (*update)(struct nouveau_encoder *, u8 head,
+		       struct drm_display_mode *, u8 proto, u8 depth);
 };
 
 struct nouveau_encoder *
@@ -90,9 +92,17 @@ get_slave_funcs(struct drm_encoder *enc)
 }
 
 /* nouveau_dp.c */
+enum nouveau_dp_status {
+	NOUVEAU_DP_SST,
+	NOUVEAU_DP_MST,
+};
+
 int nouveau_dp_detect(struct nouveau_encoder *);
 
 struct nouveau_connector *
 nouveau_encoder_connector_get(struct nouveau_encoder *encoder);
 
+int nv50_mstm_detect(struct nv50_mstm *, u8 dpcd[8], int allow);
+void nv50_mstm_remove(struct nv50_mstm *);
+void nv50_mstm_service(struct nv50_mstm *);
 #endif /* __NOUVEAU_ENCODER_H__ */
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.c b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
index 9f5692726c16..442e25c17383 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.c
@@ -41,6 +41,7 @@
 #include <drm/drm_crtc.h>
 #include <drm/drm_crtc_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_atomic.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_gem.h"
@@ -58,8 +59,8 @@ static void
 nouveau_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
-	struct nvif_device *device = &drm->device;
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -90,8 +91,8 @@ static void
 nouveau_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
-	struct nvif_device *device = &drm->device;
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -122,8 +123,8 @@ static void
 nouveau_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
-	struct nvif_device *device = &drm->device;
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
+	struct nvif_device *device = &drm->client.device;
 	int ret;
 
 	if (info->state != FBINFO_STATE_RUNNING)
@@ -154,7 +155,7 @@ static int
 nouveau_fbcon_sync(struct fb_info *info)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -181,7 +182,7 @@ static int
 nouveau_fbcon_open(struct fb_info *info, int user)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
 	int ret = pm_runtime_get_sync(drm->dev->dev);
 	if (ret < 0 && ret != -EACCES)
 		return ret;
@@ -192,42 +193,30 @@ static int
 nouveau_fbcon_release(struct fb_info *info, int user)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
 	pm_runtime_put(drm->dev->dev);
 	return 0;
 }
 
 static struct fb_ops nouveau_fbcon_ops = {
 	.owner = THIS_MODULE,
+	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open = nouveau_fbcon_open,
 	.fb_release = nouveau_fbcon_release,
-	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
 	.fb_fillrect = nouveau_fbcon_fillrect,
 	.fb_copyarea = nouveau_fbcon_copyarea,
 	.fb_imageblit = nouveau_fbcon_imageblit,
 	.fb_sync = nouveau_fbcon_sync,
-	.fb_pan_display = drm_fb_helper_pan_display,
-	.fb_blank = drm_fb_helper_blank,
-	.fb_setcmap = drm_fb_helper_setcmap,
-	.fb_debug_enter = drm_fb_helper_debug_enter,
-	.fb_debug_leave = drm_fb_helper_debug_leave,
 };
 
 static struct fb_ops nouveau_fbcon_sw_ops = {
 	.owner = THIS_MODULE,
+	DRM_FB_HELPER_DEFAULT_OPS,
 	.fb_open = nouveau_fbcon_open,
 	.fb_release = nouveau_fbcon_release,
-	.fb_check_var = drm_fb_helper_check_var,
-	.fb_set_par = drm_fb_helper_set_par,
 	.fb_fillrect = drm_fb_helper_cfb_fillrect,
 	.fb_copyarea = drm_fb_helper_cfb_copyarea,
 	.fb_imageblit = drm_fb_helper_cfb_imageblit,
-	.fb_pan_display = drm_fb_helper_pan_display,
-	.fb_blank = drm_fb_helper_blank,
-	.fb_setcmap = drm_fb_helper_setcmap,
-	.fb_debug_enter = drm_fb_helper_debug_enter,
-	.fb_debug_leave = drm_fb_helper_debug_leave,
 };
 
 void
@@ -277,10 +266,10 @@ nouveau_fbcon_accel_init(struct drm_device *dev)
 	struct fb_info *info = fbcon->helper.fbdev;
 	int ret;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		ret = nv04_fbcon_accel_init(info);
 	else
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_FERMI)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_FERMI)
 		ret = nv50_fbcon_accel_init(info);
 	else
 		ret = nvc0_fbcon_accel_init(info);
@@ -333,16 +322,15 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 {
 	struct nouveau_fbdev *fbcon =
 		container_of(helper, struct nouveau_fbdev, helper);
-	struct drm_device *dev = fbcon->dev;
+	struct drm_device *dev = fbcon->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct fb_info *info;
-	struct drm_framebuffer *fb;
-	struct nouveau_framebuffer *nouveau_fb;
+	struct nouveau_framebuffer *fb;
 	struct nouveau_channel *chan;
 	struct nouveau_bo *nvbo;
 	struct drm_mode_fb_cmd2 mode_cmd;
-	int size, ret;
+	int ret;
 
 	mode_cmd.width = sizes->surface_width;
 	mode_cmd.height = sizes->surface_height;
@@ -353,16 +341,18 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 	mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
 							  sizes->surface_depth);
 
-	size = mode_cmd.pitches[0] * mode_cmd.height;
-	size = roundup(size, PAGE_SIZE);
-
-	ret = nouveau_gem_new(dev, size, 0, NOUVEAU_GEM_DOMAIN_VRAM,
+	ret = nouveau_gem_new(&drm->client, mode_cmd.pitches[0] *
+			      mode_cmd.height, 0, NOUVEAU_GEM_DOMAIN_VRAM,
 			      0, 0x0000, &nvbo);
 	if (ret) {
 		NV_ERROR(drm, "failed to allocate framebuffer\n");
 		goto out;
 	}
 
+	ret = nouveau_framebuffer_new(dev, &mode_cmd, nvbo, &fb);
+	if (ret)
+		goto out_unref;
+
 	ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, false);
 	if (ret) {
 		NV_ERROR(drm, "failed to pin fb: %d\n", ret);
@@ -377,8 +367,7 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 
 	chan = nouveau_nofbaccel ? NULL : drm->channel;
 	if (chan && device->info.family >= NV_DEVICE_INFO_V0_TESLA) {
-		ret = nouveau_bo_vma_add(nvbo, drm->client.vm,
-					&fbcon->nouveau_fb.vma);
+		ret = nouveau_bo_vma_add(nvbo, drm->client.vm, &fb->vma);
 		if (ret) {
 			NV_ERROR(drm, "failed to map fb into chan: %d\n", ret);
 			chan = NULL;
@@ -394,13 +383,8 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 
 	info->par = fbcon;
 
-	nouveau_framebuffer_init(dev, &fbcon->nouveau_fb, &mode_cmd, nvbo);
-
-	nouveau_fb = &fbcon->nouveau_fb;
-	fb = &nouveau_fb->base;
-
 	/* setup helper */
-	fbcon->helper.fb = fb;
+	fbcon->helper.fb = &fb->base;
 
 	strcpy(info->fix.id, "nouveaufb");
 	if (!chan)
@@ -411,14 +395,15 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 			      FBINFO_HWACCEL_IMAGEBLIT;
 	info->flags |= FBINFO_CAN_FORCE_OUTPUT;
 	info->fbops = &nouveau_fbcon_sw_ops;
-	info->fix.smem_start = nvbo->bo.mem.bus.base +
-			       nvbo->bo.mem.bus.offset;
-	info->fix.smem_len = size;
+	info->fix.smem_start = fb->nvbo->bo.mem.bus.base +
+			       fb->nvbo->bo.mem.bus.offset;
+	info->fix.smem_len = fb->nvbo->bo.mem.num_pages << PAGE_SHIFT;
 
-	info->screen_base = nvbo_kmap_obj_iovirtual(nouveau_fb->nvbo);
-	info->screen_size = size;
+	info->screen_base = nvbo_kmap_obj_iovirtual(fb->nvbo);
+	info->screen_size = fb->nvbo->bo.mem.num_pages << PAGE_SHIFT;
 
-	drm_fb_helper_fill_fix(info, fb->pitches[0], fb->depth);
+	drm_fb_helper_fill_fix(info, fb->base.pitches[0],
+			       fb->base.format->depth);
 	drm_fb_helper_fill_var(info, &fbcon->helper, sizes->fb_width, sizes->fb_height);
 
 	/* Use default scratch pixmap (info->pixmap.flags = FB_PIXMAP_SYSTEM) */
@@ -429,20 +414,19 @@ nouveau_fbcon_create(struct drm_fb_helper *helper,
 
 	/* To allow resizeing without swapping buffers */
 	NV_INFO(drm, "allocated %dx%d fb: 0x%llx, bo %p\n",
-		nouveau_fb->base.width, nouveau_fb->base.height,
-		nvbo->bo.offset, nvbo);
+		fb->base.width, fb->base.height, fb->nvbo->bo.offset, nvbo);
 
 	vga_switcheroo_client_fb_set(dev->pdev, info);
 	return 0;
 
 out_unlock:
 	if (chan)
-		nouveau_bo_vma_del(nvbo, &fbcon->nouveau_fb.vma);
-	nouveau_bo_unmap(nvbo);
+		nouveau_bo_vma_del(fb->nvbo, &fb->vma);
+	nouveau_bo_unmap(fb->nvbo);
 out_unpin:
-	nouveau_bo_unpin(nvbo);
+	nouveau_bo_unpin(fb->nvbo);
 out_unref:
-	nouveau_bo_ref(NULL, &nvbo);
+	nouveau_bo_ref(NULL, &fb->nvbo);
 out:
 	return ret;
 }
@@ -458,28 +442,26 @@ nouveau_fbcon_output_poll_changed(struct drm_device *dev)
 static int
 nouveau_fbcon_destroy(struct drm_device *dev, struct nouveau_fbdev *fbcon)
 {
-	struct nouveau_framebuffer *nouveau_fb = &fbcon->nouveau_fb;
+	struct nouveau_framebuffer *nouveau_fb = nouveau_framebuffer(fbcon->helper.fb);
 
 	drm_fb_helper_unregister_fbi(&fbcon->helper);
 	drm_fb_helper_release_fbi(&fbcon->helper);
+	drm_fb_helper_fini(&fbcon->helper);
 
 	if (nouveau_fb->nvbo) {
-		nouveau_bo_unmap(nouveau_fb->nvbo);
 		nouveau_bo_vma_del(nouveau_fb->nvbo, &nouveau_fb->vma);
+		nouveau_bo_unmap(nouveau_fb->nvbo);
 		nouveau_bo_unpin(nouveau_fb->nvbo);
-		drm_gem_object_unreference_unlocked(&nouveau_fb->nvbo->gem);
-		nouveau_fb->nvbo = NULL;
+		drm_framebuffer_unreference(&nouveau_fb->base);
 	}
-	drm_fb_helper_fini(&fbcon->helper);
-	drm_framebuffer_unregister_private(&nouveau_fb->base);
-	drm_framebuffer_cleanup(&nouveau_fb->base);
+
 	return 0;
 }
 
 void nouveau_fbcon_gpu_lockup(struct fb_info *info)
 {
 	struct nouveau_fbdev *fbcon = info->par;
-	struct nouveau_drm *drm = nouveau_drm(fbcon->dev);
+	struct nouveau_drm *drm = nouveau_drm(fbcon->helper.dev);
 
 	NV_ERROR(drm, "GPU lockup - switching to software fbcon\n");
 	info->flags |= FBINFO_HWACCEL_DISABLED;
@@ -491,19 +473,43 @@ static const struct drm_fb_helper_funcs nouveau_fbcon_helper_funcs = {
 	.fb_probe = nouveau_fbcon_create,
 };
 
+static void
+nouveau_fbcon_set_suspend_work(struct work_struct *work)
+{
+	struct nouveau_drm *drm = container_of(work, typeof(*drm), fbcon_work);
+	int state = READ_ONCE(drm->fbcon_new_state);
+
+	if (state == FBINFO_STATE_RUNNING)
+		pm_runtime_get_sync(drm->dev->dev);
+
+	console_lock();
+	if (state == FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_restore(drm->dev);
+	drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
+	if (state != FBINFO_STATE_RUNNING)
+		nouveau_fbcon_accel_save_disable(drm->dev);
+	console_unlock();
+
+	if (state == FBINFO_STATE_RUNNING) {
+		pm_runtime_mark_last_busy(drm->dev->dev);
+		pm_runtime_put_sync(drm->dev->dev);
+	}
+}
+
 void
 nouveau_fbcon_set_suspend(struct drm_device *dev, int state)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	if (drm->fbcon) {
-		console_lock();
-		if (state == FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_restore(dev);
-		drm_fb_helper_set_suspend(&drm->fbcon->helper, state);
-		if (state != FBINFO_STATE_RUNNING)
-			nouveau_fbcon_accel_save_disable(dev);
-		console_unlock();
-	}
+
+	if (!drm->fbcon)
+		return;
+
+	drm->fbcon_new_state = state;
+	/* Since runtime resume can happen as a result of a sysfs operation,
+	 * it's possible we already have the console locked. So handle fbcon
+	 * init/deinit from a seperate work thread
+	 */
+	schedule_work(&drm->fbcon_work);
 }
 
 int
@@ -522,13 +528,12 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (!fbcon)
 		return -ENOMEM;
 
-	fbcon->dev = dev;
 	drm->fbcon = fbcon;
+	INIT_WORK(&drm->fbcon_work, nouveau_fbcon_set_suspend_work);
 
 	drm_fb_helper_prepare(dev, &fbcon->helper, &nouveau_fbcon_helper_funcs);
 
-	ret = drm_fb_helper_init(dev, &fbcon->helper,
-				 dev->mode_config.num_crtc, 4);
+	ret = drm_fb_helper_init(dev, &fbcon->helper, 4);
 	if (ret)
 		goto free;
 
@@ -536,16 +541,17 @@ nouveau_fbcon_init(struct drm_device *dev)
 	if (ret)
 		goto fini;
 
-	if (drm->device.info.ram_size <= 32 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 32 * 1024 * 1024)
 		preferred_bpp = 8;
 	else
-	if (drm->device.info.ram_size <= 64 * 1024 * 1024)
+	if (drm->client.device.info.ram_size <= 64 * 1024 * 1024)
 		preferred_bpp = 16;
 	else
 		preferred_bpp = 32;
 
 	/* disable all the possible outputs/crtcs before entering KMS mode */
-	drm_helper_disable_unused_functions(dev);
+	if (!drm_drv_uses_atomic_modeset(dev))
+		drm_helper_disable_unused_functions(dev);
 
 	ret = drm_fb_helper_initial_config(&fbcon->helper, preferred_bpp);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_fbcon.h b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
index ca77ad001978..e2bca729721e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fbcon.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fbcon.h
@@ -33,8 +33,6 @@
 
 struct nouveau_fbdev {
 	struct drm_fb_helper helper;
-	struct nouveau_framebuffer nouveau_fb;
-	struct drm_device *dev;
 	unsigned int saved_flags;
 	struct nvif_object surf2d;
 	struct nvif_object clip;
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.c b/drivers/gpu/drm/nouveau/nouveau_fence.c
index 4bb9ab892ae1..99e14e3e0fe4 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.c
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.c
@@ -28,7 +28,7 @@
 
 #include <linux/ktime.h>
 #include <linux/hrtimer.h>
-#include <trace/events/fence.h>
+#include <trace/events/dma_fence.h>
 
 #include <nvif/cl826e.h>
 #include <nvif/notify.h>
@@ -38,11 +38,11 @@
 #include "nouveau_dma.h"
 #include "nouveau_fence.h"
 
-static const struct fence_ops nouveau_fence_ops_uevent;
-static const struct fence_ops nouveau_fence_ops_legacy;
+static const struct dma_fence_ops nouveau_fence_ops_uevent;
+static const struct dma_fence_ops nouveau_fence_ops_legacy;
 
 static inline struct nouveau_fence *
-from_fence(struct fence *fence)
+from_fence(struct dma_fence *fence)
 {
 	return container_of(fence, struct nouveau_fence, base);
 }
@@ -58,23 +58,23 @@ nouveau_fence_signal(struct nouveau_fence *fence)
 {
 	int drop = 0;
 
-	fence_signal_locked(&fence->base);
+	dma_fence_signal_locked(&fence->base);
 	list_del(&fence->head);
 	rcu_assign_pointer(fence->channel, NULL);
 
-	if (test_bit(FENCE_FLAG_USER_BITS, &fence->base.flags)) {
+	if (test_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags)) {
 		struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 		if (!--fctx->notify_ref)
 			drop = 1;
 	}
 
-	fence_put(&fence->base);
+	dma_fence_put(&fence->base);
 	return drop;
 }
 
 static struct nouveau_fence *
-nouveau_local_fence(struct fence *fence, struct nouveau_drm *drm) {
+nouveau_local_fence(struct dma_fence *fence, struct nouveau_drm *drm) {
 	struct nouveau_fence_priv *priv = (void*)drm->fence;
 
 	if (fence->ops != &nouveau_fence_ops_legacy &&
@@ -190,7 +190,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 		return;
 
 	ret = nvif_notify_init(&chan->user, nouveau_fence_wait_uevent_handler,
-			       false, G82_CHANNEL_DMA_V0_NTFY_UEVENT,
+			       false, NV826E_V0_NTFY_NON_STALL_INTERRUPT,
 			       &(struct nvif_notify_uevent_req) { },
 			       sizeof(struct nvif_notify_uevent_req),
 			       sizeof(struct nvif_notify_uevent_rep),
@@ -201,7 +201,7 @@ nouveau_fence_context_new(struct nouveau_channel *chan, struct nouveau_fence_cha
 
 struct nouveau_fence_work {
 	struct work_struct work;
-	struct fence_cb cb;
+	struct dma_fence_cb cb;
 	void (*func)(void *);
 	void *data;
 };
@@ -214,7 +214,7 @@ nouveau_fence_work_handler(struct work_struct *kwork)
 	kfree(work);
 }
 
-static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
+static void nouveau_fence_work_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
 {
 	struct nouveau_fence_work *work = container_of(cb, typeof(*work), cb);
 
@@ -222,12 +222,12 @@ static void nouveau_fence_work_cb(struct fence *fence, struct fence_cb *cb)
 }
 
 void
-nouveau_fence_work(struct fence *fence,
+nouveau_fence_work(struct dma_fence *fence,
 		   void (*func)(void *), void *data)
 {
 	struct nouveau_fence_work *work;
 
-	if (fence_is_signaled(fence))
+	if (dma_fence_is_signaled(fence))
 		goto err;
 
 	work = kmalloc(sizeof(*work), GFP_KERNEL);
@@ -245,7 +245,7 @@ nouveau_fence_work(struct fence *fence,
 	work->func = func;
 	work->data = data;
 
-	if (fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
+	if (dma_fence_add_callback(fence, &work->cb, nouveau_fence_work_cb) < 0)
 		goto err_free;
 	return;
 
@@ -266,17 +266,17 @@ nouveau_fence_emit(struct nouveau_fence *fence, struct nouveau_channel *chan)
 	fence->timeout  = jiffies + (15 * HZ);
 
 	if (priv->uevent)
-		fence_init(&fence->base, &nouveau_fence_ops_uevent,
-			   &fctx->lock, fctx->context, ++fctx->sequence);
+		dma_fence_init(&fence->base, &nouveau_fence_ops_uevent,
+			       &fctx->lock, fctx->context, ++fctx->sequence);
 	else
-		fence_init(&fence->base, &nouveau_fence_ops_legacy,
-			   &fctx->lock, fctx->context, ++fctx->sequence);
+		dma_fence_init(&fence->base, &nouveau_fence_ops_legacy,
+			       &fctx->lock, fctx->context, ++fctx->sequence);
 	kref_get(&fctx->fence_ref);
 
-	trace_fence_emit(&fence->base);
+	trace_dma_fence_emit(&fence->base);
 	ret = fctx->emit(fence);
 	if (!ret) {
-		fence_get(&fence->base);
+		dma_fence_get(&fence->base);
 		spin_lock_irq(&fctx->lock);
 
 		if (nouveau_fence_update(chan, fctx))
@@ -298,7 +298,7 @@ nouveau_fence_done(struct nouveau_fence *fence)
 		struct nouveau_channel *chan;
 		unsigned long flags;
 
-		if (test_bit(FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
+		if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->base.flags))
 			return true;
 
 		spin_lock_irqsave(&fctx->lock, flags);
@@ -307,11 +307,11 @@ nouveau_fence_done(struct nouveau_fence *fence)
 			nvif_notify_put(&fctx->notify);
 		spin_unlock_irqrestore(&fctx->lock, flags);
 	}
-	return fence_is_signaled(&fence->base);
+	return dma_fence_is_signaled(&fence->base);
 }
 
 static long
-nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
+nouveau_fence_wait_legacy(struct dma_fence *f, bool intr, long wait)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	unsigned long sleep_time = NSEC_PER_MSEC / 1000;
@@ -330,7 +330,7 @@ nouveau_fence_wait_legacy(struct fence *f, bool intr, long wait)
 		__set_current_state(intr ? TASK_INTERRUPTIBLE :
 					   TASK_UNINTERRUPTIBLE);
 
-		kt = ktime_set(0, sleep_time);
+		kt = sleep_time;
 		schedule_hrtimeout(&kt, HRTIMER_MODE_REL);
 		sleep_time *= 2;
 		if (sleep_time > NSEC_PER_MSEC)
@@ -378,7 +378,7 @@ nouveau_fence_wait(struct nouveau_fence *fence, bool lazy, bool intr)
 	if (!lazy)
 		return nouveau_fence_wait_busy(fence, intr);
 
-	ret = fence_wait_timeout(&fence->base, intr, 15 * HZ);
+	ret = dma_fence_wait_timeout(&fence->base, intr, 15 * HZ);
 	if (ret < 0)
 		return ret;
 	else if (!ret)
@@ -391,7 +391,7 @@ int
 nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool exclusive, bool intr)
 {
 	struct nouveau_fence_chan *fctx = chan->fence;
-	struct fence *fence;
+	struct dma_fence *fence;
 	struct reservation_object *resv = nvbo->bo.resv;
 	struct reservation_object_list *fobj;
 	struct nouveau_fence *f;
@@ -421,7 +421,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		}
 
 		if (must_wait)
-			ret = fence_wait(fence, intr);
+			ret = dma_fence_wait(fence, intr);
 
 		return ret;
 	}
@@ -446,7 +446,7 @@ nouveau_fence_sync(struct nouveau_bo *nvbo, struct nouveau_channel *chan, bool e
 		}
 
 		if (must_wait)
-			ret = fence_wait(fence, intr);
+			ret = dma_fence_wait(fence, intr);
 	}
 
 	return ret;
@@ -456,7 +456,7 @@ void
 nouveau_fence_unref(struct nouveau_fence **pfence)
 {
 	if (*pfence)
-		fence_put(&(*pfence)->base);
+		dma_fence_put(&(*pfence)->base);
 	*pfence = NULL;
 }
 
@@ -484,12 +484,12 @@ nouveau_fence_new(struct nouveau_channel *chan, bool sysmem,
 	return ret;
 }
 
-static const char *nouveau_fence_get_get_driver_name(struct fence *fence)
+static const char *nouveau_fence_get_get_driver_name(struct dma_fence *fence)
 {
 	return "nouveau";
 }
 
-static const char *nouveau_fence_get_timeline_name(struct fence *f)
+static const char *nouveau_fence_get_timeline_name(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -503,7 +503,7 @@ static const char *nouveau_fence_get_timeline_name(struct fence *f)
  * result. The drm node should still be there, so we can derive the index from
  * the fence context.
  */
-static bool nouveau_fence_is_signaled(struct fence *f)
+static bool nouveau_fence_is_signaled(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -519,7 +519,7 @@ static bool nouveau_fence_is_signaled(struct fence *f)
 	return ret;
 }
 
-static bool nouveau_fence_no_signaling(struct fence *f)
+static bool nouveau_fence_no_signaling(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 
@@ -527,33 +527,33 @@ static bool nouveau_fence_no_signaling(struct fence *f)
 	 * caller should have a reference on the fence,
 	 * else fence could get freed here
 	 */
-	WARN_ON(atomic_read(&fence->base.refcount.refcount) <= 1);
+	WARN_ON(kref_read(&fence->base.refcount) <= 1);
 
 	/*
-	 * This needs uevents to work correctly, but fence_add_callback relies on
+	 * This needs uevents to work correctly, but dma_fence_add_callback relies on
 	 * being able to enable signaling. It will still get signaled eventually,
 	 * just not right away.
 	 */
 	if (nouveau_fence_is_signaled(f)) {
 		list_del(&fence->head);
 
-		fence_put(&fence->base);
+		dma_fence_put(&fence->base);
 		return false;
 	}
 
 	return true;
 }
 
-static void nouveau_fence_release(struct fence *f)
+static void nouveau_fence_release(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
 
 	kref_put(&fctx->fence_ref, nouveau_fence_context_put);
-	fence_free(&fence->base);
+	dma_fence_free(&fence->base);
 }
 
-static const struct fence_ops nouveau_fence_ops_legacy = {
+static const struct dma_fence_ops nouveau_fence_ops_legacy = {
 	.get_driver_name = nouveau_fence_get_get_driver_name,
 	.get_timeline_name = nouveau_fence_get_timeline_name,
 	.enable_signaling = nouveau_fence_no_signaling,
@@ -562,7 +562,7 @@ static const struct fence_ops nouveau_fence_ops_legacy = {
 	.release = nouveau_fence_release
 };
 
-static bool nouveau_fence_enable_signaling(struct fence *f)
+static bool nouveau_fence_enable_signaling(struct dma_fence *f)
 {
 	struct nouveau_fence *fence = from_fence(f);
 	struct nouveau_fence_chan *fctx = nouveau_fctx(fence);
@@ -573,18 +573,18 @@ static bool nouveau_fence_enable_signaling(struct fence *f)
 
 	ret = nouveau_fence_no_signaling(f);
 	if (ret)
-		set_bit(FENCE_FLAG_USER_BITS, &fence->base.flags);
+		set_bit(DMA_FENCE_FLAG_USER_BITS, &fence->base.flags);
 	else if (!--fctx->notify_ref)
 		nvif_notify_put(&fctx->notify);
 
 	return ret;
 }
 
-static const struct fence_ops nouveau_fence_ops_uevent = {
+static const struct dma_fence_ops nouveau_fence_ops_uevent = {
 	.get_driver_name = nouveau_fence_get_get_driver_name,
 	.get_timeline_name = nouveau_fence_get_timeline_name,
 	.enable_signaling = nouveau_fence_enable_signaling,
 	.signaled = nouveau_fence_is_signaled,
-	.wait = fence_default_wait,
-	.release = NULL
+	.wait = dma_fence_default_wait,
+	.release = nouveau_fence_release
 };
diff --git a/drivers/gpu/drm/nouveau/nouveau_fence.h b/drivers/gpu/drm/nouveau/nouveau_fence.h
index 64c4ce7115ad..d5e58a38f160 100644
--- a/drivers/gpu/drm/nouveau/nouveau_fence.h
+++ b/drivers/gpu/drm/nouveau/nouveau_fence.h
@@ -1,14 +1,14 @@
 #ifndef __NOUVEAU_FENCE_H__
 #define __NOUVEAU_FENCE_H__
 
-#include <linux/fence.h>
+#include <linux/dma-fence.h>
 #include <nvif/notify.h>
 
 struct nouveau_drm;
 struct nouveau_bo;
 
 struct nouveau_fence {
-	struct fence base;
+	struct dma_fence base;
 
 	struct list_head head;
 
@@ -24,7 +24,7 @@ void nouveau_fence_unref(struct nouveau_fence **);
 
 int  nouveau_fence_emit(struct nouveau_fence *, struct nouveau_channel *);
 bool nouveau_fence_done(struct nouveau_fence *);
-void nouveau_fence_work(struct fence *, void (*)(void *), void *);
+void nouveau_fence_work(struct dma_fence *, void (*)(void *), void *);
 int  nouveau_fence_wait(struct nouveau_fence *, bool lazy, bool intr);
 int  nouveau_fence_sync(struct nouveau_bo *, struct nouveau_channel *, bool exclusive, bool intr);
 
@@ -92,7 +92,6 @@ struct nv84_fence_chan {
 	struct nouveau_fence_chan base;
 	struct nvkm_vma vma;
 	struct nvkm_vma vma_gart;
-	struct nvkm_vma dispc_vma[4];
 };
 
 struct nv84_fence_priv {
@@ -100,9 +99,9 @@ struct nv84_fence_priv {
 	struct nouveau_bo *bo;
 	struct nouveau_bo *bo_gart;
 	u32 *suspend;
+	struct mutex mutex;
 };
 
-u64  nv84_fence_crtc(struct nouveau_channel *, int);
 int  nv84_fence_context_new(struct nouveau_channel *);
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.c b/drivers/gpu/drm/nouveau/nouveau_gem.c
index 72e2399bce39..ca5397beb357 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@@ -119,7 +119,7 @@ nouveau_gem_object_unmap(struct nouveau_bo *nvbo, struct nvkm_vma *vma)
 	const bool mapped = nvbo->bo.mem.mem_type != TTM_PL_SYSTEM;
 	struct reservation_object *resv = nvbo->bo.resv;
 	struct reservation_object_list *fobj;
-	struct fence *fence = NULL;
+	struct dma_fence *fence = NULL;
 
 	fobj = reservation_object_get_list(resv);
 
@@ -175,11 +175,11 @@ nouveau_gem_object_close(struct drm_gem_object *gem, struct drm_file *file_priv)
 }
 
 int
-nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
+nouveau_gem_new(struct nouveau_cli *cli, u64 size, int align, uint32_t domain,
 		uint32_t tile_mode, uint32_t tile_flags,
 		struct nouveau_bo **pnvbo)
 {
-	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nouveau_drm *drm = nouveau_drm(cli->dev);
 	struct nouveau_bo *nvbo;
 	u32 flags = 0;
 	int ret;
@@ -194,7 +194,7 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	if (domain & NOUVEAU_GEM_DOMAIN_COHERENT)
 		flags |= TTM_PL_FLAG_UNCACHED;
 
-	ret = nouveau_bo_new(dev, size, align, flags, tile_mode,
+	ret = nouveau_bo_new(cli, size, align, flags, tile_mode,
 			     tile_flags, NULL, NULL, pnvbo);
 	if (ret)
 		return ret;
@@ -206,12 +206,12 @@ nouveau_gem_new(struct drm_device *dev, int size, int align, uint32_t domain,
 	 */
 	nvbo->valid_domains = NOUVEAU_GEM_DOMAIN_VRAM |
 			      NOUVEAU_GEM_DOMAIN_GART;
-	if (drm->device.info.family >= NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family >= NV_DEVICE_INFO_V0_TESLA)
 		nvbo->valid_domains &= domain;
 
 	/* Initialize the embedded gem-object. We return a single gem-reference
 	 * to the caller, instead of a normal nouveau_bo ttm reference. */
-	ret = drm_gem_object_init(dev, &nvbo->gem, nvbo->bo.mem.size);
+	ret = drm_gem_object_init(drm->dev, &nvbo->gem, nvbo->bo.mem.size);
 	if (ret) {
 		nouveau_bo_ref(NULL, pnvbo);
 		return -ENOMEM;
@@ -257,7 +257,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	struct drm_nouveau_gem_new *req = data;
 	struct nouveau_bo *nvbo = NULL;
 	int ret = 0;
@@ -267,7 +267,7 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}
 
-	ret = nouveau_gem_new(dev, req->info.size, req->align,
+	ret = nouveau_gem_new(cli, req->info.size, req->align,
 			      req->info.domain, req->info.tile_mode,
 			      req->info.tile_flags, &nvbo);
 	if (ret)
@@ -369,7 +369,7 @@ validate_init(struct nouveau_channel *chan, struct drm_file *file_priv,
 {
 	struct nouveau_cli *cli = nouveau_cli(file_priv);
 	int trycnt = 0;
-	int ret, i;
+	int ret = -EINVAL, i;
 	struct nouveau_bo *res_bo = NULL;
 	LIST_HEAD(gart_list);
 	LIST_HEAD(vram_list);
@@ -496,7 +496,7 @@ validate_list(struct nouveau_channel *chan, struct nouveau_cli *cli,
 			return ret;
 		}
 
-		if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA) {
+		if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA) {
 			if (nvbo->bo.offset == b->presumed.offset &&
 			    ((nvbo->bo.mem.mem_type == TTM_PL_VRAM &&
 			      b->presumed.domain & NOUVEAU_GEM_DOMAIN_VRAM) ||
@@ -767,7 +767,7 @@ nouveau_gem_ioctl_pushbuf(struct drm_device *dev, void *data,
 				      push[i].length);
 		}
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		ret = RING_SPACE(chan, req->nr_push * 2);
 		if (ret) {
 			NV_PRINTK(err, cli, "cal_space: %d\n", ret);
@@ -840,7 +840,7 @@ out_next:
 		req->suffix0 = 0x00000000;
 		req->suffix1 = 0x00000000;
 	} else
-	if (drm->device.info.chipset >= 0x25) {
+	if (drm->client.device.info.chipset >= 0x25) {
 		req->suffix0 = 0x00020000;
 		req->suffix1 = 0x00000000;
 	} else {
@@ -861,6 +861,7 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 	struct nouveau_bo *nvbo;
 	bool no_wait = !!(req->flags & NOUVEAU_GEM_CPU_PREP_NOWAIT);
 	bool write = !!(req->flags & NOUVEAU_GEM_CPU_PREP_WRITE);
+	long lret;
 	int ret;
 
 	gem = drm_gem_object_lookup(file_priv, req->handle);
@@ -868,19 +869,15 @@ nouveau_gem_ioctl_cpu_prep(struct drm_device *dev, void *data,
 		return -ENOENT;
 	nvbo = nouveau_gem_object(gem);
 
-	if (no_wait)
-		ret = reservation_object_test_signaled_rcu(nvbo->bo.resv, write) ? 0 : -EBUSY;
-	else {
-		long lret;
+	lret = reservation_object_wait_timeout_rcu(nvbo->bo.resv, write, true,
+						   no_wait ? 0 : 30 * HZ);
+	if (!lret)
+		ret = -EBUSY;
+	else if (lret > 0)
+		ret = 0;
+	else
+		ret = lret;
 
-		lret = reservation_object_wait_timeout_rcu(nvbo->bo.resv, write, true, 30 * HZ);
-		if (!lret)
-			ret = -EBUSY;
-		else if (lret > 0)
-			ret = 0;
-		else
-			ret = lret;
-	}
 	nouveau_bo_sync_for_cpu(nvbo);
 	drm_gem_object_unreference_unlocked(gem);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_gem.h b/drivers/gpu/drm/nouveau/nouveau_gem.h
index 7e32da2e037a..8fa6ed9ddd3a 100644
--- a/drivers/gpu/drm/nouveau/nouveau_gem.h
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.h
@@ -16,7 +16,7 @@ nouveau_gem_object(struct drm_gem_object *gem)
 }
 
 /* nouveau_gem.c */
-extern int nouveau_gem_new(struct drm_device *, int size, int align,
+extern int nouveau_gem_new(struct nouveau_cli *, u64 size, int align,
 			   uint32_t domain, uint32_t tile_mode,
 			   uint32_t tile_flags, struct nouveau_bo **);
 extern void nouveau_gem_object_del(struct drm_gem_object *);
diff --git a/drivers/gpu/drm/nouveau/nouveau_hwmon.c b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
index 71f764bf4cc6..23b1670c1c2f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_hwmon.c
+++ b/drivers/gpu/drm/nouveau/nouveau_hwmon.c
@@ -43,7 +43,7 @@ nouveau_hwmon_show_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int temp = nvkm_therm_temp_get(therm);
 
 	if (temp < 0)
@@ -69,7 +69,7 @@ nouveau_hwmon_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	      therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST) * 1000);
@@ -81,7 +81,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -102,7 +102,7 @@ nouveau_hwmon_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	 therm->attr_get(therm, NVKM_THERM_ATTR_THRS_FAN_BOOST_HYST) * 1000);
@@ -114,7 +114,7 @@ nouveau_hwmon_set_temp1_auto_point1_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -134,7 +134,7 @@ nouveau_hwmon_max_temp(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK) * 1000);
@@ -145,7 +145,7 @@ nouveau_hwmon_set_max_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -165,7 +165,7 @@ nouveau_hwmon_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_DOWN_CLK_HYST) * 1000);
@@ -176,7 +176,7 @@ nouveau_hwmon_set_max_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -197,7 +197,7 @@ nouveau_hwmon_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL) * 1000);
@@ -209,7 +209,7 @@ nouveau_hwmon_set_critical_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -230,7 +230,7 @@ nouveau_hwmon_critical_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_CRITICAL_HYST) * 1000);
@@ -243,7 +243,7 @@ nouveau_hwmon_set_critical_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -263,7 +263,7 @@ nouveau_hwmon_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	       therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN) * 1000);
@@ -275,7 +275,7 @@ nouveau_hwmon_set_emergency_temp(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -296,7 +296,7 @@ nouveau_hwmon_emergency_temp_hyst(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n",
 	  therm->attr_get(therm, NVKM_THERM_ATTR_THRS_SHUTDOWN_HYST) * 1000);
@@ -309,7 +309,7 @@ nouveau_hwmon_set_emergency_temp_hyst(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 
 	if (kstrtol(buf, 10, &value) == -EINVAL)
@@ -349,7 +349,7 @@ nouveau_hwmon_show_fan1_input(struct device *d, struct device_attribute *attr,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 
 	return snprintf(buf, PAGE_SIZE, "%d\n", nvkm_therm_fan_sense(therm));
 }
@@ -362,7 +362,7 @@ nouveau_hwmon_get_pwm1_enable(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MODE);
@@ -378,7 +378,7 @@ nouveau_hwmon_set_pwm1_enable(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -401,7 +401,7 @@ nouveau_hwmon_get_pwm1(struct device *d, struct device_attribute *a, char *buf)
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->fan_get(therm);
@@ -417,7 +417,7 @@ nouveau_hwmon_set_pwm1(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret = -ENODEV;
 	long value;
 
@@ -441,7 +441,7 @@ nouveau_hwmon_get_pwm1_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MIN_DUTY);
@@ -457,7 +457,7 @@ nouveau_hwmon_set_pwm1_min(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -481,7 +481,7 @@ nouveau_hwmon_get_pwm1_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	int ret;
 
 	ret = therm->attr_get(therm, NVKM_THERM_ATTR_FAN_MAX_DUTY);
@@ -497,7 +497,7 @@ nouveau_hwmon_set_pwm1_max(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
 	long value;
 	int ret;
 
@@ -521,7 +521,7 @@ nouveau_hwmon_get_in0_input(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 	int ret;
 
 	ret = nvkm_volt_get(volt);
@@ -540,7 +540,7 @@ nouveau_hwmon_get_in0_min(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->min_uv)
 		return -ENODEV;
@@ -557,7 +557,7 @@ nouveau_hwmon_get_in0_max(struct device *d,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
 
 	if (!volt || !volt->max_uv)
 		return -ENODEV;
@@ -584,7 +584,7 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 {
 	struct drm_device *dev = dev_get_drvdata(d);
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	int result = nvkm_iccsense_read_all(iccsense);
 
 	if (result < 0)
@@ -596,6 +596,32 @@ nouveau_hwmon_get_power1_input(struct device *d, struct device_attribute *a,
 static SENSOR_DEVICE_ATTR(power1_input, S_IRUGO,
 			  nouveau_hwmon_get_power1_input, NULL, 0);
 
+static ssize_t
+nouveau_hwmon_get_power1_max(struct device *d, struct device_attribute *a,
+			     char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return sprintf(buf, "%i\n", iccsense->power_w_max);
+}
+
+static SENSOR_DEVICE_ATTR(power1_max, S_IRUGO,
+			  nouveau_hwmon_get_power1_max, NULL, 0);
+
+static ssize_t
+nouveau_hwmon_get_power1_crit(struct device *d, struct device_attribute *a,
+			      char *buf)
+{
+	struct drm_device *dev = dev_get_drvdata(d);
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
+	return sprintf(buf, "%i\n", iccsense->power_w_crit);
+}
+
+static SENSOR_DEVICE_ATTR(power1_crit, S_IRUGO,
+			  nouveau_hwmon_get_power1_crit, NULL, 0);
+
 static struct attribute *hwmon_default_attributes[] = {
 	&sensor_dev_attr_name.dev_attr.attr,
 	&sensor_dev_attr_update_rate.dev_attr.attr,
@@ -639,6 +665,12 @@ static struct attribute *hwmon_power_attributes[] = {
 	NULL
 };
 
+static struct attribute *hwmon_power_caps_attributes[] = {
+	&sensor_dev_attr_power1_max.dev_attr.attr,
+	&sensor_dev_attr_power1_crit.dev_attr.attr,
+	NULL
+};
+
 static const struct attribute_group hwmon_default_attrgroup = {
 	.attrs = hwmon_default_attributes,
 };
@@ -657,6 +689,9 @@ static const struct attribute_group hwmon_in0_attrgroup = {
 static const struct attribute_group hwmon_power_attrgroup = {
 	.attrs = hwmon_power_attributes,
 };
+static const struct attribute_group hwmon_power_caps_attrgroup = {
+	.attrs = hwmon_power_caps_attributes,
+};
 #endif
 
 int
@@ -664,9 +699,9 @@ nouveau_hwmon_init(struct drm_device *dev)
 {
 #if defined(CONFIG_HWMON) || (defined(MODULE) && defined(CONFIG_HWMON_MODULE))
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvkm_therm *therm = nvxx_therm(&drm->device);
-	struct nvkm_volt *volt = nvxx_volt(&drm->device);
-	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->device);
+	struct nvkm_therm *therm = nvxx_therm(&drm->client.device);
+	struct nvkm_volt *volt = nvxx_volt(&drm->client.device);
+	struct nvkm_iccsense *iccsense = nvxx_iccsense(&drm->client.device);
 	struct nouveau_hwmon *hwmon;
 	struct device *hwmon_dev;
 	int ret = 0;
@@ -728,8 +763,16 @@ nouveau_hwmon_init(struct drm_device *dev)
 	if (iccsense && iccsense->data_valid && !list_empty(&iccsense->rails)) {
 		ret = sysfs_create_group(&hwmon_dev->kobj,
 					 &hwmon_power_attrgroup);
+
 		if (ret)
 			goto error;
+
+		if (iccsense->power_w_max && iccsense->power_w_crit) {
+			ret = sysfs_create_group(&hwmon_dev->kobj,
+						 &hwmon_power_caps_attrgroup);
+			if (ret)
+				goto error;
+		}
 	}
 
 	hwmon->hwmon = hwmon_dev;
@@ -759,6 +802,7 @@ nouveau_hwmon_fini(struct drm_device *dev)
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_fan_rpm_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_in0_attrgroup);
 		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_attrgroup);
+		sysfs_remove_group(&hwmon->hwmon->kobj, &hwmon_power_caps_attrgroup);
 
 		hwmon_device_unregister(hwmon->hwmon);
 	}
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.c b/drivers/gpu/drm/nouveau/nouveau_led.c
new file mode 100644
index 000000000000..2c5e0628da12
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_led.c
@@ -0,0 +1,140 @@
+/*
+ * Copyright (C) 2016 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+/*
+ * Authors:
+ *  Martin Peres <martin.peres@free.fr>
+ */
+
+#include <linux/leds.h>
+
+#include "nouveau_led.h"
+#include <nvkm/subdev/gpio.h>
+
+static enum led_brightness
+nouveau_led_get_brightness(struct led_classdev *led)
+{
+	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
+	struct nouveau_drm *drm = nouveau_drm(drm_dev);
+	struct nvif_object *device = &drm->client.device.object;
+	u32 div, duty;
+
+	div =  nvif_rd32(device, 0x61c880) & 0x00ffffff;
+	duty = nvif_rd32(device, 0x61c884) & 0x00ffffff;
+
+	if (div > 0)
+		return duty * LED_FULL / div;
+	else
+		return 0;
+}
+
+static void
+nouveau_led_set_brightness(struct led_classdev *led, enum led_brightness value)
+{
+	struct drm_device *drm_dev = container_of(led, struct nouveau_led, led)->dev;
+	struct nouveau_drm *drm = nouveau_drm(drm_dev);
+	struct nvif_object *device = &drm->client.device.object;
+
+	u32 input_clk = 27e6; /* PDISPLAY.SOR[1].PWM is connected to the crystal */
+	u32 freq = 100; /* this is what nvidia uses and it should be good-enough */
+	u32 div, duty;
+
+	div = input_clk / freq;
+	duty = value * div / LED_FULL;
+
+	/* for now, this is safe to directly poke those registers because:
+	 *  - A: nvidia never puts the logo led to any other PWM controler
+	 *       than PDISPLAY.SOR[1].PWM.
+	 *  - B: nouveau does not touch these registers anywhere else
+	 */
+	nvif_wr32(device, 0x61c880, div);
+	nvif_wr32(device, 0x61c884, 0xc0000000 | duty);
+}
+
+
+int
+nouveau_led_init(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nvkm_gpio *gpio = nvxx_gpio(&drm->client.device);
+	struct dcb_gpio_func logo_led;
+	int ret;
+
+	if (!gpio)
+		return 0;
+
+	/* check that there is a GPIO controlling the logo LED */
+	if (nvkm_gpio_find(gpio, 0, DCB_GPIO_LOGO_LED_PWM, 0xff, &logo_led))
+		return 0;
+
+	drm->led = kzalloc(sizeof(*drm->led), GFP_KERNEL);
+	if (!drm->led)
+		return -ENOMEM;
+	drm->led->dev = dev;
+
+	drm->led->led.name = "nvidia-logo";
+	drm->led->led.max_brightness = 255;
+	drm->led->led.brightness_get = nouveau_led_get_brightness;
+	drm->led->led.brightness_set = nouveau_led_set_brightness;
+
+	ret = led_classdev_register(dev->dev, &drm->led->led);
+	if (ret) {
+		kfree(drm->led);
+		drm->led = NULL;
+		return ret;
+	}
+
+	return 0;
+}
+
+void
+nouveau_led_suspend(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+
+	if (drm->led)
+		led_classdev_suspend(&drm->led->led);
+}
+
+void
+nouveau_led_resume(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+
+	if (drm->led)
+		led_classdev_resume(&drm->led->led);
+}
+
+void
+nouveau_led_fini(struct drm_device *dev)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+
+	if (drm->led) {
+		led_classdev_unregister(&drm->led->led);
+		kfree(drm->led);
+		drm->led = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/nouveau/nouveau_led.h b/drivers/gpu/drm/nouveau/nouveau_led.h
new file mode 100644
index 000000000000..21a5775028cc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nouveau_led.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2015 Martin Peres
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Martin Peres <martin.peres@free.fr>
+ */
+
+#ifndef __NOUVEAU_LED_H__
+#define __NOUVEAU_LED_H__
+
+#include "nouveau_drv.h"
+
+struct led_classdev;
+
+struct nouveau_led {
+	struct drm_device *dev;
+
+	struct led_classdev led;
+};
+
+static inline struct nouveau_led *
+nouveau_led(struct drm_device *dev)
+{
+	return nouveau_drm(dev)->led;
+}
+
+/* nouveau_led.c */
+#if IS_REACHABLE(CONFIG_LEDS_CLASS)
+int  nouveau_led_init(struct drm_device *dev);
+void nouveau_led_suspend(struct drm_device *dev);
+void nouveau_led_resume(struct drm_device *dev);
+void nouveau_led_fini(struct drm_device *dev);
+#else
+static inline int  nouveau_led_init(struct drm_device *dev) { return 0; };
+static inline void nouveau_led_suspend(struct drm_device *dev) { };
+static inline void nouveau_led_resume(struct drm_device *dev) { };
+static inline void nouveau_led_fini(struct drm_device *dev) { };
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nouveau_nvif.c b/drivers/gpu/drm/nouveau/nouveau_nvif.c
index 15f0925ea13b..b3f29b1ce9ea 100644
--- a/drivers/gpu/drm/nouveau/nouveau_nvif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_nvif.c
@@ -60,20 +60,15 @@ nvkm_client_ioctl(void *priv, bool super, void *data, u32 size, void **hack)
 static int
 nvkm_client_resume(void *priv)
 {
-	return nvkm_client_init(priv);
+	struct nvkm_client *client = priv;
+	return nvkm_object_init(&client->object);
 }
 
 static int
 nvkm_client_suspend(void *priv)
 {
-	return nvkm_client_fini(priv, true);
-}
-
-static void
-nvkm_client_driver_fini(void *priv)
-{
 	struct nvkm_client *client = priv;
-	nvkm_client_del(&client);
+	return nvkm_object_fini(&client->object, true);
 }
 
 static int
@@ -108,23 +103,14 @@ static int
 nvkm_client_driver_init(const char *name, u64 device, const char *cfg,
 			const char *dbg, void **ppriv)
 {
-	struct nvkm_client *client;
-	int ret;
-
-	ret = nvkm_client_new(name, device, cfg, dbg, &client);
-	*ppriv = client;
-	if (ret)
-		return ret;
-
-	client->ntfy = nvkm_client_ntfy;
-	return 0;
+	return nvkm_client_new(name, device, cfg, dbg, nvkm_client_ntfy,
+			       (struct nvkm_client **)ppriv);
 }
 
 const struct nvif_driver
 nvif_driver_nvkm = {
 	.name = "nvkm",
 	.init = nvkm_client_driver_init,
-	.fini = nvkm_client_driver_fini,
 	.suspend = nvkm_client_suspend,
 	.resume = nvkm_client_resume,
 	.ioctl = nvkm_client_ioctl,
diff --git a/drivers/gpu/drm/nouveau/nouveau_prime.c b/drivers/gpu/drm/nouveau/nouveau_prime.c
index a0a9704cfe2b..1fefc93af1d7 100644
--- a/drivers/gpu/drm/nouveau/nouveau_prime.c
+++ b/drivers/gpu/drm/nouveau/nouveau_prime.c
@@ -60,6 +60,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 							 struct dma_buf_attachment *attach,
 							 struct sg_table *sg)
 {
+	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_bo *nvbo;
 	struct reservation_object *robj = attach->dmabuf->resv;
 	u32 flags = 0;
@@ -68,7 +69,7 @@ struct drm_gem_object *nouveau_gem_prime_import_sg_table(struct drm_device *dev,
 	flags = TTM_PL_FLAG_TT;
 
 	ww_mutex_lock(&robj->lock, NULL);
-	ret = nouveau_bo_new(dev, attach->dmabuf->size, 0, flags, 0, 0,
+	ret = nouveau_bo_new(&drm->client, attach->dmabuf->size, 0, flags, 0, 0,
 			     sg, robj, &nvbo);
 	ww_mutex_unlock(&robj->lock);
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index db35ab5883ac..b7ab268f7d6f 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -24,10 +24,10 @@ nouveau_sgdma_destroy(struct ttm_tt *ttm)
 }
 
 static int
-nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	if (ttm->sg) {
 		node->sg    = ttm->sg;
@@ -36,7 +36,7 @@ nv04_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 
 	nvkm_vm_map(&node->vma[0], node);
 	nvbe->node = node;
@@ -58,10 +58,10 @@ static struct ttm_backend_func nv04_sgdma_backend = {
 };
 
 static int
-nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
+nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *reg)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)ttm;
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 
 	/* noop: bound in move_notify() */
 	if (ttm->sg) {
@@ -71,7 +71,7 @@ nv50_sgdma_bind(struct ttm_tt *ttm, struct ttm_mem_reg *mem)
 		node->sg    = NULL;
 		node->pages = nvbe->ttm.dma_address;
 	}
-	node->size = (mem->num_pages << PAGE_SHIFT) >> 12;
+	node->size = (reg->num_pages << PAGE_SHIFT) >> 12;
 	return 0;
 }
 
@@ -100,7 +100,7 @@ nouveau_sgdma_create_ttm(struct ttm_bo_device *bdev,
 	if (!nvbe)
 		return NULL;
 
-	if (drm->device.info.family < NV_DEVICE_INFO_V0_TESLA)
+	if (drm->client.device.info.family < NV_DEVICE_INFO_V0_TESLA)
 		nvbe->ttm.ttm.func = &nv04_sgdma_backend;
 	else
 		nvbe->ttm.ttm.func = &nv50_sgdma_backend;
diff --git a/drivers/gpu/drm/nouveau/nouveau_ttm.c b/drivers/gpu/drm/nouveau/nouveau_ttm.c
index a6dbe8258040..13e5cc5f07fe 100644
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@@ -36,7 +36,7 @@ static int
 nouveau_vram_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_fb *fb = nvxx_fb(&drm->device);
+	struct nvkm_fb *fb = nvxx_fb(&drm->client.device);
 	man->priv = fb;
 	return 0;
 }
@@ -64,53 +64,53 @@ nvkm_mem_node_cleanup(struct nvkm_mem *node)
 
 static void
 nouveau_vram_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
-	nvkm_mem_node_cleanup(mem->mm_node);
-	ram->func->put(ram, (struct nvkm_mem **)&mem->mm_node);
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	ram->func->put(ram, (struct nvkm_mem **)&reg->mm_node);
 }
 
 static int
 nouveau_vram_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_ram *ram = nvxx_fb(&drm->device)->ram;
+	struct nvkm_ram *ram = nvxx_fb(&drm->client.device)->ram;
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
 	struct nvkm_mem *node;
 	u32 size_nc = 0;
 	int ret;
 
-	if (drm->device.info.ram_size == 0)
+	if (drm->client.device.info.ram_size == 0)
 		return -ENOMEM;
 
 	if (nvbo->tile_flags & NOUVEAU_GEM_TILE_NONCONTIG)
 		size_nc = 1 << nvbo->page_shift;
 
-	ret = ram->func->get(ram, mem->num_pages << PAGE_SHIFT,
-			     mem->page_alignment << PAGE_SHIFT, size_nc,
+	ret = ram->func->get(ram, reg->num_pages << PAGE_SHIFT,
+			     reg->page_alignment << PAGE_SHIFT, size_nc,
 			     (nvbo->tile_flags >> 8) & 0x3ff, &node);
 	if (ret) {
-		mem->mm_node = NULL;
+		reg->mm_node = NULL;
 		return (ret == -ENOSPC) ? 0 : ret;
 	}
 
 	node->page_shift = nvbo->page_shift;
 
-	mem->mm_node = node;
-	mem->start   = node->offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->offset >> PAGE_SHIFT;
 	return 0;
 }
 
 const struct ttm_mem_type_manager_func nouveau_vram_manager = {
-	nouveau_vram_manager_init,
-	nouveau_vram_manager_fini,
-	nouveau_vram_manager_new,
-	nouveau_vram_manager_del,
+	.init = nouveau_vram_manager_init,
+	.takedown = nouveau_vram_manager_fini,
+	.get_node = nouveau_vram_manager_new,
+	.put_node = nouveau_vram_manager_del,
 };
 
 static int
@@ -127,18 +127,18 @@ nouveau_gart_manager_fini(struct ttm_mem_type_manager *man)
 
 static void
 nouveau_gart_manager_del(struct ttm_mem_type_manager *man,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
-	nvkm_mem_node_cleanup(mem->mm_node);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	nvkm_mem_node_cleanup(reg->mm_node);
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 			 struct ttm_buffer_object *bo,
 			 const struct ttm_place *place,
-			 struct ttm_mem_reg *mem)
+			 struct ttm_mem_reg *reg)
 {
 	struct nouveau_drm *drm = nouveau_bdev(bo->bdev);
 	struct nouveau_bo *nvbo = nouveau_bo(bo);
@@ -150,7 +150,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	switch (drm->device.info.family) {
+	switch (drm->client.device.info.family) {
 	case NV_DEVICE_INFO_V0_TNT:
 	case NV_DEVICE_INFO_V0_CELSIUS:
 	case NV_DEVICE_INFO_V0_KELVIN:
@@ -158,7 +158,7 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 	case NV_DEVICE_INFO_V0_CURIE:
 		break;
 	case NV_DEVICE_INFO_V0_TESLA:
-		if (drm->device.info.chipset != 0x50)
+		if (drm->client.device.info.chipset != 0x50)
 			node->memtype = (nvbo->tile_flags & 0x7f00) >> 8;
 		break;
 	case NV_DEVICE_INFO_V0_FERMI:
@@ -169,12 +169,12 @@ nouveau_gart_manager_new(struct ttm_mem_type_manager *man,
 		break;
 	default:
 		NV_WARN(drm, "%s: unhandled family type %x\n", __func__,
-			drm->device.info.family);
+			drm->client.device.info.family);
 		break;
 	}
 
-	mem->mm_node = node;
-	mem->start   = 0;
+	reg->mm_node = node;
+	reg->start   = 0;
 	return 0;
 }
 
@@ -184,11 +184,11 @@ nouveau_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix)
 }
 
 const struct ttm_mem_type_manager_func nouveau_gart_manager = {
-	nouveau_gart_manager_init,
-	nouveau_gart_manager_fini,
-	nouveau_gart_manager_new,
-	nouveau_gart_manager_del,
-	nouveau_gart_manager_debug
+	.init = nouveau_gart_manager_init,
+	.takedown = nouveau_gart_manager_fini,
+	.get_node = nouveau_gart_manager_new,
+	.put_node = nouveau_gart_manager_del,
+	.debug = nouveau_gart_manager_debug
 };
 
 /*XXX*/
@@ -197,7 +197,7 @@ static int
 nv04_gart_manager_init(struct ttm_mem_type_manager *man, unsigned long psize)
 {
 	struct nouveau_drm *drm = nouveau_bdev(man->bdev);
-	struct nvkm_mmu *mmu = nvxx_mmu(&drm->device);
+	struct nvkm_mmu *mmu = nvxx_mmu(&drm->client.device);
 	struct nv04_mmu *priv = (void *)mmu;
 	struct nvkm_vm *vm = NULL;
 	nvkm_vm_ref(priv->vm, &vm, NULL);
@@ -215,20 +215,20 @@ nv04_gart_manager_fini(struct ttm_mem_type_manager *man)
 }
 
 static void
-nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *mem)
+nv04_gart_manager_del(struct ttm_mem_type_manager *man, struct ttm_mem_reg *reg)
 {
-	struct nvkm_mem *node = mem->mm_node;
+	struct nvkm_mem *node = reg->mm_node;
 	if (node->vma[0].node)
 		nvkm_vm_put(&node->vma[0]);
-	kfree(mem->mm_node);
-	mem->mm_node = NULL;
+	kfree(reg->mm_node);
+	reg->mm_node = NULL;
 }
 
 static int
 nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 		      struct ttm_buffer_object *bo,
 		      const struct ttm_place *place,
-		      struct ttm_mem_reg *mem)
+		      struct ttm_mem_reg *reg)
 {
 	struct nvkm_mem *node;
 	int ret;
@@ -239,15 +239,15 @@ nv04_gart_manager_new(struct ttm_mem_type_manager *man,
 
 	node->page_shift = 12;
 
-	ret = nvkm_vm_get(man->priv, mem->num_pages << 12, node->page_shift,
+	ret = nvkm_vm_get(man->priv, reg->num_pages << 12, node->page_shift,
 			  NV_MEM_ACCESS_RW, &node->vma[0]);
 	if (ret) {
 		kfree(node);
 		return ret;
 	}
 
-	mem->mm_node = node;
-	mem->start   = node->vma[0].offset >> PAGE_SHIFT;
+	reg->mm_node = node;
+	reg->start   = node->vma[0].offset >> PAGE_SHIFT;
 	return 0;
 }
 
@@ -257,11 +257,11 @@ nv04_gart_manager_debug(struct ttm_mem_type_manager *man, const char *prefix)
 }
 
 const struct ttm_mem_type_manager_func nv04_gart_manager = {
-	nv04_gart_manager_init,
-	nv04_gart_manager_fini,
-	nv04_gart_manager_new,
-	nv04_gart_manager_del,
-	nv04_gart_manager_debug
+	.init = nv04_gart_manager_init,
+	.takedown = nv04_gart_manager_fini,
+	.get_node = nv04_gart_manager_new,
+	.put_node = nv04_gart_manager_del,
+	.debug = nv04_gart_manager_debug
 };
 
 int
@@ -339,7 +339,7 @@ nouveau_ttm_global_release(struct nouveau_drm *drm)
 int
 nouveau_ttm_init(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 	struct nvkm_pci *pci = device->pci;
 	struct drm_device *dev = drm->dev;
 	u8 bits;
@@ -352,8 +352,8 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 		drm->agp.cma = pci->agp.cma;
 	}
 
-	bits = nvxx_mmu(&drm->device)->dma_bits;
-	if (nvxx_device(&drm->device)->func->pci) {
+	bits = nvxx_mmu(&drm->client.device)->dma_bits;
+	if (nvxx_device(&drm->client.device)->func->pci) {
 		if (drm->agp.bridge)
 			bits = 32;
 	} else if (device->func->tegra) {
@@ -396,7 +396,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 	}
 
 	/* VRAM init */
-	drm->gem.vram_available = drm->device.info.ram_user;
+	drm->gem.vram_available = drm->client.device.info.ram_user;
 
 	arch_io_reserve_memtype_wc(device->func->resource_addr(device, 1),
 				   device->func->resource_size(device, 1));
@@ -413,7 +413,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 
 	/* GART init */
 	if (!drm->agp.bridge) {
-		drm->gem.gart_available = nvxx_mmu(&drm->device)->limit;
+		drm->gem.gart_available = nvxx_mmu(&drm->client.device)->limit;
 	} else {
 		drm->gem.gart_available = drm->agp.size;
 	}
@@ -433,7 +433,7 @@ nouveau_ttm_init(struct nouveau_drm *drm)
 void
 nouveau_ttm_fini(struct nouveau_drm *drm)
 {
-	struct nvkm_device *device = nvxx_device(&drm->device);
+	struct nvkm_device *device = nvxx_device(&drm->client.device);
 
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_VRAM);
 	ttm_bo_clean_mm(&drm->ttm.bdev, TTM_PL_TT);
diff --git a/drivers/gpu/drm/nouveau/nouveau_usif.c b/drivers/gpu/drm/nouveau/nouveau_usif.c
index 08f9c6fa0f7f..afbdbed1a690 100644
--- a/drivers/gpu/drm/nouveau/nouveau_usif.c
+++ b/drivers/gpu/drm/nouveau/nouveau_usif.c
@@ -103,7 +103,7 @@ usif_notify(const void *header, u32 length, const void *data, u32 size)
 	}
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
@@ -313,7 +313,8 @@ usif_ioctl(struct drm_file *filp, void __user *user, u32 argc)
 	if (!(ret = nvif_unpack(-ENOSYS, &data, &size, argv->v0, 0, 0, true))) {
 		/* block access to objects not created via this interface */
 		owner = argv->v0.owner;
-		if (argv->v0.object == 0ULL)
+		if (argv->v0.object == 0ULL &&
+		    argv->v0.type != NVIF_IOCTL_V0_DEL)
 			argv->v0.owner = NVDRM_OBJECT_ANY; /* except client */
 		else
 			argv->v0.owner = NVDRM_OBJECT_USIF;
diff --git a/drivers/gpu/drm/nouveau/nouveau_vga.c b/drivers/gpu/drm/nouveau/nouveau_vga.c
index c6a180a0c284..eef22c6b9665 100644
--- a/drivers/gpu/drm/nouveau/nouveau_vga.c
+++ b/drivers/gpu/drm/nouveau/nouveau_vga.c
@@ -13,13 +13,13 @@ static unsigned int
 nouveau_vga_set_decode(void *priv, bool state)
 {
 	struct nouveau_drm *drm = nouveau_drm(priv);
-	struct nvif_object *device = &drm->device.object;
+	struct nvif_object *device = &drm->client.device.object;
 
-	if (drm->device.info.family == NV_DEVICE_INFO_V0_CURIE &&
-	    drm->device.info.chipset >= 0x4c)
+	if (drm->client.device.info.family == NV_DEVICE_INFO_V0_CURIE &&
+	    drm->client.device.info.chipset >= 0x4c)
 		nvif_wr32(device, 0x088060, state);
 	else
-	if (drm->device.info.chipset >= 0x40)
+	if (drm->client.device.info.chipset >= 0x40)
 		nvif_wr32(device, 0x088054, state);
 	else
 		nvif_wr32(device, 0x001854, state);
diff --git a/drivers/gpu/drm/nouveau/nv04_fbcon.c b/drivers/gpu/drm/nouveau/nv04_fbcon.c
index da8fd5ff9d0f..01731dbeb3d8 100644
--- a/drivers/gpu/drm/nouveau/nv04_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv04_fbcon.c
@@ -30,7 +30,7 @@ int
 nv04_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -50,7 +50,7 @@ int
 nv04_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -77,7 +77,7 @@ int
 nv04_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	uint32_t fg;
 	uint32_t bg;
@@ -133,10 +133,10 @@ int
 nv04_fbcon_accel_init(struct fb_info *info)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct drm_device *dev = nfbdev->dev;
+	struct drm_device *dev = nfbdev->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_channel *chan = drm->channel;
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	int surface_fmt, pattern_fmt, rect_fmt;
 	int ret;
 
diff --git a/drivers/gpu/drm/nouveau/nv04_fence.c b/drivers/gpu/drm/nouveau/nv04_fence.c
index 1915b7b82a59..fa8f2375c398 100644
--- a/drivers/gpu/drm/nouveau/nv04_fence.c
+++ b/drivers/gpu/drm/nouveau/nv04_fence.c
@@ -110,6 +110,6 @@ nv04_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv04_fence_context_new;
 	priv->base.context_del = nv04_fence_context_del;
 	priv->base.contexts = 15;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.c b/drivers/gpu/drm/nouveau/nv10_fence.c
index 4e3de34ff6f4..2998bde29211 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.c
+++ b/drivers/gpu/drm/nouveau/nv10_fence.c
@@ -57,16 +57,13 @@ void
 nv10_fence_context_del(struct nouveau_channel *chan)
 {
 	struct nv10_fence_chan *fctx = chan->fence;
-	int i;
 	nouveau_fence_context_del(&fctx->base);
-	for (i = 0; i < ARRAY_SIZE(fctx->head); i++)
-		nvif_object_fini(&fctx->head[i]);
 	nvif_object_fini(&fctx->sema);
 	chan->fence = NULL;
 	nouveau_fence_context_free(&fctx->base);
 }
 
-int
+static int
 nv10_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_chan *fctx;
@@ -107,7 +104,7 @@ nv10_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv10_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 31;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nv10_fence.h b/drivers/gpu/drm/nouveau/nv10_fence.h
index a87259f3983a..b7a508585304 100644
--- a/drivers/gpu/drm/nouveau/nv10_fence.h
+++ b/drivers/gpu/drm/nouveau/nv10_fence.h
@@ -7,7 +7,6 @@
 struct nv10_fence_chan {
 	struct nouveau_fence_chan base;
 	struct nvif_object sema;
-	struct nvif_object head[4];
 };
 
 struct nv10_fence_priv {
diff --git a/drivers/gpu/drm/nouveau/nv17_fence.c b/drivers/gpu/drm/nouveau/nv17_fence.c
index 7d5e562a55c5..6477b7069e14 100644
--- a/drivers/gpu/drm/nouveau/nv17_fence.c
+++ b/drivers/gpu/drm/nouveau/nv17_fence.c
@@ -76,9 +76,9 @@ nv17_fence_context_new(struct nouveau_channel *chan)
 {
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
 	int ret = 0;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
@@ -126,10 +126,10 @@ nv17_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv17_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 31;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv50_display.c b/drivers/gpu/drm/nouveau/nv50_display.c
index 7d0edcbcfca7..a9182d5e6011 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.c
+++ b/drivers/gpu/drm/nouveau/nv50_display.c
@@ -25,10 +25,12 @@
 #include <linux/dma-mapping.h>
 
 #include <drm/drmP.h>
+#include <drm/drm_atomic.h>
+#include <drm/drm_atomic_helper.h>
 #include <drm/drm_crtc_helper.h>
-#include <drm/drm_plane_helper.h>
 #include <drm/drm_dp_helper.h>
 #include <drm/drm_fb_helper.h>
+#include <drm/drm_plane_helper.h>
 
 #include <nvif/class.h>
 #include <nvif/cl0002.h>
@@ -38,6 +40,7 @@
 #include <nvif/cl507c.h>
 #include <nvif/cl507d.h>
 #include <nvif/cl507e.h>
+#include <nvif/event.h>
 
 #include "nouveau_drv.h"
 #include "nouveau_dma.h"
@@ -46,6 +49,7 @@
 #include "nouveau_encoder.h"
 #include "nouveau_crtc.h"
 #include "nouveau_fence.h"
+#include "nouveau_fbcon.h"
 #include "nv50_display.h"
 
 #define EVO_DMA_NR 9
@@ -61,6 +65,227 @@
 #define EVO_MAST_NTFY     EVO_SYNC(      0, 0x00)
 #define EVO_FLIP_SEM0(c)  EVO_SYNC((c) + 1, 0x00)
 #define EVO_FLIP_SEM1(c)  EVO_SYNC((c) + 1, 0x10)
+#define EVO_FLIP_NTFY0(c) EVO_SYNC((c) + 1, 0x20)
+#define EVO_FLIP_NTFY1(c) EVO_SYNC((c) + 1, 0x30)
+
+/******************************************************************************
+ * Atomic state
+ *****************************************************************************/
+#define nv50_atom(p) container_of((p), struct nv50_atom, state)
+
+struct nv50_atom {
+	struct drm_atomic_state state;
+
+	struct list_head outp;
+	bool lock_core;
+	bool flush_disable;
+};
+
+struct nv50_outp_atom {
+	struct list_head head;
+
+	struct drm_encoder *encoder;
+	bool flush_disable;
+
+	union {
+		struct {
+			bool ctrl:1;
+		};
+		u8 mask;
+	} clr;
+
+	union {
+		struct {
+			bool ctrl:1;
+		};
+		u8 mask;
+	} set;
+};
+
+#define nv50_head_atom(p) container_of((p), struct nv50_head_atom, state)
+
+struct nv50_head_atom {
+	struct drm_crtc_state state;
+
+	struct {
+		u16 iW;
+		u16 iH;
+		u16 oW;
+		u16 oH;
+	} view;
+
+	struct nv50_head_mode {
+		bool interlace;
+		u32 clock;
+		struct {
+			u16 active;
+			u16 synce;
+			u16 blanke;
+			u16 blanks;
+		} h;
+		struct {
+			u32 active;
+			u16 synce;
+			u16 blanke;
+			u16 blanks;
+			u16 blank2s;
+			u16 blank2e;
+			u16 blankus;
+		} v;
+	} mode;
+
+	struct {
+		u32 handle;
+		u64 offset:40;
+	} lut;
+
+	struct {
+		bool visible;
+		u32 handle;
+		u64 offset:40;
+		u8  format;
+		u8  kind:7;
+		u8  layout:1;
+		u8  block:4;
+		u32 pitch:20;
+		u16 x;
+		u16 y;
+		u16 w;
+		u16 h;
+	} core;
+
+	struct {
+		bool visible;
+		u32 handle;
+		u64 offset:40;
+		u8  layout:1;
+		u8  format:1;
+	} curs;
+
+	struct {
+		u8  depth;
+		u8  cpp;
+		u16 x;
+		u16 y;
+		u16 w;
+		u16 h;
+	} base;
+
+	struct {
+		u8 cpp;
+	} ovly;
+
+	struct {
+		bool enable:1;
+		u8 bits:2;
+		u8 mode:4;
+	} dither;
+
+	struct {
+		struct {
+			u16 cos:12;
+			u16 sin:12;
+		} sat;
+	} procamp;
+
+	union {
+		struct {
+			bool core:1;
+			bool curs:1;
+		};
+		u8 mask;
+	} clr;
+
+	union {
+		struct {
+			bool core:1;
+			bool curs:1;
+			bool view:1;
+			bool mode:1;
+			bool base:1;
+			bool ovly:1;
+			bool dither:1;
+			bool procamp:1;
+		};
+		u16 mask;
+	} set;
+};
+
+static inline struct nv50_head_atom *
+nv50_head_atom_get(struct drm_atomic_state *state, struct drm_crtc *crtc)
+{
+	struct drm_crtc_state *statec = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(statec))
+		return (void *)statec;
+	return nv50_head_atom(statec);
+}
+
+#define nv50_wndw_atom(p) container_of((p), struct nv50_wndw_atom, state)
+
+struct nv50_wndw_atom {
+	struct drm_plane_state state;
+	u8 interval;
+
+	struct drm_rect clip;
+
+	struct {
+		u32  handle;
+		u16  offset:12;
+		bool awaken:1;
+	} ntfy;
+
+	struct {
+		u32 handle;
+		u16 offset:12;
+		u32 acquire;
+		u32 release;
+	} sema;
+
+	struct {
+		u8 enable:2;
+	} lut;
+
+	struct {
+		u8  mode:2;
+		u8  interval:4;
+
+		u8  format;
+		u8  kind:7;
+		u8  layout:1;
+		u8  block:4;
+		u32 pitch:20;
+		u16 w;
+		u16 h;
+
+		u32 handle;
+		u64 offset;
+	} image;
+
+	struct {
+		u16 x;
+		u16 y;
+	} point;
+
+	union {
+		struct {
+			bool ntfy:1;
+			bool sema:1;
+			bool image:1;
+		};
+		u8 mask;
+	} clr;
+
+	union {
+		struct {
+			bool ntfy:1;
+			bool sema:1;
+			bool image:1;
+			bool lut:1;
+			bool point:1;
+		};
+		u8 mask;
+	} set;
+};
 
 /******************************************************************************
  * EVO channel
@@ -133,34 +358,6 @@ nv50_pioc_create(struct nvif_device *device, struct nvif_object *disp,
 }
 
 /******************************************************************************
- * Cursor Immediate
- *****************************************************************************/
-
-struct nv50_curs {
-	struct nv50_pioc base;
-};
-
-static int
-nv50_curs_create(struct nvif_device *device, struct nvif_object *disp,
-		 int head, struct nv50_curs *curs)
-{
-	struct nv50_disp_cursor_v0 args = {
-		.head = head,
-	};
-	static const s32 oclass[] = {
-		GK104_DISP_CURSOR,
-		GF110_DISP_CURSOR,
-		GT214_DISP_CURSOR,
-		G82_DISP_CURSOR,
-		NV50_DISP_CURSOR,
-		0
-	};
-
-	return nv50_pioc_create(device, disp, oclass, head, &args, sizeof(args),
-				&curs->base);
-}
-
-/******************************************************************************
  * Overlay Immediate
  *****************************************************************************/
 
@@ -192,6 +389,11 @@ nv50_oimm_create(struct nvif_device *device, struct nvif_object *disp,
  * DMA EVO channel
  *****************************************************************************/
 
+struct nv50_dmac_ctxdma {
+	struct list_head head;
+	struct nvif_object object;
+};
+
 struct nv50_dmac {
 	struct nv50_chan base;
 	dma_addr_t handle;
@@ -199,6 +401,7 @@ struct nv50_dmac {
 
 	struct nvif_object sync;
 	struct nvif_object vram;
+	struct list_head ctxdma;
 
 	/* Protects against concurrent pushbuf access to this channel, lock is
 	 * grabbed by evo_wait (if the pushbuf reservation is successful) and
@@ -207,9 +410,82 @@ struct nv50_dmac {
 };
 
 static void
+nv50_dmac_ctxdma_del(struct nv50_dmac_ctxdma *ctxdma)
+{
+	nvif_object_fini(&ctxdma->object);
+	list_del(&ctxdma->head);
+	kfree(ctxdma);
+}
+
+static struct nv50_dmac_ctxdma *
+nv50_dmac_ctxdma_new(struct nv50_dmac *dmac, struct nouveau_framebuffer *fb)
+{
+	struct nouveau_drm *drm = nouveau_drm(fb->base.dev);
+	struct nv50_dmac_ctxdma *ctxdma;
+	const u8    kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
+	const u32 handle = 0xfb000000 | kind;
+	struct {
+		struct nv_dma_v0 base;
+		union {
+			struct nv50_dma_v0 nv50;
+			struct gf100_dma_v0 gf100;
+			struct gf119_dma_v0 gf119;
+		};
+	} args = {};
+	u32 argc = sizeof(args.base);
+	int ret;
+
+	list_for_each_entry(ctxdma, &dmac->ctxdma, head) {
+		if (ctxdma->object.handle == handle)
+			return ctxdma;
+	}
+
+	if (!(ctxdma = kzalloc(sizeof(*ctxdma), GFP_KERNEL)))
+		return ERR_PTR(-ENOMEM);
+	list_add(&ctxdma->head, &dmac->ctxdma);
+
+	args.base.target = NV_DMA_V0_TARGET_VRAM;
+	args.base.access = NV_DMA_V0_ACCESS_RDWR;
+	args.base.start  = 0;
+	args.base.limit  = drm->client.device.info.ram_user - 1;
+
+	if (drm->client.device.info.chipset < 0x80) {
+		args.nv50.part = NV50_DMA_V0_PART_256;
+		argc += sizeof(args.nv50);
+	} else
+	if (drm->client.device.info.chipset < 0xc0) {
+		args.nv50.part = NV50_DMA_V0_PART_256;
+		args.nv50.kind = kind;
+		argc += sizeof(args.nv50);
+	} else
+	if (drm->client.device.info.chipset < 0xd0) {
+		args.gf100.kind = kind;
+		argc += sizeof(args.gf100);
+	} else {
+		args.gf119.page = GF119_DMA_V0_PAGE_LP;
+		args.gf119.kind = kind;
+		argc += sizeof(args.gf119);
+	}
+
+	ret = nvif_object_init(&dmac->base.user, handle, NV_DMA_IN_MEMORY,
+			       &args, argc, &ctxdma->object);
+	if (ret) {
+		nv50_dmac_ctxdma_del(ctxdma);
+		return ERR_PTR(ret);
+	}
+
+	return ctxdma;
+}
+
+static void
 nv50_dmac_destroy(struct nv50_dmac *dmac, struct nvif_object *disp)
 {
 	struct nvif_device *device = dmac->base.device;
+	struct nv50_dmac_ctxdma *ctxdma, *ctxtmp;
+
+	list_for_each_entry_safe(ctxdma, ctxtmp, &dmac->ctxdma, head) {
+		nv50_dmac_ctxdma_del(ctxdma);
+	}
 
 	nvif_object_fini(&dmac->vram);
 	nvif_object_fini(&dmac->sync);
@@ -278,6 +554,7 @@ nv50_dmac_create(struct nvif_device *device, struct nvif_object *disp,
 	if (ret)
 		return ret;
 
+	INIT_LIST_HEAD(&dmac->ctxdma);
 	return ret;
 }
 
@@ -297,7 +574,7 @@ nv50_core_create(struct nvif_device *device, struct nvif_object *disp,
 		.pushbuf = 0xb0007d00,
 	};
 	static const s32 oclass[] = {
-		GP104_DISP_CORE_CHANNEL_DMA,
+		GP102_DISP_CORE_CHANNEL_DMA,
 		GP100_DISP_CORE_CHANNEL_DMA,
 		GM200_DISP_CORE_CHANNEL_DMA,
 		GM107_DISP_CORE_CHANNEL_DMA,
@@ -381,34 +658,23 @@ nv50_ovly_create(struct nvif_device *device, struct nvif_object *disp,
 
 struct nv50_head {
 	struct nouveau_crtc base;
-	struct nouveau_bo *image;
-	struct nv50_curs curs;
-	struct nv50_sync sync;
 	struct nv50_ovly ovly;
 	struct nv50_oimm oimm;
 };
 
 #define nv50_head(c) ((struct nv50_head *)nouveau_crtc(c))
-#define nv50_curs(c) (&nv50_head(c)->curs)
-#define nv50_sync(c) (&nv50_head(c)->sync)
 #define nv50_ovly(c) (&nv50_head(c)->ovly)
 #define nv50_oimm(c) (&nv50_head(c)->oimm)
 #define nv50_chan(c) (&(c)->base.base)
 #define nv50_vers(c) nv50_chan(c)->user.oclass
 
-struct nv50_fbdma {
-	struct list_head head;
-	struct nvif_object core;
-	struct nvif_object base[4];
-};
-
 struct nv50_disp {
 	struct nvif_object *disp;
 	struct nv50_mast mast;
 
-	struct list_head fbdma;
-
 	struct nouveau_bo *sync;
+
+	struct mutex mutex;
 };
 
 static struct nv50_disp *
@@ -419,12 +685,6 @@ nv50_disp(struct drm_device *dev)
 
 #define nv50_mast(d) (&nv50_disp(d)->mast)
 
-static struct drm_crtc *
-nv50_display_crtc_get(struct drm_encoder *encoder)
-{
-	return nouveau_encoder(encoder)->crtc;
-}
-
 /******************************************************************************
  * EVO channel helpers
  *****************************************************************************/
@@ -463,812 +723,1463 @@ evo_kick(u32 *push, void *evoc)
 	mutex_unlock(&dmac->lock);
 }
 
-#if 1
-#define evo_mthd(p,m,s) *((p)++) = (((s) << 18) | (m))
-#define evo_data(p,d)   *((p)++) = (d)
-#else
 #define evo_mthd(p,m,s) do {                                                   \
 	const u32 _m = (m), _s = (s);                                          \
-	printk(KERN_ERR "%04x %d %s\n", _m, _s, __func__);                     \
+	if (drm_debug & DRM_UT_KMS)                                            \
+		printk(KERN_ERR "%04x %d %s\n", _m, _s, __func__);             \
 	*((p)++) = ((_s << 18) | _m);                                          \
 } while(0)
+
 #define evo_data(p,d) do {                                                     \
 	const u32 _d = (d);                                                    \
-	printk(KERN_ERR "\t%08x\n", _d);                                       \
+	if (drm_debug & DRM_UT_KMS)                                            \
+		printk(KERN_ERR "\t%08x\n", _d);                               \
 	*((p)++) = _d;                                                         \
 } while(0)
-#endif
 
-static bool
-evo_sync_wait(void *data)
+/******************************************************************************
+ * Plane
+ *****************************************************************************/
+#define nv50_wndw(p) container_of((p), struct nv50_wndw, plane)
+
+struct nv50_wndw {
+	const struct nv50_wndw_func *func;
+	struct nv50_dmac *dmac;
+
+	struct drm_plane plane;
+
+	struct nvif_notify notify;
+	u16 ntfy;
+	u16 sema;
+	u32 data;
+};
+
+struct nv50_wndw_func {
+	void *(*dtor)(struct nv50_wndw *);
+	int (*acquire)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
+		       struct nv50_head_atom *asyh);
+	void (*release)(struct nv50_wndw *, struct nv50_wndw_atom *asyw,
+			struct nv50_head_atom *asyh);
+	void (*prepare)(struct nv50_wndw *, struct nv50_head_atom *asyh,
+			struct nv50_wndw_atom *asyw);
+
+	void (*sema_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*sema_clr)(struct nv50_wndw *);
+	void (*ntfy_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*ntfy_clr)(struct nv50_wndw *);
+	int (*ntfy_wait_begun)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*image_set)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*image_clr)(struct nv50_wndw *);
+	void (*lut)(struct nv50_wndw *, struct nv50_wndw_atom *);
+	void (*point)(struct nv50_wndw *, struct nv50_wndw_atom *);
+
+	u32 (*update)(struct nv50_wndw *, u32 interlock);
+};
+
+static int
+nv50_wndw_wait_armed(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
-	if (nouveau_bo_rd32(data, EVO_MAST_NTFY) != 0x00000000)
-		return true;
-	usleep_range(1, 2);
-	return false;
+	if (asyw->set.ntfy)
+		return wndw->func->ntfy_wait_begun(wndw, asyw);
+	return 0;
 }
 
-static int
-evo_sync(struct drm_device *dev)
+static u32
+nv50_wndw_flush_clr(struct nv50_wndw *wndw, u32 interlock, bool flush,
+		    struct nv50_wndw_atom *asyw)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
-	struct nv50_disp *disp = nv50_disp(dev);
-	struct nv50_mast *mast = nv50_mast(dev);
-	u32 *push = evo_wait(mast, 8);
-	if (push) {
-		nouveau_bo_wr32(disp->sync, EVO_MAST_NTFY, 0x00000000);
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x80000000 | EVO_MAST_NTFY);
-		evo_mthd(push, 0x0080, 2);
-		evo_data(push, 0x00000000);
-		evo_data(push, 0x00000000);
-		evo_kick(push, mast);
-		if (nvif_msec(device, 2000,
-			if (evo_sync_wait(disp->sync))
-				break;
-		) >= 0)
-			return 0;
+	if (asyw->clr.sema && (!asyw->set.sema || flush))
+		wndw->func->sema_clr(wndw);
+	if (asyw->clr.ntfy && (!asyw->set.ntfy || flush))
+		wndw->func->ntfy_clr(wndw);
+	if (asyw->clr.image && (!asyw->set.image || flush))
+		wndw->func->image_clr(wndw);
+
+	return flush ? wndw->func->update(wndw, interlock) : 0;
+}
+
+static u32
+nv50_wndw_flush_set(struct nv50_wndw *wndw, u32 interlock,
+		    struct nv50_wndw_atom *asyw)
+{
+	if (interlock) {
+		asyw->image.mode = 0;
+		asyw->image.interval = 1;
 	}
 
-	return -EBUSY;
+	if (asyw->set.sema ) wndw->func->sema_set (wndw, asyw);
+	if (asyw->set.ntfy ) wndw->func->ntfy_set (wndw, asyw);
+	if (asyw->set.image) wndw->func->image_set(wndw, asyw);
+	if (asyw->set.lut  ) wndw->func->lut      (wndw, asyw);
+	if (asyw->set.point) wndw->func->point    (wndw, asyw);
+
+	return wndw->func->update(wndw, interlock);
 }
 
-/******************************************************************************
- * Page flipping channel
- *****************************************************************************/
-struct nouveau_bo *
-nv50_display_crtc_sema(struct drm_device *dev, int crtc)
+static void
+nv50_wndw_atomic_check_release(struct nv50_wndw *wndw,
+			       struct nv50_wndw_atom *asyw,
+			       struct nv50_head_atom *asyh)
 {
-	return nv50_disp(dev)->sync;
+	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+	NV_ATOMIC(drm, "%s release\n", wndw->plane.name);
+	wndw->func->release(wndw, asyw, asyh);
+	asyw->ntfy.handle = 0;
+	asyw->sema.handle = 0;
 }
 
-struct nv50_display_flip {
-	struct nv50_disp *disp;
-	struct nv50_sync *chan;
-};
-
-static bool
-nv50_display_flip_wait(void *data)
+static int
+nv50_wndw_atomic_check_acquire(struct nv50_wndw *wndw,
+			       struct nv50_wndw_atom *asyw,
+			       struct nv50_head_atom *asyh)
 {
-	struct nv50_display_flip *flip = data;
-	if (nouveau_bo_rd32(flip->disp->sync, flip->chan->addr / 4) ==
-					      flip->chan->data)
-		return true;
-	usleep_range(1, 2);
-	return false;
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(asyw->state.fb);
+	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+	int ret;
+
+	NV_ATOMIC(drm, "%s acquire\n", wndw->plane.name);
+	asyw->clip.x1 = 0;
+	asyw->clip.y1 = 0;
+	asyw->clip.x2 = asyh->state.mode.hdisplay;
+	asyw->clip.y2 = asyh->state.mode.vdisplay;
+
+	asyw->image.w = fb->base.width;
+	asyw->image.h = fb->base.height;
+	asyw->image.kind = (fb->nvbo->tile_flags & 0x0000ff00) >> 8;
+	if (asyw->image.kind) {
+		asyw->image.layout = 0;
+		if (drm->client.device.info.chipset >= 0xc0)
+			asyw->image.block = fb->nvbo->tile_mode >> 4;
+		else
+			asyw->image.block = fb->nvbo->tile_mode;
+		asyw->image.pitch = (fb->base.pitches[0] / 4) << 4;
+	} else {
+		asyw->image.layout = 1;
+		asyw->image.block  = 0;
+		asyw->image.pitch  = fb->base.pitches[0];
+	}
+
+	ret = wndw->func->acquire(wndw, asyw, asyh);
+	if (ret)
+		return ret;
+
+	if (asyw->set.image) {
+		if (!(asyw->image.mode = asyw->interval ? 0 : 1))
+			asyw->image.interval = asyw->interval;
+		else
+			asyw->image.interval = 0;
+	}
+
+	return 0;
 }
 
-void
-nv50_display_flip_stop(struct drm_crtc *crtc)
+static int
+nv50_wndw_atomic_check(struct drm_plane *plane, struct drm_plane_state *state)
 {
-	struct nvif_device *device = &nouveau_drm(crtc->dev)->device;
-	struct nv50_display_flip flip = {
-		.disp = nv50_disp(crtc->dev),
-		.chan = nv50_sync(crtc),
-	};
-	u32 *push;
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	struct nv50_wndw_atom *armw = nv50_wndw_atom(wndw->plane.state);
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	struct nv50_head_atom *harm = NULL, *asyh = NULL;
+	bool varm = false, asyv = false, asym = false;
+	int ret;
 
-	push = evo_wait(flip.chan, 8);
-	if (push) {
-		evo_mthd(push, 0x0084, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x0094, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x00c0, 1);
-		evo_data(push, 0x00000000);
-		evo_mthd(push, 0x0080, 1);
-		evo_data(push, 0x00000000);
-		evo_kick(push, flip.chan);
+	NV_ATOMIC(drm, "%s atomic_check\n", plane->name);
+	if (asyw->state.crtc) {
+		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
+		if (IS_ERR(asyh))
+			return PTR_ERR(asyh);
+		asym = drm_atomic_crtc_needs_modeset(&asyh->state);
+		asyv = asyh->state.active;
 	}
 
-	nvif_msec(device, 2000,
-		if (nv50_display_flip_wait(&flip))
-			break;
-	);
+	if (armw->state.crtc) {
+		harm = nv50_head_atom_get(asyw->state.state, armw->state.crtc);
+		if (IS_ERR(harm))
+			return PTR_ERR(harm);
+		varm = harm->state.crtc->state->active;
+	}
+
+	if (asyv) {
+		asyw->point.x = asyw->state.crtc_x;
+		asyw->point.y = asyw->state.crtc_y;
+		if (memcmp(&armw->point, &asyw->point, sizeof(asyw->point)))
+			asyw->set.point = true;
+
+		if (!varm || asym || armw->state.fb != asyw->state.fb) {
+			ret = nv50_wndw_atomic_check_acquire(wndw, asyw, asyh);
+			if (ret)
+				return ret;
+		}
+	} else
+	if (varm) {
+		nv50_wndw_atomic_check_release(wndw, asyw, harm);
+	} else {
+		return 0;
+	}
+
+	if (!asyv || asym) {
+		asyw->clr.ntfy = armw->ntfy.handle != 0;
+		asyw->clr.sema = armw->sema.handle != 0;
+		if (wndw->func->image_clr)
+			asyw->clr.image = armw->image.handle != 0;
+		asyw->set.lut = wndw->func->lut && asyv;
+	}
+
+	return 0;
 }
 
-int
-nv50_display_flip_next(struct drm_crtc *crtc, struct drm_framebuffer *fb,
-		       struct nouveau_channel *chan, u32 swap_interval)
+static void
+nv50_wndw_cleanup_fb(struct drm_plane *plane, struct drm_plane_state *old_state)
 {
-	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_head *head = nv50_head(crtc);
-	struct nv50_sync *sync = nv50_sync(crtc);
-	u32 *push;
-	int ret;
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(old_state->fb);
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
 
-	if (crtc->primary->fb->width != fb->width ||
-	    crtc->primary->fb->height != fb->height)
-		return -EINVAL;
+	NV_ATOMIC(drm, "%s cleanup: %p\n", plane->name, old_state->fb);
+	if (!old_state->fb)
+		return;
 
-	swap_interval <<= 4;
-	if (swap_interval == 0)
-		swap_interval |= 0x100;
-	if (chan == NULL)
-		evo_sync(crtc->dev);
+	nouveau_bo_unpin(fb->nvbo);
+}
 
-	push = evo_wait(sync, 128);
-	if (unlikely(push == NULL))
-		return -EBUSY;
+static int
+nv50_wndw_prepare_fb(struct drm_plane *plane, struct drm_plane_state *state)
+{
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(state->fb);
+	struct nouveau_drm *drm = nouveau_drm(plane->dev);
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	struct nv50_head_atom *asyh;
+	struct nv50_dmac_ctxdma *ctxdma;
+	int ret;
 
-	if (chan && chan->user.oclass < G82_CHANNEL_GPFIFO) {
-		ret = RING_SPACE(chan, 8);
-		if (ret)
-			return ret;
+	NV_ATOMIC(drm, "%s prepare: %p\n", plane->name, state->fb);
+	if (!asyw->state.fb)
+		return 0;
 
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 2);
-		OUT_RING  (chan, NvEvoSema0 + nv_crtc->index);
-		OUT_RING  (chan, sync->addr ^ 0x10);
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_RELEASE, 1);
-		OUT_RING  (chan, sync->data + 1);
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_SEMAPHORE_OFFSET, 2);
-		OUT_RING  (chan, sync->addr);
-		OUT_RING  (chan, sync->data);
-	} else
-	if (chan && chan->user.oclass < FERMI_CHANNEL_GPFIFO) {
-		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
-		ret = RING_SPACE(chan, 12);
-		if (ret)
-			return ret;
+	ret = nouveau_bo_pin(fb->nvbo, TTM_PL_FLAG_VRAM, true);
+	if (ret)
+		return ret;
 
-		BEGIN_NV04(chan, 0, NV11_SUBCHAN_DMA_SEMAPHORE, 1);
-		OUT_RING  (chan, chan->vram.handle);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(addr ^ 0x10));
-		OUT_RING  (chan, lower_32_bits(addr ^ 0x10));
-		OUT_RING  (chan, sync->data + 1);
-		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG);
-		BEGIN_NV04(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(addr));
-		OUT_RING  (chan, lower_32_bits(addr));
-		OUT_RING  (chan, sync->data);
-		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
-	} else
-	if (chan) {
-		u64 addr = nv84_fence_crtc(chan, nv_crtc->index) + sync->addr;
-		ret = RING_SPACE(chan, 10);
-		if (ret)
-			return ret;
+	ctxdma = nv50_dmac_ctxdma_new(wndw->dmac, fb);
+	if (IS_ERR(ctxdma)) {
+		nouveau_bo_unpin(fb->nvbo);
+		return PTR_ERR(ctxdma);
+	}
 
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(addr ^ 0x10));
-		OUT_RING  (chan, lower_32_bits(addr ^ 0x10));
-		OUT_RING  (chan, sync->data + 1);
-		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_WRITE_LONG |
-				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
-		BEGIN_NVC0(chan, 0, NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH, 4);
-		OUT_RING  (chan, upper_32_bits(addr));
-		OUT_RING  (chan, lower_32_bits(addr));
-		OUT_RING  (chan, sync->data);
-		OUT_RING  (chan, NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL |
-				 NVC0_SUBCHAN_SEMAPHORE_TRIGGER_YIELD);
-	}
-
-	if (chan) {
-		sync->addr ^= 0x10;
-		sync->data++;
-		FIRE_RING (chan);
-	}
-
-	/* queue the flip */
-	evo_mthd(push, 0x0100, 1);
-	evo_data(push, 0xfffe0000);
-	evo_mthd(push, 0x0084, 1);
-	evo_data(push, swap_interval);
-	if (!(swap_interval & 0x00000100)) {
-		evo_mthd(push, 0x00e0, 1);
-		evo_data(push, 0x40000000);
-	}
-	evo_mthd(push, 0x0088, 4);
-	evo_data(push, sync->addr);
-	evo_data(push, sync->data++);
-	evo_data(push, sync->data);
-	evo_data(push, sync->base.sync.handle);
-	evo_mthd(push, 0x00a0, 2);
-	evo_data(push, 0x00000000);
-	evo_data(push, 0x00000000);
-	evo_mthd(push, 0x00c0, 1);
-	evo_data(push, nv_fb->r_handle);
-	evo_mthd(push, 0x0110, 2);
-	evo_data(push, 0x00000000);
-	evo_data(push, 0x00000000);
-	if (nv50_vers(sync) < GF110_DISP_BASE_CHANNEL_DMA) {
-		evo_mthd(push, 0x0800, 5);
-		evo_data(push, nv_fb->nvbo->bo.offset >> 8);
-		evo_data(push, 0);
-		evo_data(push, (fb->height << 16) | fb->width);
-		evo_data(push, nv_fb->r_pitch);
-		evo_data(push, nv_fb->r_format);
-	} else {
-		evo_mthd(push, 0x0400, 5);
-		evo_data(push, nv_fb->nvbo->bo.offset >> 8);
-		evo_data(push, 0);
-		evo_data(push, (fb->height << 16) | fb->width);
-		evo_data(push, nv_fb->r_pitch);
-		evo_data(push, nv_fb->r_format);
+	asyw->state.fence = reservation_object_get_excl_rcu(fb->nvbo->bo.resv);
+	asyw->image.handle = ctxdma->object.handle;
+	asyw->image.offset = fb->nvbo->bo.offset;
+
+	if (wndw->func->prepare) {
+		asyh = nv50_head_atom_get(asyw->state.state, asyw->state.crtc);
+		if (IS_ERR(asyh))
+			return PTR_ERR(asyh);
+
+		wndw->func->prepare(wndw, asyh, asyw);
 	}
-	evo_mthd(push, 0x0080, 1);
-	evo_data(push, 0x00000000);
-	evo_kick(push, sync);
 
-	nouveau_bo_ref(nv_fb->nvbo, &head->image);
+	return 0;
+}
+
+static const struct drm_plane_helper_funcs
+nv50_wndw_helper = {
+	.prepare_fb = nv50_wndw_prepare_fb,
+	.cleanup_fb = nv50_wndw_cleanup_fb,
+	.atomic_check = nv50_wndw_atomic_check,
+};
+
+static void
+nv50_wndw_atomic_destroy_state(struct drm_plane *plane,
+			       struct drm_plane_state *state)
+{
+	struct nv50_wndw_atom *asyw = nv50_wndw_atom(state);
+	__drm_atomic_helper_plane_destroy_state(&asyw->state);
+	kfree(asyw);
+}
+
+static struct drm_plane_state *
+nv50_wndw_atomic_duplicate_state(struct drm_plane *plane)
+{
+	struct nv50_wndw_atom *armw = nv50_wndw_atom(plane->state);
+	struct nv50_wndw_atom *asyw;
+	if (!(asyw = kmalloc(sizeof(*asyw), GFP_KERNEL)))
+		return NULL;
+	__drm_atomic_helper_plane_duplicate_state(plane, &asyw->state);
+	asyw->interval = 1;
+	asyw->sema = armw->sema;
+	asyw->ntfy = armw->ntfy;
+	asyw->image = armw->image;
+	asyw->point = armw->point;
+	asyw->lut = armw->lut;
+	asyw->clr.mask = 0;
+	asyw->set.mask = 0;
+	return &asyw->state;
+}
+
+static void
+nv50_wndw_reset(struct drm_plane *plane)
+{
+	struct nv50_wndw_atom *asyw;
+
+	if (WARN_ON(!(asyw = kzalloc(sizeof(*asyw), GFP_KERNEL))))
+		return;
+
+	if (plane->state)
+		plane->funcs->atomic_destroy_state(plane, plane->state);
+	plane->state = &asyw->state;
+	plane->state->plane = plane;
+	plane->state->rotation = DRM_ROTATE_0;
+}
+
+static void
+nv50_wndw_destroy(struct drm_plane *plane)
+{
+	struct nv50_wndw *wndw = nv50_wndw(plane);
+	void *data;
+	nvif_notify_fini(&wndw->notify);
+	data = wndw->func->dtor(wndw);
+	drm_plane_cleanup(&wndw->plane);
+	kfree(data);
+}
+
+static const struct drm_plane_funcs
+nv50_wndw = {
+	.update_plane = drm_atomic_helper_update_plane,
+	.disable_plane = drm_atomic_helper_disable_plane,
+	.destroy = nv50_wndw_destroy,
+	.reset = nv50_wndw_reset,
+	.set_property = drm_atomic_helper_plane_set_property,
+	.atomic_duplicate_state = nv50_wndw_atomic_duplicate_state,
+	.atomic_destroy_state = nv50_wndw_atomic_destroy_state,
+};
+
+static void
+nv50_wndw_fini(struct nv50_wndw *wndw)
+{
+	nvif_notify_put(&wndw->notify);
+}
+
+static void
+nv50_wndw_init(struct nv50_wndw *wndw)
+{
+	nvif_notify_get(&wndw->notify);
+}
+
+static int
+nv50_wndw_ctor(const struct nv50_wndw_func *func, struct drm_device *dev,
+	       enum drm_plane_type type, const char *name, int index,
+	       struct nv50_dmac *dmac, const u32 *format, int nformat,
+	       struct nv50_wndw *wndw)
+{
+	int ret;
+
+	wndw->func = func;
+	wndw->dmac = dmac;
+
+	ret = drm_universal_plane_init(dev, &wndw->plane, 0, &nv50_wndw, format,
+				       nformat, type, "%s-%d", name, index);
+	if (ret)
+		return ret;
+
+	drm_plane_helper_add(&wndw->plane, &nv50_wndw_helper);
 	return 0;
 }
 
 /******************************************************************************
- * CRTC
+ * Cursor plane
  *****************************************************************************/
+#define nv50_curs(p) container_of((p), struct nv50_curs, wndw)
+
+struct nv50_curs {
+	struct nv50_wndw wndw;
+	struct nvif_object chan;
+};
+
+static u32
+nv50_curs_update(struct nv50_wndw *wndw, u32 interlock)
+{
+	struct nv50_curs *curs = nv50_curs(wndw);
+	nvif_wr32(&curs->chan, 0x0080, 0x00000000);
+	return 0;
+}
+
+static void
+nv50_curs_point(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_curs *curs = nv50_curs(wndw);
+	nvif_wr32(&curs->chan, 0x0084, (asyw->point.y << 16) | asyw->point.x);
+}
+
+static void
+nv50_curs_prepare(struct nv50_wndw *wndw, struct nv50_head_atom *asyh,
+		  struct nv50_wndw_atom *asyw)
+{
+	asyh->curs.handle = nv50_disp(wndw->plane.dev)->mast.base.vram.handle;
+	asyh->curs.offset = asyw->image.offset;
+	asyh->set.curs = asyh->curs.visible;
+}
+
+static void
+nv50_curs_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		  struct nv50_head_atom *asyh)
+{
+	asyh->curs.visible = false;
+}
+
 static int
-nv50_crtc_set_dither(struct nouveau_crtc *nv_crtc, bool update)
+nv50_curs_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		  struct nv50_head_atom *asyh)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-	struct nouveau_connector *nv_connector;
-	struct drm_connector *connector;
-	u32 *push, mode = 0x00;
+	int ret;
 
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	connector = &nv_connector->base;
-	if (nv_connector->dithering_mode == DITHERING_MODE_AUTO) {
-		if (nv_crtc->base.primary->fb->depth > connector->display_info.bpc * 3)
-			mode = DITHERING_MODE_DYNAMIC2X2;
-	} else {
-		mode = nv_connector->dithering_mode;
-	}
+	ret = drm_plane_helper_check_state(&asyw->state, &asyw->clip,
+					   DRM_PLANE_HELPER_NO_SCALING,
+					   DRM_PLANE_HELPER_NO_SCALING,
+					   true, true);
+	asyh->curs.visible = asyw->state.visible;
+	if (ret || !asyh->curs.visible)
+		return ret;
 
-	if (nv_connector->dithering_depth == DITHERING_DEPTH_AUTO) {
-		if (connector->display_info.bpc >= 8)
-			mode |= DITHERING_DEPTH_8BPC;
-	} else {
-		mode |= nv_connector->dithering_depth;
+	switch (asyw->state.fb->width) {
+	case 32: asyh->curs.layout = 0; break;
+	case 64: asyh->curs.layout = 1; break;
+	default:
+		return -EINVAL;
 	}
 
-	push = evo_wait(mast, 4);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x08a0 + (nv_crtc->index * 0x0400), 1);
-			evo_data(push, mode);
-		} else
-		if (nv50_vers(mast) < GK104_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0490 + (nv_crtc->index * 0x0300), 1);
-			evo_data(push, mode);
-		} else {
-			evo_mthd(push, 0x04a0 + (nv_crtc->index * 0x0300), 1);
-			evo_data(push, mode);
-		}
+	if (asyw->state.fb->width != asyw->state.fb->height)
+		return -EINVAL;
 
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, mast);
+	switch (asyw->state.fb->format->format) {
+	case DRM_FORMAT_ARGB8888: asyh->curs.format = 1; break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
 	}
 
 	return 0;
 }
 
+static void *
+nv50_curs_dtor(struct nv50_wndw *wndw)
+{
+	struct nv50_curs *curs = nv50_curs(wndw);
+	nvif_object_fini(&curs->chan);
+	return curs;
+}
+
+static const u32
+nv50_curs_format[] = {
+	DRM_FORMAT_ARGB8888,
+};
+
+static const struct nv50_wndw_func
+nv50_curs = {
+	.dtor = nv50_curs_dtor,
+	.acquire = nv50_curs_acquire,
+	.release = nv50_curs_release,
+	.prepare = nv50_curs_prepare,
+	.point = nv50_curs_point,
+	.update = nv50_curs_update,
+};
+
 static int
-nv50_crtc_set_scale(struct nouveau_crtc *nv_crtc, bool update)
+nv50_curs_new(struct nouveau_drm *drm, struct nv50_head *head,
+	      struct nv50_curs **pcurs)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-	struct drm_display_mode *omode, *umode = &nv_crtc->base.mode;
-	struct drm_crtc *crtc = &nv_crtc->base;
-	struct nouveau_connector *nv_connector;
-	int mode = DRM_MODE_SCALE_NONE;
-	u32 oX, oY, *push;
+	static const struct nvif_mclass curses[] = {
+		{ GK104_DISP_CURSOR, 0 },
+		{ GF110_DISP_CURSOR, 0 },
+		{ GT214_DISP_CURSOR, 0 },
+		{   G82_DISP_CURSOR, 0 },
+		{  NV50_DISP_CURSOR, 0 },
+		{}
+	};
+	struct nv50_disp_cursor_v0 args = {
+		.head = head->base.index,
+	};
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_curs *curs;
+	int cid, ret;
+
+	cid = nvif_mclass(disp->disp, curses);
+	if (cid < 0) {
+		NV_ERROR(drm, "No supported cursor immediate class\n");
+		return cid;
+	}
 
-	/* start off at the resolution we programmed the crtc for, this
-	 * effectively handles NONE/FULL scaling
-	 */
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	if (nv_connector && nv_connector->native_mode) {
-		mode = nv_connector->scaling_mode;
-		if (nv_connector->scaling_full) /* non-EDID LVDS/eDP mode */
-			mode = DRM_MODE_SCALE_FULLSCREEN;
+	if (!(curs = *pcurs = kzalloc(sizeof(*curs), GFP_KERNEL)))
+		return -ENOMEM;
+
+	ret = nv50_wndw_ctor(&nv50_curs, drm->dev, DRM_PLANE_TYPE_CURSOR,
+			     "curs", head->base.index, &disp->mast.base,
+			     nv50_curs_format, ARRAY_SIZE(nv50_curs_format),
+			     &curs->wndw);
+	if (ret) {
+		kfree(curs);
+		return ret;
 	}
 
-	if (mode != DRM_MODE_SCALE_NONE)
-		omode = nv_connector->native_mode;
-	else
-		omode = umode;
+	ret = nvif_object_init(disp->disp, 0, curses[cid].oclass, &args,
+			       sizeof(args), &curs->chan);
+	if (ret) {
+		NV_ERROR(drm, "curs%04x allocation failed: %d\n",
+			 curses[cid].oclass, ret);
+		return ret;
+	}
 
-	oX = omode->hdisplay;
-	oY = omode->vdisplay;
-	if (omode->flags & DRM_MODE_FLAG_DBLSCAN)
-		oY *= 2;
+	return 0;
+}
 
-	/* add overscan compensation if necessary, will keep the aspect
-	 * ratio the same as the backend mode unless overridden by the
-	 * user setting both hborder and vborder properties.
-	 */
-	if (nv_connector && ( nv_connector->underscan == UNDERSCAN_ON ||
-			     (nv_connector->underscan == UNDERSCAN_AUTO &&
-			      drm_detect_hdmi_monitor(nv_connector->edid)))) {
-		u32 bX = nv_connector->underscan_hborder;
-		u32 bY = nv_connector->underscan_vborder;
-		u32 aspect = (oY << 19) / oX;
+/******************************************************************************
+ * Primary plane
+ *****************************************************************************/
+#define nv50_base(p) container_of((p), struct nv50_base, wndw)
 
-		if (bX) {
-			oX -= (bX * 2);
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		} else {
-			oX -= (oX >> 4) + 32;
-			if (bY) oY -= (bY * 2);
-			else    oY  = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
+struct nv50_base {
+	struct nv50_wndw wndw;
+	struct nv50_sync chan;
+	int id;
+};
+
+static int
+nv50_base_notify(struct nvif_notify *notify)
+{
+	return NVIF_NOTIFY_KEEP;
+}
+
+static void
+nv50_base_lut(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 2))) {
+		evo_mthd(push, 0x00e0, 1);
+		evo_data(push, asyw->lut.enable << 30);
+		evo_kick(push, &base->chan);
 	}
+}
 
-	/* handle CENTER/ASPECT scaling, taking into account the areas
-	 * removed already for overscan compensation
-	 */
-	switch (mode) {
-	case DRM_MODE_SCALE_CENTER:
-		oX = min((u32)umode->hdisplay, oX);
-		oY = min((u32)umode->vdisplay, oY);
-		/* fall-through */
-	case DRM_MODE_SCALE_ASPECT:
-		if (oY < oX) {
-			u32 aspect = (umode->hdisplay << 19) / umode->vdisplay;
-			oX = ((oY * aspect) + (aspect / 2)) >> 19;
-		} else {
-			u32 aspect = (umode->vdisplay << 19) / umode->hdisplay;
-			oY = ((oX * aspect) + (aspect / 2)) >> 19;
-		}
-		break;
-	default:
-		break;
+static void
+nv50_base_image_clr(struct nv50_wndw *wndw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 4))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x00000000);
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &base->chan);
 	}
+}
 
-	push = evo_wait(mast, 8);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			/*XXX: SCALE_CTRL_ACTIVE??? */
-			evo_mthd(push, 0x08d8 + (nv_crtc->index * 0x400), 2);
-			evo_data(push, (oY << 16) | oX);
-			evo_data(push, (oY << 16) | oX);
-			evo_mthd(push, 0x08a4 + (nv_crtc->index * 0x400), 1);
+static void
+nv50_base_image_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	const s32 oclass = base->chan.base.base.user.oclass;
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 10))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, (asyw->image.mode << 8) |
+			       (asyw->image.interval << 4));
+		evo_mthd(push, 0x00c0, 1);
+		evo_data(push, asyw->image.handle);
+		if (oclass < G82_DISP_BASE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0800, 5);
+			evo_data(push, asyw->image.offset >> 8);
 			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x08c8 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, umode->vdisplay << 16 | umode->hdisplay);
+			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
+			evo_data(push, (asyw->image.layout << 20) |
+					asyw->image.pitch |
+					asyw->image.block);
+			evo_data(push, (asyw->image.kind << 16) |
+				       (asyw->image.format << 8));
+		} else
+		if (oclass < GF110_DISP_BASE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0800, 5);
+			evo_data(push, asyw->image.offset >> 8);
+			evo_data(push, 0x00000000);
+			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
+			evo_data(push, (asyw->image.layout << 20) |
+					asyw->image.pitch |
+					asyw->image.block);
+			evo_data(push, asyw->image.format << 8);
 		} else {
-			evo_mthd(push, 0x04c0 + (nv_crtc->index * 0x300), 3);
-			evo_data(push, (oY << 16) | oX);
-			evo_data(push, (oY << 16) | oX);
-			evo_data(push, (oY << 16) | oX);
-			evo_mthd(push, 0x0494 + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x0400, 5);
+			evo_data(push, asyw->image.offset >> 8);
 			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x04b8 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, umode->vdisplay << 16 | umode->hdisplay);
+			evo_data(push, (asyw->image.h << 16) | asyw->image.w);
+			evo_data(push, (asyw->image.layout << 24) |
+					asyw->image.pitch |
+					asyw->image.block);
+			evo_data(push, asyw->image.format << 8);
 		}
+		evo_kick(push, &base->chan);
+	}
+}
 
-		evo_kick(push, mast);
+static void
+nv50_base_ntfy_clr(struct nv50_wndw *wndw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 2))) {
+		evo_mthd(push, 0x00a4, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &base->chan);
+	}
+}
 
-		if (update) {
-			nv50_display_flip_stop(crtc);
-			nv50_display_flip_next(crtc, crtc->primary->fb,
-					       NULL, 1);
-		}
+static void
+nv50_base_ntfy_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 3))) {
+		evo_mthd(push, 0x00a0, 2);
+		evo_data(push, (asyw->ntfy.awaken << 30) | asyw->ntfy.offset);
+		evo_data(push, asyw->ntfy.handle);
+		evo_kick(push, &base->chan);
 	}
+}
 
-	return 0;
+static void
+nv50_base_sema_clr(struct nv50_wndw *wndw)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+	if ((push = evo_wait(&base->chan, 2))) {
+		evo_mthd(push, 0x0094, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, &base->chan);
+	}
 }
 
-static int
-nv50_crtc_set_raster_vblank_dmi(struct nouveau_crtc *nv_crtc, u32 usec)
+static void
+nv50_base_sema_set(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	struct nv50_base *base = nv50_base(wndw);
 	u32 *push;
+	if ((push = evo_wait(&base->chan, 5))) {
+		evo_mthd(push, 0x0088, 4);
+		evo_data(push, asyw->sema.offset);
+		evo_data(push, asyw->sema.acquire);
+		evo_data(push, asyw->sema.release);
+		evo_data(push, asyw->sema.handle);
+		evo_kick(push, &base->chan);
+	}
+}
 
-	push = evo_wait(mast, 8);
-	if (!push)
-		return -ENOMEM;
+static u32
+nv50_base_update(struct nv50_wndw *wndw, u32 interlock)
+{
+	struct nv50_base *base = nv50_base(wndw);
+	u32 *push;
+
+	if (!(push = evo_wait(&base->chan, 2)))
+		return 0;
+	evo_mthd(push, 0x0080, 1);
+	evo_data(push, interlock);
+	evo_kick(push, &base->chan);
 
-	evo_mthd(push, 0x0828 + (nv_crtc->index * 0x400), 1);
-	evo_data(push, usec);
-	evo_kick(push, mast);
+	if (base->chan.base.base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA)
+		return interlock ? 2 << (base->id * 8) : 0;
+	return interlock ? 2 << (base->id * 4) : 0;
+}
+
+static int
+nv50_base_ntfy_wait_begun(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw)
+{
+	struct nouveau_drm *drm = nouveau_drm(wndw->plane.dev);
+	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
+	if (nvif_msec(&drm->client.device, 2000ULL,
+		u32 data = nouveau_bo_rd32(disp->sync, asyw->ntfy.offset / 4);
+		if ((data & 0xc0000000) == 0x40000000)
+			break;
+		usleep_range(1, 2);
+	) < 0)
+		return -ETIMEDOUT;
 	return 0;
 }
 
+static void
+nv50_base_release(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		  struct nv50_head_atom *asyh)
+{
+	asyh->base.cpp = 0;
+}
+
 static int
-nv50_crtc_set_color_vibrance(struct nouveau_crtc *nv_crtc, bool update)
+nv50_base_acquire(struct nv50_wndw *wndw, struct nv50_wndw_atom *asyw,
+		  struct nv50_head_atom *asyh)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-	u32 *push, hue, vib;
-	int adj;
+	const struct drm_framebuffer *fb = asyw->state.fb;
+	int ret;
 
-	adj = (nv_crtc->color_vibrance > 0) ? 50 : 0;
-	vib = ((nv_crtc->color_vibrance * 2047 + adj) / 100) & 0xfff;
-	hue = ((nv_crtc->vibrant_hue * 2047) / 100) & 0xfff;
+	if (!fb->format->depth)
+		return -EINVAL;
 
-	push = evo_wait(mast, 16);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x08a8 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, (hue << 20) | (vib << 8));
-		} else {
-			evo_mthd(push, 0x0498 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, (hue << 20) | (vib << 8));
-		}
+	ret = drm_plane_helper_check_state(&asyw->state, &asyw->clip,
+					   DRM_PLANE_HELPER_NO_SCALING,
+					   DRM_PLANE_HELPER_NO_SCALING,
+					   false, true);
+	if (ret)
+		return ret;
 
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-		}
-		evo_kick(push, mast);
+	asyh->base.depth = fb->format->depth;
+	asyh->base.cpp = fb->format->cpp[0];
+	asyh->base.x = asyw->state.src.x1 >> 16;
+	asyh->base.y = asyw->state.src.y1 >> 16;
+	asyh->base.w = asyw->state.fb->width;
+	asyh->base.h = asyw->state.fb->height;
+
+	switch (fb->format->format) {
+	case DRM_FORMAT_C8         : asyw->image.format = 0x1e; break;
+	case DRM_FORMAT_RGB565     : asyw->image.format = 0xe8; break;
+	case DRM_FORMAT_XRGB1555   :
+	case DRM_FORMAT_ARGB1555   : asyw->image.format = 0xe9; break;
+	case DRM_FORMAT_XRGB8888   :
+	case DRM_FORMAT_ARGB8888   : asyw->image.format = 0xcf; break;
+	case DRM_FORMAT_XBGR2101010:
+	case DRM_FORMAT_ABGR2101010: asyw->image.format = 0xd1; break;
+	case DRM_FORMAT_XBGR8888   :
+	case DRM_FORMAT_ABGR8888   : asyw->image.format = 0xd5; break;
+	default:
+		WARN_ON(1);
+		return -EINVAL;
 	}
 
+	asyw->lut.enable = 1;
+	asyw->set.image = true;
 	return 0;
 }
 
+static void *
+nv50_base_dtor(struct nv50_wndw *wndw)
+{
+	struct nv50_disp *disp = nv50_disp(wndw->plane.dev);
+	struct nv50_base *base = nv50_base(wndw);
+	nv50_dmac_destroy(&base->chan.base, disp->disp);
+	return base;
+}
+
+static const u32
+nv50_base_format[] = {
+	DRM_FORMAT_C8,
+	DRM_FORMAT_RGB565,
+	DRM_FORMAT_XRGB1555,
+	DRM_FORMAT_ARGB1555,
+	DRM_FORMAT_XRGB8888,
+	DRM_FORMAT_ARGB8888,
+	DRM_FORMAT_XBGR2101010,
+	DRM_FORMAT_ABGR2101010,
+	DRM_FORMAT_XBGR8888,
+	DRM_FORMAT_ABGR8888,
+};
+
+static const struct nv50_wndw_func
+nv50_base = {
+	.dtor = nv50_base_dtor,
+	.acquire = nv50_base_acquire,
+	.release = nv50_base_release,
+	.sema_set = nv50_base_sema_set,
+	.sema_clr = nv50_base_sema_clr,
+	.ntfy_set = nv50_base_ntfy_set,
+	.ntfy_clr = nv50_base_ntfy_clr,
+	.ntfy_wait_begun = nv50_base_ntfy_wait_begun,
+	.image_set = nv50_base_image_set,
+	.image_clr = nv50_base_image_clr,
+	.lut = nv50_base_lut,
+	.update = nv50_base_update,
+};
+
 static int
-nv50_crtc_set_image(struct nouveau_crtc *nv_crtc, struct drm_framebuffer *fb,
-		    int x, int y, bool update)
+nv50_base_new(struct nouveau_drm *drm, struct nv50_head *head,
+	      struct nv50_base **pbase)
+{
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_base *base;
+	int ret;
+
+	if (!(base = *pbase = kzalloc(sizeof(*base), GFP_KERNEL)))
+		return -ENOMEM;
+	base->id = head->base.index;
+	base->wndw.ntfy = EVO_FLIP_NTFY0(base->id);
+	base->wndw.sema = EVO_FLIP_SEM0(base->id);
+	base->wndw.data = 0x00000000;
+
+	ret = nv50_wndw_ctor(&nv50_base, drm->dev, DRM_PLANE_TYPE_PRIMARY,
+			     "base", base->id, &base->chan.base,
+			     nv50_base_format, ARRAY_SIZE(nv50_base_format),
+			     &base->wndw);
+	if (ret) {
+		kfree(base);
+		return ret;
+	}
+
+	ret = nv50_base_create(&drm->client.device, disp->disp, base->id,
+			       disp->sync->bo.offset, &base->chan);
+	if (ret)
+		return ret;
+
+	return nvif_notify_init(&base->chan.base.base.user, nv50_base_notify,
+				false,
+				NV50_DISP_BASE_CHANNEL_DMA_V0_NTFY_UEVENT,
+				&(struct nvif_notify_uevent_req) {},
+				sizeof(struct nvif_notify_uevent_req),
+				sizeof(struct nvif_notify_uevent_rep),
+				&base->wndw.notify);
+}
+
+/******************************************************************************
+ * Head
+ *****************************************************************************/
+static void
+nv50_head_procamp(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(fb);
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
 	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x08a8 + (head->base.index * 0x400), 1);
+		else
+			evo_mthd(push, 0x0498 + (head->base.index * 0x300), 1);
+		evo_data(push, (asyh->procamp.sat.sin << 20) |
+			       (asyh->procamp.sat.cos << 8));
+		evo_kick(push, core);
+	}
+}
 
-	push = evo_wait(mast, 16);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0860 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, nvfb->nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x0868 + (nv_crtc->index * 0x400), 3);
-			evo_data(push, (fb->height << 16) | fb->width);
-			evo_data(push, nvfb->r_pitch);
-			evo_data(push, nvfb->r_format);
-			evo_mthd(push, 0x08c0 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, (y << 16) | x);
-			if (nv50_vers(mast) > NV50_DISP_CORE_CHANNEL_DMA) {
-				evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
-				evo_data(push, nvfb->r_handle);
-			}
-		} else {
-			evo_mthd(push, 0x0460 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, nvfb->nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x0468 + (nv_crtc->index * 0x300), 4);
-			evo_data(push, (fb->height << 16) | fb->width);
-			evo_data(push, nvfb->r_pitch);
-			evo_data(push, nvfb->r_format);
-			evo_data(push, nvfb->r_handle);
-			evo_mthd(push, 0x04b0 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, (y << 16) | x);
-		}
+static void
+nv50_head_dither(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x08a0 + (head->base.index * 0x0400), 1);
+		else
+		if (core->base.user.oclass < GK104_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x0490 + (head->base.index * 0x0300), 1);
+		else
+			evo_mthd(push, 0x04a0 + (head->base.index * 0x0300), 1);
+		evo_data(push, (asyh->dither.mode << 3) |
+			       (asyh->dither.bits << 1) |
+			        asyh->dither.enable);
+		evo_kick(push, core);
+	}
+}
 
-		if (update) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
+static void
+nv50_head_ovly(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->base.cpp) {
+		switch (asyh->base.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		default:
+			WARN_ON(1);
+			break;
 		}
-		evo_kick(push, mast);
+		bounds |= 0x00000001;
 	}
 
-	nv_crtc->fb.handle = nvfb->r_handle;
-	return 0;
+	if ((push = evo_wait(core, 2))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x0904 + head->base.index * 0x400, 1);
+		else
+			evo_mthd(push, 0x04d4 + head->base.index * 0x300, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
+	}
 }
 
 static void
-nv50_crtc_cursor_show(struct nouveau_crtc *nv_crtc)
+nv50_head_base(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-	u32 *push = evo_wait(mast, 16);
-	if (push) {
-		if (nv50_vers(mast) < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 2);
-			evo_data(push, 0x85000000);
-			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
-		} else
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 2);
-			evo_data(push, 0x85000000);
-			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x089c + (nv_crtc->index * 0x400), 1);
-			evo_data(push, mast->base.vram.handle);
-		} else {
-			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 2);
-			evo_data(push, 0x85000000);
-			evo_data(push, nv_crtc->cursor.nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
-			evo_data(push, mast->base.vram.handle);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 bounds = 0;
+	u32 *push;
+
+	if (asyh->base.cpp) {
+		switch (asyh->base.cpp) {
+		case 8: bounds |= 0x00000500; break;
+		case 4: bounds |= 0x00000300; break;
+		case 2: bounds |= 0x00000100; break;
+		case 1: bounds |= 0x00000000; break;
+		default:
+			WARN_ON(1);
+			break;
 		}
-		evo_kick(push, mast);
+		bounds |= 0x00000001;
+	}
+
+	if ((push = evo_wait(core, 2))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x0900 + head->base.index * 0x400, 1);
+		else
+			evo_mthd(push, 0x04d0 + head->base.index * 0x300, 1);
+		evo_data(push, bounds);
+		evo_kick(push, core);
 	}
-	nv_crtc->cursor.visible = true;
 }
 
 static void
-nv50_crtc_cursor_hide(struct nouveau_crtc *nv_crtc)
+nv50_head_curs_clr(struct nv50_head *head)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-	u32 *push = evo_wait(mast, 16);
-	if (push) {
-		if (nv50_vers(mast) < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 1);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
 			evo_data(push, 0x05000000);
 		} else
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0880 + (nv_crtc->index * 0x400), 1);
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0880 + head->base.index * 0x400, 1);
 			evo_data(push, 0x05000000);
-			evo_mthd(push, 0x089c + (nv_crtc->index * 0x400), 1);
+			evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
 			evo_data(push, 0x00000000);
 		} else {
-			evo_mthd(push, 0x0480 + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x0480 + head->base.index * 0x300, 1);
 			evo_data(push, 0x05000000);
-			evo_mthd(push, 0x048c + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
 			evo_data(push, 0x00000000);
 		}
-		evo_kick(push, mast);
+		evo_kick(push, core);
 	}
-	nv_crtc->cursor.visible = false;
 }
 
 static void
-nv50_crtc_cursor_show_hide(struct nouveau_crtc *nv_crtc, bool show, bool update)
+nv50_head_curs_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	struct nv50_mast *mast = nv50_mast(nv_crtc->base.dev);
-
-	if (show && nv_crtc->cursor.nvbo && nv_crtc->base.enabled)
-		nv50_crtc_cursor_show(nv_crtc);
-	else
-		nv50_crtc_cursor_hide(nv_crtc);
-
-	if (update) {
-		u32 *push = evo_wait(mast, 2);
-		if (push) {
-			evo_mthd(push, 0x0080, 1);
-			evo_data(push, 0x00000000);
-			evo_kick(push, mast);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 5))) {
+		if (core->base.user.oclass < G82_DISP_BASE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
+			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
+						    (asyh->curs.format << 24));
+			evo_data(push, asyh->curs.offset >> 8);
+		} else
+		if (core->base.user.oclass < GF110_DISP_BASE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0880 + head->base.index * 0x400, 2);
+			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
+						    (asyh->curs.format << 24));
+			evo_data(push, asyh->curs.offset >> 8);
+			evo_mthd(push, 0x089c + head->base.index * 0x400, 1);
+			evo_data(push, asyh->curs.handle);
+		} else {
+			evo_mthd(push, 0x0480 + head->base.index * 0x300, 2);
+			evo_data(push, 0x80000000 | (asyh->curs.layout << 26) |
+						    (asyh->curs.format << 24));
+			evo_data(push, asyh->curs.offset >> 8);
+			evo_mthd(push, 0x048c + head->base.index * 0x300, 1);
+			evo_data(push, asyh->curs.handle);
 		}
+		evo_kick(push, core);
 	}
 }
 
 static void
-nv50_crtc_dpms(struct drm_crtc *crtc, int mode)
+nv50_head_core_clr(struct nv50_head *head)
 {
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 2))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA)
+			evo_mthd(push, 0x0874 + head->base.index * 0x400, 1);
+		else
+			evo_mthd(push, 0x0474 + head->base.index * 0x300, 1);
+		evo_data(push, 0x00000000);
+		evo_kick(push, core);
+	}
 }
 
 static void
-nv50_crtc_prepare(struct drm_crtc *crtc)
+nv50_head_core_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_mast *mast = nv50_mast(crtc->dev);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
 	u32 *push;
+	if ((push = evo_wait(core, 9))) {
+		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
+			evo_data(push, asyh->core.offset >> 8);
+			evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
+			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
+			evo_data(push, asyh->core.layout << 20 |
+				       (asyh->core.pitch >> 8) << 8 |
+				       asyh->core.block);
+			evo_data(push, asyh->core.kind << 16 |
+				       asyh->core.format << 8);
+			evo_data(push, asyh->core.handle);
+			evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
+			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
+			/* EVO will complain with INVALID_STATE if we have an
+			 * active cursor and (re)specify HeadSetContextDmaIso
+			 * without also updating HeadSetOffsetCursor.
+			 */
+			asyh->set.curs = asyh->curs.visible;
+		} else
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0860 + head->base.index * 0x400, 1);
+			evo_data(push, asyh->core.offset >> 8);
+			evo_mthd(push, 0x0868 + head->base.index * 0x400, 4);
+			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
+			evo_data(push, asyh->core.layout << 20 |
+				       (asyh->core.pitch >> 8) << 8 |
+				       asyh->core.block);
+			evo_data(push, asyh->core.format << 8);
+			evo_data(push, asyh->core.handle);
+			evo_mthd(push, 0x08c0 + head->base.index * 0x400, 1);
+			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
+		} else {
+			evo_mthd(push, 0x0460 + head->base.index * 0x300, 1);
+			evo_data(push, asyh->core.offset >> 8);
+			evo_mthd(push, 0x0468 + head->base.index * 0x300, 4);
+			evo_data(push, (asyh->core.h << 16) | asyh->core.w);
+			evo_data(push, asyh->core.layout << 24 |
+				       (asyh->core.pitch >> 8) << 8 |
+				       asyh->core.block);
+			evo_data(push, asyh->core.format << 8);
+			evo_data(push, asyh->core.handle);
+			evo_mthd(push, 0x04b0 + head->base.index * 0x300, 1);
+			evo_data(push, (asyh->core.y << 16) | asyh->core.x);
+		}
+		evo_kick(push, core);
+	}
+}
 
-	nv50_display_flip_stop(crtc);
-
-	push = evo_wait(mast, 6);
-	if (push) {
-		if (nv50_vers(mast) < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 1);
+static void
+nv50_head_lut_clr(struct nv50_head *head)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 4))) {
+		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
 			evo_data(push, 0x40000000);
 		} else
-		if (nv50_vers(mast) <  GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 1);
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 1);
 			evo_data(push, 0x40000000);
-			evo_mthd(push, 0x085c + (nv_crtc->index * 0x400), 1);
+			evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
 			evo_data(push, 0x00000000);
 		} else {
-			evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x0440 + (head->base.index * 0x300), 1);
 			evo_data(push, 0x03000000);
-			evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
 			evo_data(push, 0x00000000);
 		}
-
-		evo_kick(push, mast);
+		evo_kick(push, core);
 	}
-
-	nv50_crtc_cursor_show_hide(nv_crtc, false, false);
 }
 
 static void
-nv50_crtc_commit(struct drm_crtc *crtc)
+nv50_head_lut_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_mast *mast = nv50_mast(crtc->dev);
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
 	u32 *push;
-
-	push = evo_wait(mast, 32);
-	if (push) {
-		if (nv50_vers(mast) < G82_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, nv_crtc->fb.handle);
-			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 2);
+	if ((push = evo_wait(core, 7))) {
+		if (core->base.user.oclass < G82_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
 			evo_data(push, 0xc0000000);
-			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+			evo_data(push, asyh->lut.offset >> 8);
 		} else
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0874 + (nv_crtc->index * 0x400), 1);
-			evo_data(push, nv_crtc->fb.handle);
-			evo_mthd(push, 0x0840 + (nv_crtc->index * 0x400), 2);
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0840 + (head->base.index * 0x400), 2);
 			evo_data(push, 0xc0000000);
-			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
-			evo_mthd(push, 0x085c + (nv_crtc->index * 0x400), 1);
-			evo_data(push, mast->base.vram.handle);
+			evo_data(push, asyh->lut.offset >> 8);
+			evo_mthd(push, 0x085c + (head->base.index * 0x400), 1);
+			evo_data(push, asyh->lut.handle);
 		} else {
-			evo_mthd(push, 0x0474 + (nv_crtc->index * 0x300), 1);
-			evo_data(push, nv_crtc->fb.handle);
-			evo_mthd(push, 0x0440 + (nv_crtc->index * 0x300), 4);
+			evo_mthd(push, 0x0440 + (head->base.index * 0x300), 4);
 			evo_data(push, 0x83000000);
-			evo_data(push, nv_crtc->lut.nvbo->bo.offset >> 8);
+			evo_data(push, asyh->lut.offset >> 8);
 			evo_data(push, 0x00000000);
 			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x045c + (nv_crtc->index * 0x300), 1);
-			evo_data(push, mast->base.vram.handle);
-			evo_mthd(push, 0x0430 + (nv_crtc->index * 0x300), 1);
+			evo_mthd(push, 0x045c + (head->base.index * 0x300), 1);
+			evo_data(push, asyh->lut.handle);
+		}
+		evo_kick(push, core);
+	}
+}
+
+static void
+nv50_head_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	struct nv50_head_mode *m = &asyh->mode;
+	u32 *push;
+	if ((push = evo_wait(core, 14))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x0804 + (head->base.index * 0x400), 2);
+			evo_data(push, 0x00800000 | m->clock);
+			evo_data(push, m->interlace ? 0x00000002 : 0x00000000);
+			evo_mthd(push, 0x0810 + (head->base.index * 0x400), 7);
+			evo_data(push, 0x00000000);
+			evo_data(push, (m->v.active  << 16) | m->h.active );
+			evo_data(push, (m->v.synce   << 16) | m->h.synce  );
+			evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
+			evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
+			evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
+			evo_data(push, asyh->mode.v.blankus);
+			evo_mthd(push, 0x082c + (head->base.index * 0x400), 1);
+			evo_data(push, 0x00000000);
+		} else {
+			evo_mthd(push, 0x0410 + (head->base.index * 0x300), 6);
+			evo_data(push, 0x00000000);
+			evo_data(push, (m->v.active  << 16) | m->h.active );
+			evo_data(push, (m->v.synce   << 16) | m->h.synce  );
+			evo_data(push, (m->v.blanke  << 16) | m->h.blanke );
+			evo_data(push, (m->v.blanks  << 16) | m->h.blanks );
+			evo_data(push, (m->v.blank2e << 16) | m->v.blank2s);
+			evo_mthd(push, 0x042c + (head->base.index * 0x300), 2);
+			evo_data(push, 0x00000000); /* ??? */
 			evo_data(push, 0xffffff00);
+			evo_mthd(push, 0x0450 + (head->base.index * 0x300), 3);
+			evo_data(push, m->clock * 1000);
+			evo_data(push, 0x00200000); /* ??? */
+			evo_data(push, m->clock * 1000);
 		}
+		evo_kick(push, core);
+	}
+}
 
-		evo_kick(push, mast);
+static void
+nv50_head_view(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct nv50_dmac *core = &nv50_disp(head->base.base.dev)->mast.base;
+	u32 *push;
+	if ((push = evo_wait(core, 10))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			evo_mthd(push, 0x08a4 + (head->base.index * 0x400), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x08c8 + (head->base.index * 0x400), 1);
+			evo_data(push, (asyh->view.iH << 16) | asyh->view.iW);
+			evo_mthd(push, 0x08d8 + (head->base.index * 0x400), 2);
+			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+		} else {
+			evo_mthd(push, 0x0494 + (head->base.index * 0x300), 1);
+			evo_data(push, 0x00000000);
+			evo_mthd(push, 0x04b8 + (head->base.index * 0x300), 1);
+			evo_data(push, (asyh->view.iH << 16) | asyh->view.iW);
+			evo_mthd(push, 0x04c0 + (head->base.index * 0x300), 3);
+			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+			evo_data(push, (asyh->view.oH << 16) | asyh->view.oW);
+		}
+		evo_kick(push, core);
 	}
+}
 
-	nv50_crtc_cursor_show_hide(nv_crtc, true, true);
-	nv50_display_flip_next(crtc, crtc->primary->fb, NULL, 1);
+static void
+nv50_head_flush_clr(struct nv50_head *head, struct nv50_head_atom *asyh, bool y)
+{
+	if (asyh->clr.core && (!asyh->set.core || y))
+		nv50_head_lut_clr(head);
+	if (asyh->clr.core && (!asyh->set.core || y))
+		nv50_head_core_clr(head);
+	if (asyh->clr.curs && (!asyh->set.curs || y))
+		nv50_head_curs_clr(head);
 }
 
-static bool
-nv50_crtc_mode_fixup(struct drm_crtc *crtc, const struct drm_display_mode *mode,
-		     struct drm_display_mode *adjusted_mode)
+static void
+nv50_head_flush_set(struct nv50_head *head, struct nv50_head_atom *asyh)
 {
-	drm_mode_set_crtcinfo(adjusted_mode, CRTC_INTERLACE_HALVE_V);
-	return true;
+	if (asyh->set.view   ) nv50_head_view    (head, asyh);
+	if (asyh->set.mode   ) nv50_head_mode    (head, asyh);
+	if (asyh->set.core   ) nv50_head_lut_set (head, asyh);
+	if (asyh->set.core   ) nv50_head_core_set(head, asyh);
+	if (asyh->set.curs   ) nv50_head_curs_set(head, asyh);
+	if (asyh->set.base   ) nv50_head_base    (head, asyh);
+	if (asyh->set.ovly   ) nv50_head_ovly    (head, asyh);
+	if (asyh->set.dither ) nv50_head_dither  (head, asyh);
+	if (asyh->set.procamp) nv50_head_procamp (head, asyh);
 }
 
-static int
-nv50_crtc_swap_fbs(struct drm_crtc *crtc, struct drm_framebuffer *old_fb)
+static void
+nv50_head_atomic_check_procamp(struct nv50_head_atom *armh,
+			       struct nv50_head_atom *asyh,
+			       struct nouveau_conn_atom *asyc)
 {
-	struct nouveau_framebuffer *nvfb = nouveau_framebuffer(crtc->primary->fb);
-	struct nv50_head *head = nv50_head(crtc);
-	int ret;
+	const int vib = asyc->procamp.color_vibrance - 100;
+	const int hue = asyc->procamp.vibrant_hue - 90;
+	const int adj = (vib > 0) ? 50 : 0;
+	asyh->procamp.sat.cos = ((vib * 2047 + adj) / 100) & 0xfff;
+	asyh->procamp.sat.sin = ((hue * 2047) / 100) & 0xfff;
+	asyh->set.procamp = true;
+}
 
-	ret = nouveau_bo_pin(nvfb->nvbo, TTM_PL_FLAG_VRAM, true);
-	if (ret == 0) {
-		if (head->image)
-			nouveau_bo_unpin(head->image);
-		nouveau_bo_ref(nvfb->nvbo, &head->image);
+static void
+nv50_head_atomic_check_dither(struct nv50_head_atom *armh,
+			      struct nv50_head_atom *asyh,
+			      struct nouveau_conn_atom *asyc)
+{
+	struct drm_connector *connector = asyc->state.connector;
+	u32 mode = 0x00;
+
+	if (asyc->dither.mode == DITHERING_MODE_AUTO) {
+		if (asyh->base.depth > connector->display_info.bpc * 3)
+			mode = DITHERING_MODE_DYNAMIC2X2;
+	} else {
+		mode = asyc->dither.mode;
 	}
 
-	return ret;
+	if (asyc->dither.depth == DITHERING_DEPTH_AUTO) {
+		if (connector->display_info.bpc >= 8)
+			mode |= DITHERING_DEPTH_8BPC;
+	} else {
+		mode |= asyc->dither.depth;
+	}
+
+	asyh->dither.enable = mode;
+	asyh->dither.bits = mode >> 1;
+	asyh->dither.mode = mode >> 3;
+	asyh->set.dither = true;
 }
 
-static int
-nv50_crtc_mode_set(struct drm_crtc *crtc, struct drm_display_mode *umode,
-		   struct drm_display_mode *mode, int x, int y,
-		   struct drm_framebuffer *old_fb)
+static void
+nv50_head_atomic_check_view(struct nv50_head_atom *armh,
+			    struct nv50_head_atom *asyh,
+			    struct nouveau_conn_atom *asyc)
 {
-	struct nv50_mast *mast = nv50_mast(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nouveau_connector *nv_connector;
-	u32 ilace = (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 2 : 1;
-	u32 vscan = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
-	u32 hactive, hsynce, hbackp, hfrontp, hblanke, hblanks;
-	u32 vactive, vsynce, vbackp, vfrontp, vblanke, vblanks;
-	u32 vblan2e = 0, vblan2s = 1, vblankus = 0;
-	u32 *push;
-	int ret;
-
-	hactive = mode->htotal;
-	hsynce  = mode->hsync_end - mode->hsync_start - 1;
-	hbackp  = mode->htotal - mode->hsync_end;
-	hblanke = hsynce + hbackp;
-	hfrontp = mode->hsync_start - mode->hdisplay;
-	hblanks = mode->htotal - hfrontp - 1;
-
-	vactive = mode->vtotal * vscan / ilace;
-	vsynce  = ((mode->vsync_end - mode->vsync_start) * vscan / ilace) - 1;
-	vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
-	vblanke = vsynce + vbackp;
-	vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
-	vblanks = vactive - vfrontp - 1;
-	/* XXX: Safe underestimate, even "0" works */
-	vblankus = (vactive - mode->vdisplay - 2) * hactive;
-	vblankus *= 1000;
-	vblankus /= mode->clock;
+	struct drm_connector *connector = asyc->state.connector;
+	struct drm_display_mode *omode = &asyh->state.adjusted_mode;
+	struct drm_display_mode *umode = &asyh->state.mode;
+	int mode = asyc->scaler.mode;
+	struct edid *edid;
+
+	if (connector->edid_blob_ptr)
+		edid = (struct edid *)connector->edid_blob_ptr->data;
+	else
+		edid = NULL;
 
-	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
-		vblan2e = vactive + vsynce + vbackp;
-		vblan2s = vblan2e + (mode->vdisplay * vscan / ilace);
-		vactive = (vactive * 2) + 1;
+	if (!asyc->scaler.full) {
+		if (mode == DRM_MODE_SCALE_NONE)
+			omode = umode;
+	} else {
+		/* Non-EDID LVDS/eDP mode. */
+		mode = DRM_MODE_SCALE_FULLSCREEN;
 	}
 
-	ret = nv50_crtc_swap_fbs(crtc, old_fb);
-	if (ret)
-		return ret;
+	asyh->view.iW = umode->hdisplay;
+	asyh->view.iH = umode->vdisplay;
+	asyh->view.oW = omode->hdisplay;
+	asyh->view.oH = omode->vdisplay;
+	if (omode->flags & DRM_MODE_FLAG_DBLSCAN)
+		asyh->view.oH *= 2;
 
-	push = evo_wait(mast, 64);
-	if (push) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-			evo_mthd(push, 0x0804 + (nv_crtc->index * 0x400), 2);
-			evo_data(push, 0x00800000 | mode->clock);
-			evo_data(push, (ilace == 2) ? 2 : 0);
-			evo_mthd(push, 0x0810 + (nv_crtc->index * 0x400), 6);
-			evo_data(push, 0x00000000);
-			evo_data(push, (vactive << 16) | hactive);
-			evo_data(push, ( vsynce << 16) | hsynce);
-			evo_data(push, (vblanke << 16) | hblanke);
-			evo_data(push, (vblanks << 16) | hblanks);
-			evo_data(push, (vblan2e << 16) | vblan2s);
-			evo_mthd(push, 0x082c + (nv_crtc->index * 0x400), 1);
-			evo_data(push, 0x00000000);
-			evo_mthd(push, 0x0900 + (nv_crtc->index * 0x400), 2);
-			evo_data(push, 0x00000311);
-			evo_data(push, 0x00000100);
+	/* Add overscan compensation if necessary, will keep the aspect
+	 * ratio the same as the backend mode unless overridden by the
+	 * user setting both hborder and vborder properties.
+	 */
+	if ((asyc->scaler.underscan.mode == UNDERSCAN_ON ||
+	    (asyc->scaler.underscan.mode == UNDERSCAN_AUTO &&
+	     drm_detect_hdmi_monitor(edid)))) {
+		u32 bX = asyc->scaler.underscan.hborder;
+		u32 bY = asyc->scaler.underscan.vborder;
+		u32 r = (asyh->view.oH << 19) / asyh->view.oW;
+
+		if (bX) {
+			asyh->view.oW -= (bX * 2);
+			if (bY) asyh->view.oH -= (bY * 2);
+			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
 		} else {
-			evo_mthd(push, 0x0410 + (nv_crtc->index * 0x300), 6);
-			evo_data(push, 0x00000000);
-			evo_data(push, (vactive << 16) | hactive);
-			evo_data(push, ( vsynce << 16) | hsynce);
-			evo_data(push, (vblanke << 16) | hblanke);
-			evo_data(push, (vblanks << 16) | hblanks);
-			evo_data(push, (vblan2e << 16) | vblan2s);
-			evo_mthd(push, 0x042c + (nv_crtc->index * 0x300), 1);
-			evo_data(push, 0x00000000); /* ??? */
-			evo_mthd(push, 0x0450 + (nv_crtc->index * 0x300), 3);
-			evo_data(push, mode->clock * 1000);
-			evo_data(push, 0x00200000); /* ??? */
-			evo_data(push, mode->clock * 1000);
-			evo_mthd(push, 0x04d0 + (nv_crtc->index * 0x300), 2);
-			evo_data(push, 0x00000311);
-			evo_data(push, 0x00000100);
+			asyh->view.oW -= (asyh->view.oW >> 4) + 32;
+			if (bY) asyh->view.oH -= (bY * 2);
+			else    asyh->view.oH  = ((asyh->view.oW * r) + (r / 2)) >> 19;
 		}
+	}
 
-		evo_kick(push, mast);
+	/* Handle CENTER/ASPECT scaling, taking into account the areas
+	 * removed already for overscan compensation.
+	 */
+	switch (mode) {
+	case DRM_MODE_SCALE_CENTER:
+		asyh->view.oW = min((u16)umode->hdisplay, asyh->view.oW);
+		asyh->view.oH = min((u16)umode->vdisplay, asyh->view.oH);
+		/* fall-through */
+	case DRM_MODE_SCALE_ASPECT:
+		if (asyh->view.oH < asyh->view.oW) {
+			u32 r = (asyh->view.iW << 19) / asyh->view.iH;
+			asyh->view.oW = ((asyh->view.oH * r) + (r / 2)) >> 19;
+		} else {
+			u32 r = (asyh->view.iH << 19) / asyh->view.iW;
+			asyh->view.oH = ((asyh->view.oW * r) + (r / 2)) >> 19;
+		}
+		break;
+	default:
+		break;
 	}
 
-	nv_connector = nouveau_crtc_connector_get(nv_crtc);
-	nv50_crtc_set_dither(nv_crtc, false);
-	nv50_crtc_set_scale(nv_crtc, false);
+	asyh->set.view = true;
+}
 
-	/* G94 only accepts this after setting scale */
-	if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA)
-		nv50_crtc_set_raster_vblank_dmi(nv_crtc, vblankus);
+static void
+nv50_head_atomic_check_mode(struct nv50_head *head, struct nv50_head_atom *asyh)
+{
+	struct drm_display_mode *mode = &asyh->state.adjusted_mode;
+	u32 ilace   = (mode->flags & DRM_MODE_FLAG_INTERLACE) ? 2 : 1;
+	u32 vscan   = (mode->flags & DRM_MODE_FLAG_DBLSCAN) ? 2 : 1;
+	u32 hbackp  =  mode->htotal - mode->hsync_end;
+	u32 vbackp  = (mode->vtotal - mode->vsync_end) * vscan / ilace;
+	u32 hfrontp =  mode->hsync_start - mode->hdisplay;
+	u32 vfrontp = (mode->vsync_start - mode->vdisplay) * vscan / ilace;
+	u32 blankus;
+	struct nv50_head_mode *m = &asyh->mode;
+
+	m->h.active = mode->htotal;
+	m->h.synce  = mode->hsync_end - mode->hsync_start - 1;
+	m->h.blanke = m->h.synce + hbackp;
+	m->h.blanks = mode->htotal - hfrontp - 1;
+
+	m->v.active = mode->vtotal * vscan / ilace;
+	m->v.synce  = ((mode->vsync_end - mode->vsync_start) * vscan / ilace) - 1;
+	m->v.blanke = m->v.synce + vbackp;
+	m->v.blanks = m->v.active - vfrontp - 1;
+
+	/*XXX: Safe underestimate, even "0" works */
+	blankus = (m->v.active - mode->vdisplay - 2) * m->h.active;
+	blankus *= 1000;
+	blankus /= mode->clock;
+	m->v.blankus = blankus;
 
-	nv50_crtc_set_color_vibrance(nv_crtc, false);
-	nv50_crtc_set_image(nv_crtc, crtc->primary->fb, x, y, false);
-	return 0;
+	if (mode->flags & DRM_MODE_FLAG_INTERLACE) {
+		m->v.blank2e =  m->v.active + m->v.synce + vbackp;
+		m->v.blank2s =  m->v.blank2e + (mode->vdisplay * vscan / ilace);
+		m->v.active  = (m->v.active * 2) + 1;
+		m->interlace = true;
+	} else {
+		m->v.blank2e = 0;
+		m->v.blank2s = 1;
+		m->interlace = false;
+	}
+	m->clock = mode->clock;
+
+	drm_mode_set_crtcinfo(mode, CRTC_INTERLACE_HALVE_V);
+	asyh->set.mode = true;
 }
 
 static int
-nv50_crtc_mode_set_base(struct drm_crtc *crtc, int x, int y,
-			struct drm_framebuffer *old_fb)
+nv50_head_atomic_check(struct drm_crtc *crtc, struct drm_crtc_state *state)
 {
 	struct nouveau_drm *drm = nouveau_drm(crtc->dev);
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	int ret;
+	struct nv50_disp *disp = nv50_disp(crtc->dev);
+	struct nv50_head *head = nv50_head(crtc);
+	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
+	struct nv50_head_atom *asyh = nv50_head_atom(state);
+	struct nouveau_conn_atom *asyc = NULL;
+	struct drm_connector_state *conns;
+	struct drm_connector *conn;
+	int i;
 
-	if (!crtc->primary->fb) {
-		NV_DEBUG(drm, "No FB bound\n");
-		return 0;
+	NV_ATOMIC(drm, "%s atomic_check %d\n", crtc->name, asyh->state.active);
+	if (asyh->state.active) {
+		for_each_connector_in_state(asyh->state.state, conn, conns, i) {
+			if (conns->crtc == crtc) {
+				asyc = nouveau_conn_atom(conns);
+				break;
+			}
+		}
+
+		if (armh->state.active) {
+			if (asyc) {
+				if (asyh->state.mode_changed)
+					asyc->set.scaler = true;
+				if (armh->base.depth != asyh->base.depth)
+					asyc->set.dither = true;
+			}
+		} else {
+			asyc->set.mask = ~0;
+			asyh->set.mask = ~0;
+		}
+
+		if (asyh->state.mode_changed)
+			nv50_head_atomic_check_mode(head, asyh);
+
+		if (asyc) {
+			if (asyc->set.scaler)
+				nv50_head_atomic_check_view(armh, asyh, asyc);
+			if (asyc->set.dither)
+				nv50_head_atomic_check_dither(armh, asyh, asyc);
+			if (asyc->set.procamp)
+				nv50_head_atomic_check_procamp(armh, asyh, asyc);
+		}
+
+		if ((asyh->core.visible = (asyh->base.cpp != 0))) {
+			asyh->core.x = asyh->base.x;
+			asyh->core.y = asyh->base.y;
+			asyh->core.w = asyh->base.w;
+			asyh->core.h = asyh->base.h;
+		} else
+		if ((asyh->core.visible = asyh->curs.visible)) {
+			/*XXX: We need to either find some way of having the
+			 *     primary base layer appear black, while still
+			 *     being able to display the other layers, or we
+			 *     need to allocate a dummy black surface here.
+			 */
+			asyh->core.x = 0;
+			asyh->core.y = 0;
+			asyh->core.w = asyh->state.mode.hdisplay;
+			asyh->core.h = asyh->state.mode.vdisplay;
+		}
+		asyh->core.handle = disp->mast.base.vram.handle;
+		asyh->core.offset = 0;
+		asyh->core.format = 0xcf;
+		asyh->core.kind = 0;
+		asyh->core.layout = 1;
+		asyh->core.block = 0;
+		asyh->core.pitch = ALIGN(asyh->core.w, 64) * 4;
+		asyh->lut.handle = disp->mast.base.vram.handle;
+		asyh->lut.offset = head->base.lut.nvbo->bo.offset;
+		asyh->set.base = armh->base.cpp != asyh->base.cpp;
+		asyh->set.ovly = armh->ovly.cpp != asyh->ovly.cpp;
+	} else {
+		asyh->core.visible = false;
+		asyh->curs.visible = false;
+		asyh->base.cpp = 0;
+		asyh->ovly.cpp = 0;
 	}
 
-	ret = nv50_crtc_swap_fbs(crtc, old_fb);
-	if (ret)
-		return ret;
+	if (!drm_atomic_crtc_needs_modeset(&asyh->state)) {
+		if (asyh->core.visible) {
+			if (memcmp(&armh->core, &asyh->core, sizeof(asyh->core)))
+				asyh->set.core = true;
+		} else
+		if (armh->core.visible) {
+			asyh->clr.core = true;
+		}
 
-	nv50_display_flip_stop(crtc);
-	nv50_crtc_set_image(nv_crtc, crtc->primary->fb, x, y, true);
-	nv50_display_flip_next(crtc, crtc->primary->fb, NULL, 1);
-	return 0;
-}
+		if (asyh->curs.visible) {
+			if (memcmp(&armh->curs, &asyh->curs, sizeof(asyh->curs)))
+				asyh->set.curs = true;
+		} else
+		if (armh->curs.visible) {
+			asyh->clr.curs = true;
+		}
+	} else {
+		asyh->clr.core = armh->core.visible;
+		asyh->clr.curs = armh->curs.visible;
+		asyh->set.core = asyh->core.visible;
+		asyh->set.curs = asyh->curs.visible;
+	}
 
-static int
-nv50_crtc_mode_set_base_atomic(struct drm_crtc *crtc,
-			       struct drm_framebuffer *fb, int x, int y,
-			       enum mode_set_atomic state)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	nv50_display_flip_stop(crtc);
-	nv50_crtc_set_image(nv_crtc, fb, x, y, true);
+	if (asyh->clr.mask || asyh->set.mask)
+		nv50_atom(asyh->state.state)->lock_core = true;
 	return 0;
 }
 
 static void
-nv50_crtc_lut_load(struct drm_crtc *crtc)
+nv50_head_lut_load(struct drm_crtc *crtc)
 {
 	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
@@ -1292,64 +2203,95 @@ nv50_crtc_lut_load(struct drm_crtc *crtc)
 	}
 }
 
-static void
-nv50_crtc_disable(struct drm_crtc *crtc)
+static int
+nv50_head_mode_set_base_atomic(struct drm_crtc *crtc,
+			       struct drm_framebuffer *fb, int x, int y,
+			       enum mode_set_atomic state)
 {
-	struct nv50_head *head = nv50_head(crtc);
-	evo_sync(crtc->dev);
-	if (head->image)
-		nouveau_bo_unpin(head->image);
-	nouveau_bo_ref(NULL, &head->image);
+	WARN_ON(1);
+	return 0;
 }
 
+static const struct drm_crtc_helper_funcs
+nv50_head_help = {
+	.mode_set_base_atomic = nv50_head_mode_set_base_atomic,
+	.load_lut = nv50_head_lut_load,
+	.atomic_check = nv50_head_atomic_check,
+};
+
+/* This is identical to the version in the atomic helpers, except that
+ * it supports non-vblanked ("async") page flips.
+ */
 static int
-nv50_crtc_cursor_set(struct drm_crtc *crtc, struct drm_file *file_priv,
-		     uint32_t handle, uint32_t width, uint32_t height)
+nv50_head_page_flip(struct drm_crtc *crtc, struct drm_framebuffer *fb,
+		    struct drm_pending_vblank_event *event, u32 flags)
 {
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct drm_gem_object *gem = NULL;
-	struct nouveau_bo *nvbo = NULL;
+	struct drm_plane *plane = crtc->primary;
+	struct drm_atomic_state *state;
+	struct drm_plane_state *plane_state;
+	struct drm_crtc_state *crtc_state;
 	int ret = 0;
 
-	if (handle) {
-		if (width != 64 || height != 64)
-			return -EINVAL;
+	state = drm_atomic_state_alloc(plane->dev);
+	if (!state)
+		return -ENOMEM;
 
-		gem = drm_gem_object_lookup(file_priv, handle);
-		if (unlikely(!gem))
-			return -ENOENT;
-		nvbo = nouveau_gem_object(gem);
+	state->acquire_ctx = drm_modeset_legacy_acquire_ctx(crtc);
+retry:
+	crtc_state = drm_atomic_get_crtc_state(state, crtc);
+	if (IS_ERR(crtc_state)) {
+		ret = PTR_ERR(crtc_state);
+		goto fail;
+	}
+	crtc_state->event = event;
 
-		ret = nouveau_bo_pin(nvbo, TTM_PL_FLAG_VRAM, true);
+	plane_state = drm_atomic_get_plane_state(state, plane);
+	if (IS_ERR(plane_state)) {
+		ret = PTR_ERR(plane_state);
+		goto fail;
 	}
 
-	if (ret == 0) {
-		if (nv_crtc->cursor.nvbo)
-			nouveau_bo_unpin(nv_crtc->cursor.nvbo);
-		nouveau_bo_ref(nvbo, &nv_crtc->cursor.nvbo);
+	ret = drm_atomic_set_crtc_for_plane(plane_state, crtc);
+	if (ret != 0)
+		goto fail;
+	drm_atomic_set_fb_for_plane(plane_state, fb);
+
+	/* Make sure we don't accidentally do a full modeset. */
+	state->allow_modeset = false;
+	if (!crtc_state->active) {
+		DRM_DEBUG_ATOMIC("[CRTC:%d] disabled, rejecting legacy flip\n",
+				 crtc->base.id);
+		ret = -EINVAL;
+		goto fail;
 	}
-	drm_gem_object_unreference_unlocked(gem);
 
-	nv50_crtc_cursor_show_hide(nv_crtc, true, true);
+	if (flags & DRM_MODE_PAGE_FLIP_ASYNC)
+		nv50_wndw_atom(plane_state)->interval = 0;
+
+	ret = drm_atomic_nonblocking_commit(state);
+fail:
+	if (ret == -EDEADLK)
+		goto backoff;
+
+	drm_atomic_state_put(state);
 	return ret;
-}
 
-static int
-nv50_crtc_cursor_move(struct drm_crtc *crtc, int x, int y)
-{
-	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
-	struct nv50_curs *curs = nv50_curs(crtc);
-	struct nv50_chan *chan = nv50_chan(curs);
-	nvif_wr32(&chan->user, 0x0084, (y << 16) | (x & 0xffff));
-	nvif_wr32(&chan->user, 0x0080, 0x00000000);
+backoff:
+	drm_atomic_state_clear(state);
+	drm_atomic_legacy_backoff(state);
 
-	nv_crtc->cursor_saved_x = x;
-	nv_crtc->cursor_saved_y = y;
-	return 0;
+	/*
+	 * Someone might have exchanged the framebuffer while we dropped locks
+	 * in the backoff code. We need to fix up the fb refcount tracking the
+	 * core does for us.
+	 */
+	plane->old_fb = plane->fb;
+
+	goto retry;
 }
 
 static int
-nv50_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
+nv50_head_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
 		    uint32_t size)
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
@@ -1361,47 +2303,71 @@ nv50_crtc_gamma_set(struct drm_crtc *crtc, u16 *r, u16 *g, u16 *b,
 		nv_crtc->lut.b[i] = b[i];
 	}
 
-	nv50_crtc_lut_load(crtc);
-
+	nv50_head_lut_load(crtc);
 	return 0;
 }
 
 static void
-nv50_crtc_cursor_restore(struct nouveau_crtc *nv_crtc, int x, int y)
+nv50_head_atomic_destroy_state(struct drm_crtc *crtc,
+			       struct drm_crtc_state *state)
 {
-	nv50_crtc_cursor_move(&nv_crtc->base, x, y);
+	struct nv50_head_atom *asyh = nv50_head_atom(state);
+	__drm_atomic_helper_crtc_destroy_state(&asyh->state);
+	kfree(asyh);
+}
 
-	nv50_crtc_cursor_show_hide(nv_crtc, true, true);
+static struct drm_crtc_state *
+nv50_head_atomic_duplicate_state(struct drm_crtc *crtc)
+{
+	struct nv50_head_atom *armh = nv50_head_atom(crtc->state);
+	struct nv50_head_atom *asyh;
+	if (!(asyh = kmalloc(sizeof(*asyh), GFP_KERNEL)))
+		return NULL;
+	__drm_atomic_helper_crtc_duplicate_state(crtc, &asyh->state);
+	asyh->view = armh->view;
+	asyh->mode = armh->mode;
+	asyh->lut  = armh->lut;
+	asyh->core = armh->core;
+	asyh->curs = armh->curs;
+	asyh->base = armh->base;
+	asyh->ovly = armh->ovly;
+	asyh->dither = armh->dither;
+	asyh->procamp = armh->procamp;
+	asyh->clr.mask = 0;
+	asyh->set.mask = 0;
+	return &asyh->state;
 }
 
 static void
-nv50_crtc_destroy(struct drm_crtc *crtc)
+__drm_atomic_helper_crtc_reset(struct drm_crtc *crtc,
+			       struct drm_crtc_state *state)
+{
+	if (crtc->state)
+		crtc->funcs->atomic_destroy_state(crtc, crtc->state);
+	crtc->state = state;
+	crtc->state->crtc = crtc;
+}
+
+static void
+nv50_head_reset(struct drm_crtc *crtc)
+{
+	struct nv50_head_atom *asyh;
+
+	if (WARN_ON(!(asyh = kzalloc(sizeof(*asyh), GFP_KERNEL))))
+		return;
+
+	__drm_atomic_helper_crtc_reset(crtc, &asyh->state);
+}
+
+static void
+nv50_head_destroy(struct drm_crtc *crtc)
 {
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(crtc);
 	struct nv50_disp *disp = nv50_disp(crtc->dev);
 	struct nv50_head *head = nv50_head(crtc);
-	struct nv50_fbdma *fbdma;
-
-	list_for_each_entry(fbdma, &disp->fbdma, head) {
-		nvif_object_fini(&fbdma->base[nv_crtc->index]);
-	}
 
 	nv50_dmac_destroy(&head->ovly.base, disp->disp);
 	nv50_pioc_destroy(&head->oimm.base);
-	nv50_dmac_destroy(&head->sync.base, disp->disp);
-	nv50_pioc_destroy(&head->curs.base);
-
-	/*XXX: this shouldn't be necessary, but the core doesn't call
-	 *     disconnect() during the cleanup paths
-	 */
-	if (head->image)
-		nouveau_bo_unpin(head->image);
-	nouveau_bo_ref(NULL, &head->image);
-
-	/*XXX: ditto */
-	if (nv_crtc->cursor.nvbo)
-		nouveau_bo_unpin(nv_crtc->cursor.nvbo);
-	nouveau_bo_ref(NULL, &nv_crtc->cursor.nvbo);
 
 	nouveau_bo_unmap(nv_crtc->lut.nvbo);
 	if (nv_crtc->lut.nvbo)
@@ -1412,34 +2378,27 @@ nv50_crtc_destroy(struct drm_crtc *crtc)
 	kfree(crtc);
 }
 
-static const struct drm_crtc_helper_funcs nv50_crtc_hfunc = {
-	.dpms = nv50_crtc_dpms,
-	.prepare = nv50_crtc_prepare,
-	.commit = nv50_crtc_commit,
-	.mode_fixup = nv50_crtc_mode_fixup,
-	.mode_set = nv50_crtc_mode_set,
-	.mode_set_base = nv50_crtc_mode_set_base,
-	.mode_set_base_atomic = nv50_crtc_mode_set_base_atomic,
-	.load_lut = nv50_crtc_lut_load,
-	.disable = nv50_crtc_disable,
-};
-
-static const struct drm_crtc_funcs nv50_crtc_func = {
-	.cursor_set = nv50_crtc_cursor_set,
-	.cursor_move = nv50_crtc_cursor_move,
-	.gamma_set = nv50_crtc_gamma_set,
-	.set_config = nouveau_crtc_set_config,
-	.destroy = nv50_crtc_destroy,
-	.page_flip = nouveau_crtc_page_flip,
+static const struct drm_crtc_funcs
+nv50_head_func = {
+	.reset = nv50_head_reset,
+	.gamma_set = nv50_head_gamma_set,
+	.destroy = nv50_head_destroy,
+	.set_config = drm_atomic_helper_set_config,
+	.page_flip = nv50_head_page_flip,
+	.set_property = drm_atomic_helper_crtc_set_property,
+	.atomic_duplicate_state = nv50_head_atomic_duplicate_state,
+	.atomic_destroy_state = nv50_head_atomic_destroy_state,
 };
 
 static int
-nv50_crtc_create(struct drm_device *dev, int index)
+nv50_head_create(struct drm_device *dev, int index)
 {
 	struct nouveau_drm *drm = nouveau_drm(dev);
-	struct nvif_device *device = &drm->device;
+	struct nvif_device *device = &drm->client.device;
 	struct nv50_disp *disp = nv50_disp(dev);
 	struct nv50_head *head;
+	struct nv50_base *base;
+	struct nv50_curs *curs;
 	struct drm_crtc *crtc;
 	int ret, i;
 
@@ -1448,24 +2407,28 @@ nv50_crtc_create(struct drm_device *dev, int index)
 		return -ENOMEM;
 
 	head->base.index = index;
-	head->base.set_dither = nv50_crtc_set_dither;
-	head->base.set_scale = nv50_crtc_set_scale;
-	head->base.set_color_vibrance = nv50_crtc_set_color_vibrance;
-	head->base.color_vibrance = 50;
-	head->base.vibrant_hue = 0;
-	head->base.cursor.set_pos = nv50_crtc_cursor_restore;
 	for (i = 0; i < 256; i++) {
 		head->base.lut.r[i] = i << 8;
 		head->base.lut.g[i] = i << 8;
 		head->base.lut.b[i] = i << 8;
 	}
 
+	ret = nv50_base_new(drm, head, &base);
+	if (ret == 0)
+		ret = nv50_curs_new(drm, head, &curs);
+	if (ret) {
+		kfree(head);
+		return ret;
+	}
+
 	crtc = &head->base.base;
-	drm_crtc_init(dev, crtc, &nv50_crtc_func);
-	drm_crtc_helper_add(crtc, &nv50_crtc_hfunc);
+	drm_crtc_init_with_planes(dev, crtc, &base->wndw.plane,
+				  &curs->wndw.plane, &nv50_head_func,
+				  "head-%d", head->base.index);
+	drm_crtc_helper_add(crtc, &nv50_head_help);
 	drm_mode_crtc_set_gamma_size(crtc, 256);
 
-	ret = nouveau_bo_new(dev, 8192, 0x100, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 8192, 0x100, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &head->base.lut.nvbo);
 	if (!ret) {
 		ret = nouveau_bo_pin(head->base.lut.nvbo, TTM_PL_FLAG_VRAM, true);
@@ -1481,20 +2444,6 @@ nv50_crtc_create(struct drm_device *dev, int index)
 	if (ret)
 		goto out;
 
-	/* allocate cursor resources */
-	ret = nv50_curs_create(device, disp->disp, index, &head->curs);
-	if (ret)
-		goto out;
-
-	/* allocate page flip / sync resources */
-	ret = nv50_base_create(device, disp->disp, index, disp->sync->bo.offset,
-			       &head->sync);
-	if (ret)
-		goto out;
-
-	head->sync.addr = EVO_FLIP_SEM0(index);
-	head->sync.data = 0x00000000;
-
 	/* allocate overlay resources */
 	ret = nv50_oimm_create(device, disp->disp, index, &head->oimm);
 	if (ret)
@@ -1507,43 +2456,64 @@ nv50_crtc_create(struct drm_device *dev, int index)
 
 out:
 	if (ret)
-		nv50_crtc_destroy(crtc);
+		nv50_head_destroy(crtc);
 	return ret;
 }
 
 /******************************************************************************
- * Encoder helpers
+ * Output path helpers
  *****************************************************************************/
-static bool
-nv50_encoder_mode_fixup(struct drm_encoder *encoder,
-			const struct drm_display_mode *mode,
-			struct drm_display_mode *adjusted_mode)
+static int
+nv50_outp_atomic_check_view(struct drm_encoder *encoder,
+			    struct drm_crtc_state *crtc_state,
+			    struct drm_connector_state *conn_state,
+			    struct drm_display_mode *native_mode)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nouveau_connector *nv_connector;
+	struct drm_display_mode *adjusted_mode = &crtc_state->adjusted_mode;
+	struct drm_display_mode *mode = &crtc_state->mode;
+	struct drm_connector *connector = conn_state->connector;
+	struct nouveau_conn_atom *asyc = nouveau_conn_atom(conn_state);
+	struct nouveau_drm *drm = nouveau_drm(encoder->dev);
+
+	NV_ATOMIC(drm, "%s atomic_check\n", encoder->name);
+	asyc->scaler.full = false;
+	if (!native_mode)
+		return 0;
 
-	nv_connector = nouveau_encoder_connector_get(nv_encoder);
-	if (nv_connector && nv_connector->native_mode) {
-		nv_connector->scaling_full = false;
-		if (nv_connector->scaling_mode == DRM_MODE_SCALE_NONE) {
-			switch (nv_connector->type) {
-			case DCB_CONNECTOR_LVDS:
-			case DCB_CONNECTOR_LVDS_SPWG:
-			case DCB_CONNECTOR_eDP:
-				/* force use of scaler for non-edid modes */
-				if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER)
-					return true;
-				nv_connector->scaling_full = true;
+	if (asyc->scaler.mode == DRM_MODE_SCALE_NONE) {
+		switch (connector->connector_type) {
+		case DRM_MODE_CONNECTOR_LVDS:
+		case DRM_MODE_CONNECTOR_eDP:
+			/* Force use of scaler for non-EDID modes. */
+			if (adjusted_mode->type & DRM_MODE_TYPE_DRIVER)
 				break;
-			default:
-				return true;
-			}
+			mode = native_mode;
+			asyc->scaler.full = true;
+			break;
+		default:
+			break;
 		}
+	} else {
+		mode = native_mode;
+	}
 
-		drm_mode_copy(adjusted_mode, nv_connector->native_mode);
+	if (!drm_mode_equal(adjusted_mode, mode)) {
+		drm_mode_copy(adjusted_mode, mode);
+		crtc_state->mode_changed = true;
 	}
 
-	return true;
+	return 0;
+}
+
+static int
+nv50_outp_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
+{
+	struct nouveau_connector *nv_connector =
+		nouveau_connector(conn_state->connector);
+	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+					   nv_connector->native_mode);
 }
 
 /******************************************************************************
@@ -1574,21 +2544,39 @@ nv50_dac_dpms(struct drm_encoder *encoder, int mode)
 }
 
 static void
-nv50_dac_commit(struct drm_encoder *encoder)
+nv50_dac_disable(struct drm_encoder *encoder)
 {
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	const int or = nv_encoder->or;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		push = evo_wait(mast, 4);
+		if (push) {
+			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
+				evo_mthd(push, 0x0400 + (or * 0x080), 1);
+				evo_data(push, 0x00000000);
+			} else {
+				evo_mthd(push, 0x0180 + (or * 0x020), 1);
+				evo_data(push, 0x00000000);
+			}
+			evo_kick(push, mast);
+		}
+	}
+
+	nv_encoder->crtc = NULL;
 }
 
 static void
-nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		  struct drm_display_mode *adjusted_mode)
+nv50_dac_enable(struct drm_encoder *encoder)
 {
 	struct nv50_mast *mast = nv50_mast(encoder->dev);
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
 	u32 *push;
 
-	nv50_dac_dpms(encoder, DRM_MODE_DPMS_ON);
-
 	push = evo_wait(mast, 8);
 	if (push) {
 		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
@@ -1627,33 +2615,6 @@ nv50_dac_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	nv_encoder->crtc = encoder->crtc;
 }
 
-static void
-nv50_dac_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	const int or = nv_encoder->or;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		nv50_crtc_prepare(nv_encoder->crtc);
-
-		push = evo_wait(mast, 4);
-		if (push) {
-			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-				evo_mthd(push, 0x0400 + (or * 0x080), 1);
-				evo_data(push, 0x00000000);
-			} else {
-				evo_mthd(push, 0x0180 + (or * 0x020), 1);
-				evo_data(push, 0x00000000);
-			}
-			evo_kick(push, mast);
-		}
-	}
-
-	nv_encoder->crtc = NULL;
-}
-
 static enum drm_connector_status
 nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 {
@@ -1681,6 +2642,15 @@ nv50_dac_detect(struct drm_encoder *encoder, struct drm_connector *connector)
 	return connector_status_connected;
 }
 
+static const struct drm_encoder_helper_funcs
+nv50_dac_help = {
+	.dpms = nv50_dac_dpms,
+	.atomic_check = nv50_outp_atomic_check,
+	.enable = nv50_dac_enable,
+	.disable = nv50_dac_disable,
+	.detect = nv50_dac_detect
+};
+
 static void
 nv50_dac_destroy(struct drm_encoder *encoder)
 {
@@ -1688,18 +2658,8 @@ nv50_dac_destroy(struct drm_encoder *encoder)
 	kfree(encoder);
 }
 
-static const struct drm_encoder_helper_funcs nv50_dac_hfunc = {
-	.dpms = nv50_dac_dpms,
-	.mode_fixup = nv50_encoder_mode_fixup,
-	.prepare = nv50_dac_disconnect,
-	.commit = nv50_dac_commit,
-	.mode_set = nv50_dac_mode_set,
-	.disable = nv50_dac_disconnect,
-	.get_crtc = nv50_display_crtc_get,
-	.detect = nv50_dac_detect
-};
-
-static const struct drm_encoder_funcs nv50_dac_func = {
+static const struct drm_encoder_funcs
+nv50_dac_func = {
 	.destroy = nv50_dac_destroy,
 };
 
@@ -1707,7 +2667,7 @@ static int
 nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus;
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
@@ -1726,8 +2686,9 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type, NULL);
-	drm_encoder_helper_add(encoder, &nv50_dac_hfunc);
+	drm_encoder_init(connector->dev, encoder, &nv50_dac_func, type,
+			 "dac-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_dac_help);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
 	return 0;
@@ -1737,7 +2698,26 @@ nv50_dac_create(struct drm_connector *connector, struct dcb_output *dcbe)
  * Audio
  *****************************************************************************/
 static void
-nv50_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
+nv50_audio_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
+{
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_hda_eld_v0 eld;
+	} args = {
+		.base.version = 1,
+		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
+		.base.hasht   = nv_encoder->dcb->hasht,
+		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
+				(0x0100 << nv_crtc->index),
+	};
+
+	nvif_mthd(disp->disp, 0, &args, sizeof(args));
+}
+
+static void
+nv50_audio_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
@@ -1768,30 +2748,30 @@ nv50_audio_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
 		  sizeof(args.base) + drm_eld_size(args.data));
 }
 
+/******************************************************************************
+ * HDMI
+ *****************************************************************************/
 static void
-nv50_audio_disconnect(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
+nv50_hdmi_disable(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
 	struct {
 		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hda_eld_v0 eld;
+		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
 	} args = {
 		.base.version = 1,
-		.base.method  = NV50_DISP_MTHD_V1_SOR_HDA_ELD,
-		.base.hasht   = nv_encoder->dcb->hasht,
-		.base.hashm   = (0xf0ff & nv_encoder->dcb->hashm) |
-				(0x0100 << nv_crtc->index),
+		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
+		.base.hasht  = nv_encoder->dcb->hasht,
+		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
+			       (0x0100 << nv_crtc->index),
 	};
 
 	nvif_mthd(disp->disp, 0, &args, sizeof(args));
 }
 
-/******************************************************************************
- * HDMI
- *****************************************************************************/
 static void
-nv50_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
+nv50_hdmi_enable(struct drm_encoder *encoder, struct drm_display_mode *mode)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
@@ -1821,26 +2801,635 @@ nv50_hdmi_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode)
 	args.pwr.max_ac_packet = max_ac_packet / 32;
 
 	nvif_mthd(disp->disp, 0, &args, sizeof(args));
-	nv50_audio_mode_set(encoder, mode);
+	nv50_audio_enable(encoder, mode);
+}
+
+/******************************************************************************
+ * MST
+ *****************************************************************************/
+#define nv50_mstm(p) container_of((p), struct nv50_mstm, mgr)
+#define nv50_mstc(p) container_of((p), struct nv50_mstc, connector)
+#define nv50_msto(p) container_of((p), struct nv50_msto, encoder)
+
+struct nv50_mstm {
+	struct nouveau_encoder *outp;
+
+	struct drm_dp_mst_topology_mgr mgr;
+	struct nv50_msto *msto[4];
+
+	bool modified;
+};
+
+struct nv50_mstc {
+	struct nv50_mstm *mstm;
+	struct drm_dp_mst_port *port;
+	struct drm_connector connector;
+
+	struct drm_display_mode *native;
+	struct edid *edid;
+
+	int pbn;
+};
+
+struct nv50_msto {
+	struct drm_encoder encoder;
+
+	struct nv50_head *head;
+	struct nv50_mstc *mstc;
+	bool disabled;
+};
+
+static struct drm_dp_payload *
+nv50_msto_payload(struct nv50_msto *msto)
+{
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+	int vcpi = mstc->port->vcpi.vcpi, i;
+
+	NV_ATOMIC(drm, "%s: vcpi %d\n", msto->encoder.name, vcpi);
+	for (i = 0; i < mstm->mgr.max_payloads; i++) {
+		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
+		NV_ATOMIC(drm, "%s: %d: vcpi %d start 0x%02x slots 0x%02x\n",
+			  mstm->outp->base.base.name, i, payload->vcpi,
+			  payload->start_slot, payload->num_slots);
+	}
+
+	for (i = 0; i < mstm->mgr.max_payloads; i++) {
+		struct drm_dp_payload *payload = &mstm->mgr.payloads[i];
+		if (payload->vcpi == vcpi)
+			return payload;
+	}
+
+	return NULL;
 }
 
 static void
-nv50_hdmi_disconnect(struct drm_encoder *encoder, struct nouveau_crtc *nv_crtc)
+nv50_msto_cleanup(struct nv50_msto *msto)
 {
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_disp *disp = nv50_disp(encoder->dev);
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+
+	NV_ATOMIC(drm, "%s: msto cleanup\n", msto->encoder.name);
+	if (mstc->port && mstc->port->vcpi.vcpi > 0 && !nv50_msto_payload(msto))
+		drm_dp_mst_deallocate_vcpi(&mstm->mgr, mstc->port);
+	if (msto->disabled) {
+		msto->mstc = NULL;
+		msto->head = NULL;
+		msto->disabled = false;
+	}
+}
+
+static void
+nv50_msto_prepare(struct nv50_msto *msto)
+{
+	struct nouveau_drm *drm = nouveau_drm(msto->encoder.dev);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
 	struct {
 		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_hdmi_pwr_v0 pwr;
+		struct nv50_disp_sor_dp_mst_vcpi_v0 vcpi;
 	} args = {
 		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_HDMI_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = (0xf0ff & nv_encoder->dcb->hashm) |
-			       (0x0100 << nv_crtc->index),
+		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI,
+		.base.hasht  = mstm->outp->dcb->hasht,
+		.base.hashm  = (0xf0ff & mstm->outp->dcb->hashm) |
+			       (0x0100 << msto->head->base.index),
 	};
 
-	nvif_mthd(disp->disp, 0, &args, sizeof(args));
+	NV_ATOMIC(drm, "%s: msto prepare\n", msto->encoder.name);
+	if (mstc->port && mstc->port->vcpi.vcpi > 0) {
+		struct drm_dp_payload *payload = nv50_msto_payload(msto);
+		if (payload) {
+			args.vcpi.start_slot = payload->start_slot;
+			args.vcpi.num_slots = payload->num_slots;
+			args.vcpi.pbn = mstc->port->vcpi.pbn;
+			args.vcpi.aligned_pbn = mstc->port->vcpi.aligned_pbn;
+		}
+	}
+
+	NV_ATOMIC(drm, "%s: %s: %02x %02x %04x %04x\n",
+		  msto->encoder.name, msto->head->base.base.name,
+		  args.vcpi.start_slot, args.vcpi.num_slots,
+		  args.vcpi.pbn, args.vcpi.aligned_pbn);
+	nvif_mthd(&drm->display->disp, 0, &args, sizeof(args));
+}
+
+static int
+nv50_msto_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
+{
+	struct nv50_mstc *mstc = nv50_mstc(conn_state->connector);
+	struct nv50_mstm *mstm = mstc->mstm;
+	int bpp = conn_state->connector->display_info.bpc * 3;
+	int slots;
+
+	mstc->pbn = drm_dp_calc_pbn_mode(crtc_state->adjusted_mode.clock, bpp);
+
+	slots = drm_dp_find_vcpi_slots(&mstm->mgr, mstc->pbn);
+	if (slots < 0)
+		return slots;
+
+	return nv50_outp_atomic_check_view(encoder, crtc_state, conn_state,
+					   mstc->native);
+}
+
+static void
+nv50_msto_enable(struct drm_encoder *encoder)
+{
+	struct nv50_head *head = nv50_head(encoder->crtc);
+	struct nv50_msto *msto = nv50_msto(encoder);
+	struct nv50_mstc *mstc = NULL;
+	struct nv50_mstm *mstm = NULL;
+	struct drm_connector *connector;
+	u8 proto, depth;
+	int slots;
+	bool r;
+
+	drm_for_each_connector(connector, encoder->dev) {
+		if (connector->state->best_encoder == &msto->encoder) {
+			mstc = nv50_mstc(connector);
+			mstm = mstc->mstm;
+			break;
+		}
+	}
+
+	if (WARN_ON(!mstc))
+		return;
+
+	r = drm_dp_mst_allocate_vcpi(&mstm->mgr, mstc->port, mstc->pbn, &slots);
+	WARN_ON(!r);
+
+	if (mstm->outp->dcb->sorconf.link & 1)
+		proto = 0x8;
+	else
+		proto = 0x9;
+
+	switch (mstc->connector.display_info.bpc) {
+	case  6: depth = 0x2; break;
+	case  8: depth = 0x5; break;
+	case 10:
+	default: depth = 0x6; break;
+	}
+
+	mstm->outp->update(mstm->outp, head->base.index,
+			   &head->base.base.state->adjusted_mode, proto, depth);
+
+	msto->head = head;
+	msto->mstc = mstc;
+	mstm->modified = true;
+}
+
+static void
+nv50_msto_disable(struct drm_encoder *encoder)
+{
+	struct nv50_msto *msto = nv50_msto(encoder);
+	struct nv50_mstc *mstc = msto->mstc;
+	struct nv50_mstm *mstm = mstc->mstm;
+
+	if (mstc->port)
+		drm_dp_mst_reset_vcpi_slots(&mstm->mgr, mstc->port);
+
+	mstm->outp->update(mstm->outp, msto->head->base.index, NULL, 0, 0);
+	mstm->modified = true;
+	msto->disabled = true;
+}
+
+static const struct drm_encoder_helper_funcs
+nv50_msto_help = {
+	.disable = nv50_msto_disable,
+	.enable = nv50_msto_enable,
+	.atomic_check = nv50_msto_atomic_check,
+};
+
+static void
+nv50_msto_destroy(struct drm_encoder *encoder)
+{
+	struct nv50_msto *msto = nv50_msto(encoder);
+	drm_encoder_cleanup(&msto->encoder);
+	kfree(msto);
+}
+
+static const struct drm_encoder_funcs
+nv50_msto = {
+	.destroy = nv50_msto_destroy,
+};
+
+static int
+nv50_msto_new(struct drm_device *dev, u32 heads, const char *name, int id,
+	      struct nv50_msto **pmsto)
+{
+	struct nv50_msto *msto;
+	int ret;
+
+	if (!(msto = *pmsto = kzalloc(sizeof(*msto), GFP_KERNEL)))
+		return -ENOMEM;
+
+	ret = drm_encoder_init(dev, &msto->encoder, &nv50_msto,
+			       DRM_MODE_ENCODER_DPMST, "%s-mst-%d", name, id);
+	if (ret) {
+		kfree(*pmsto);
+		*pmsto = NULL;
+		return ret;
+	}
+
+	drm_encoder_helper_add(&msto->encoder, &nv50_msto_help);
+	msto->encoder.possible_crtcs = heads;
+	return 0;
+}
+
+static struct drm_encoder *
+nv50_mstc_atomic_best_encoder(struct drm_connector *connector,
+			      struct drm_connector_state *connector_state)
+{
+	struct nv50_head *head = nv50_head(connector_state->crtc);
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (mstc->port) {
+		struct nv50_mstm *mstm = mstc->mstm;
+		return &mstm->msto[head->base.index]->encoder;
+	}
+	return NULL;
+}
+
+static struct drm_encoder *
+nv50_mstc_best_encoder(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (mstc->port) {
+		struct nv50_mstm *mstm = mstc->mstm;
+		return &mstm->msto[0]->encoder;
+	}
+	return NULL;
+}
+
+static enum drm_mode_status
+nv50_mstc_mode_valid(struct drm_connector *connector,
+		     struct drm_display_mode *mode)
+{
+	return MODE_OK;
+}
+
+static int
+nv50_mstc_get_modes(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	int ret = 0;
+
+	mstc->edid = drm_dp_mst_get_edid(&mstc->connector, mstc->port->mgr, mstc->port);
+	drm_mode_connector_update_edid_property(&mstc->connector, mstc->edid);
+	if (mstc->edid) {
+		ret = drm_add_edid_modes(&mstc->connector, mstc->edid);
+		drm_edid_to_eld(&mstc->connector, mstc->edid);
+	}
+
+	if (!mstc->connector.display_info.bpc)
+		mstc->connector.display_info.bpc = 8;
+
+	if (mstc->native)
+		drm_mode_destroy(mstc->connector.dev, mstc->native);
+	mstc->native = nouveau_conn_native_mode(&mstc->connector);
+	return ret;
+}
+
+static const struct drm_connector_helper_funcs
+nv50_mstc_help = {
+	.get_modes = nv50_mstc_get_modes,
+	.mode_valid = nv50_mstc_mode_valid,
+	.best_encoder = nv50_mstc_best_encoder,
+	.atomic_best_encoder = nv50_mstc_atomic_best_encoder,
+};
+
+static enum drm_connector_status
+nv50_mstc_detect(struct drm_connector *connector, bool force)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	if (!mstc->port)
+		return connector_status_disconnected;
+	return drm_dp_mst_detect_port(connector, mstc->port->mgr, mstc->port);
+}
+
+static void
+nv50_mstc_destroy(struct drm_connector *connector)
+{
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+	drm_connector_cleanup(&mstc->connector);
+	kfree(mstc);
+}
+
+static const struct drm_connector_funcs
+nv50_mstc = {
+	.dpms = drm_atomic_helper_connector_dpms,
+	.reset = nouveau_conn_reset,
+	.detect = nv50_mstc_detect,
+	.fill_modes = drm_helper_probe_single_connector_modes,
+	.set_property = drm_atomic_helper_connector_set_property,
+	.destroy = nv50_mstc_destroy,
+	.atomic_duplicate_state = nouveau_conn_atomic_duplicate_state,
+	.atomic_destroy_state = nouveau_conn_atomic_destroy_state,
+	.atomic_set_property = nouveau_conn_atomic_set_property,
+	.atomic_get_property = nouveau_conn_atomic_get_property,
+};
+
+static int
+nv50_mstc_new(struct nv50_mstm *mstm, struct drm_dp_mst_port *port,
+	      const char *path, struct nv50_mstc **pmstc)
+{
+	struct drm_device *dev = mstm->outp->base.base.dev;
+	struct nv50_mstc *mstc;
+	int ret, i;
+
+	if (!(mstc = *pmstc = kzalloc(sizeof(*mstc), GFP_KERNEL)))
+		return -ENOMEM;
+	mstc->mstm = mstm;
+	mstc->port = port;
+
+	ret = drm_connector_init(dev, &mstc->connector, &nv50_mstc,
+				 DRM_MODE_CONNECTOR_DisplayPort);
+	if (ret) {
+		kfree(*pmstc);
+		*pmstc = NULL;
+		return ret;
+	}
+
+	drm_connector_helper_add(&mstc->connector, &nv50_mstc_help);
+
+	mstc->connector.funcs->reset(&mstc->connector);
+	nouveau_conn_attach_properties(&mstc->connector);
+
+	for (i = 0; i < ARRAY_SIZE(mstm->msto) && mstm->msto; i++)
+		drm_mode_connector_attach_encoder(&mstc->connector, &mstm->msto[i]->encoder);
+
+	drm_object_attach_property(&mstc->connector.base, dev->mode_config.path_property, 0);
+	drm_object_attach_property(&mstc->connector.base, dev->mode_config.tile_property, 0);
+	drm_mode_connector_set_path_property(&mstc->connector, path);
+	return 0;
+}
+
+static void
+nv50_mstm_cleanup(struct nv50_mstm *mstm)
+{
+	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
+	struct drm_encoder *encoder;
+	int ret;
+
+	NV_ATOMIC(drm, "%s: mstm cleanup\n", mstm->outp->base.base.name);
+	ret = drm_dp_check_act_status(&mstm->mgr);
+
+	ret = drm_dp_update_payload_part2(&mstm->mgr);
+
+	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
+		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			struct nv50_msto *msto = nv50_msto(encoder);
+			struct nv50_mstc *mstc = msto->mstc;
+			if (mstc && mstc->mstm == mstm)
+				nv50_msto_cleanup(msto);
+		}
+	}
+
+	mstm->modified = false;
+}
+
+static void
+nv50_mstm_prepare(struct nv50_mstm *mstm)
+{
+	struct nouveau_drm *drm = nouveau_drm(mstm->outp->base.base.dev);
+	struct drm_encoder *encoder;
+	int ret;
+
+	NV_ATOMIC(drm, "%s: mstm prepare\n", mstm->outp->base.base.name);
+	ret = drm_dp_update_payload_part1(&mstm->mgr);
+
+	drm_for_each_encoder(encoder, mstm->outp->base.base.dev) {
+		if (encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			struct nv50_msto *msto = nv50_msto(encoder);
+			struct nv50_mstc *mstc = msto->mstc;
+			if (mstc && mstc->mstm == mstm)
+				nv50_msto_prepare(msto);
+		}
+	}
+}
+
+static void
+nv50_mstm_hotplug(struct drm_dp_mst_topology_mgr *mgr)
+{
+	struct nv50_mstm *mstm = nv50_mstm(mgr);
+	drm_kms_helper_hotplug_event(mstm->outp->base.base.dev);
+}
+
+static void
+nv50_mstm_destroy_connector(struct drm_dp_mst_topology_mgr *mgr,
+			    struct drm_connector *connector)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+	struct nv50_mstc *mstc = nv50_mstc(connector);
+
+	drm_connector_unregister(&mstc->connector);
+
+	drm_modeset_lock_all(drm->dev);
+	drm_fb_helper_remove_one_connector(&drm->fbcon->helper, &mstc->connector);
+	mstc->port = NULL;
+	drm_modeset_unlock_all(drm->dev);
+
+	drm_connector_unreference(&mstc->connector);
+}
+
+static void
+nv50_mstm_register_connector(struct drm_connector *connector)
+{
+	struct nouveau_drm *drm = nouveau_drm(connector->dev);
+
+	drm_modeset_lock_all(drm->dev);
+	drm_fb_helper_add_one_connector(&drm->fbcon->helper, connector);
+	drm_modeset_unlock_all(drm->dev);
+
+	drm_connector_register(connector);
+}
+
+static struct drm_connector *
+nv50_mstm_add_connector(struct drm_dp_mst_topology_mgr *mgr,
+			struct drm_dp_mst_port *port, const char *path)
+{
+	struct nv50_mstm *mstm = nv50_mstm(mgr);
+	struct nv50_mstc *mstc;
+	int ret;
+
+	ret = nv50_mstc_new(mstm, port, path, &mstc);
+	if (ret) {
+		if (mstc)
+			mstc->connector.funcs->destroy(&mstc->connector);
+		return NULL;
+	}
+
+	return &mstc->connector;
+}
+
+static const struct drm_dp_mst_topology_cbs
+nv50_mstm = {
+	.add_connector = nv50_mstm_add_connector,
+	.register_connector = nv50_mstm_register_connector,
+	.destroy_connector = nv50_mstm_destroy_connector,
+	.hotplug = nv50_mstm_hotplug,
+};
+
+void
+nv50_mstm_service(struct nv50_mstm *mstm)
+{
+	struct drm_dp_aux *aux = mstm->mgr.aux;
+	bool handled = true;
+	int ret;
+	u8 esi[8] = {};
+
+	while (handled) {
+		ret = drm_dp_dpcd_read(aux, DP_SINK_COUNT_ESI, esi, 8);
+		if (ret != 8) {
+			drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
+			return;
+		}
+
+		drm_dp_mst_hpd_irq(&mstm->mgr, esi, &handled);
+		if (!handled)
+			break;
+
+		drm_dp_dpcd_write(aux, DP_SINK_COUNT_ESI + 1, &esi[1], 3);
+	}
+}
+
+void
+nv50_mstm_remove(struct nv50_mstm *mstm)
+{
+	if (mstm)
+		drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, false);
+}
+
+static int
+nv50_mstm_enable(struct nv50_mstm *mstm, u8 dpcd, int state)
+{
+	struct nouveau_encoder *outp = mstm->outp;
+	struct {
+		struct nv50_disp_mthd_v1 base;
+		struct nv50_disp_sor_dp_mst_link_v0 mst;
+	} args = {
+		.base.version = 1,
+		.base.method = NV50_DISP_MTHD_V1_SOR_DP_MST_LINK,
+		.base.hasht = outp->dcb->hasht,
+		.base.hashm = outp->dcb->hashm,
+		.mst.state = state,
+	};
+	struct nouveau_drm *drm = nouveau_drm(outp->base.base.dev);
+	struct nvif_object *disp = &drm->display->disp;
+	int ret;
+
+	if (dpcd >= 0x12) {
+		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CTRL, &dpcd);
+		if (ret < 0)
+			return ret;
+
+		dpcd &= ~DP_MST_EN;
+		if (state)
+			dpcd |= DP_MST_EN;
+
+		ret = drm_dp_dpcd_writeb(mstm->mgr.aux, DP_MSTM_CTRL, dpcd);
+		if (ret < 0)
+			return ret;
+	}
+
+	return nvif_mthd(disp, 0, &args, sizeof(args));
+}
+
+int
+nv50_mstm_detect(struct nv50_mstm *mstm, u8 dpcd[8], int allow)
+{
+	int ret, state = 0;
+
+	if (!mstm)
+		return 0;
+
+	if (dpcd[0] >= 0x12) {
+		ret = drm_dp_dpcd_readb(mstm->mgr.aux, DP_MSTM_CAP, &dpcd[1]);
+		if (ret < 0)
+			return ret;
+
+		if (!(dpcd[1] & DP_MST_CAP))
+			dpcd[0] = 0x11;
+		else
+			state = allow;
+	}
+
+	ret = nv50_mstm_enable(mstm, dpcd[0], state);
+	if (ret)
+		return ret;
+
+	ret = drm_dp_mst_topology_mgr_set_mst(&mstm->mgr, state);
+	if (ret)
+		return nv50_mstm_enable(mstm, dpcd[0], 0);
+
+	return mstm->mgr.mst_state;
+}
+
+static void
+nv50_mstm_fini(struct nv50_mstm *mstm)
+{
+	if (mstm && mstm->mgr.mst_state)
+		drm_dp_mst_topology_mgr_suspend(&mstm->mgr);
+}
+
+static void
+nv50_mstm_init(struct nv50_mstm *mstm)
+{
+	if (mstm && mstm->mgr.mst_state)
+		drm_dp_mst_topology_mgr_resume(&mstm->mgr);
+}
+
+static void
+nv50_mstm_del(struct nv50_mstm **pmstm)
+{
+	struct nv50_mstm *mstm = *pmstm;
+	if (mstm) {
+		kfree(*pmstm);
+		*pmstm = NULL;
+	}
+}
+
+static int
+nv50_mstm_new(struct nouveau_encoder *outp, struct drm_dp_aux *aux, int aux_max,
+	      int conn_base_id, struct nv50_mstm **pmstm)
+{
+	const int max_payloads = hweight8(outp->dcb->heads);
+	struct drm_device *dev = outp->base.base.dev;
+	struct nv50_mstm *mstm;
+	int ret, i;
+	u8 dpcd;
+
+	/* This is a workaround for some monitors not functioning
+	 * correctly in MST mode on initial module load.  I think
+	 * some bad interaction with the VBIOS may be responsible.
+	 *
+	 * A good ol' off and on again seems to work here ;)
+	 */
+	ret = drm_dp_dpcd_readb(aux, DP_DPCD_REV, &dpcd);
+	if (ret >= 0 && dpcd >= 0x12)
+		drm_dp_dpcd_writeb(aux, DP_MSTM_CTRL, 0);
+
+	if (!(mstm = *pmstm = kzalloc(sizeof(*mstm), GFP_KERNEL)))
+		return -ENOMEM;
+	mstm->outp = outp;
+	mstm->mgr.cbs = &nv50_mstm;
+
+	ret = drm_dp_mst_topology_mgr_init(&mstm->mgr, dev, aux, aux_max,
+					   max_payloads, conn_base_id);
+	if (ret)
+		return ret;
+
+	for (i = 0; i < max_payloads; i++) {
+		ret = nv50_msto_new(dev, outp->dcb->heads, outp->base.base.name,
+				    i, &mstm->msto[i]);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
 }
 
 /******************************************************************************
@@ -1861,89 +3450,91 @@ nv50_sor_dpms(struct drm_encoder *encoder, int mode)
 		.base.hashm  = nv_encoder->dcb->hashm,
 		.pwr.state = mode == DRM_MODE_DPMS_ON,
 	};
-	struct {
-		struct nv50_disp_mthd_v1 base;
-		struct nv50_disp_sor_dp_pwr_v0 pwr;
-	} link = {
-		.base.version = 1,
-		.base.method = NV50_DISP_MTHD_V1_SOR_DP_PWR,
-		.base.hasht  = nv_encoder->dcb->hasht,
-		.base.hashm  = nv_encoder->dcb->hashm,
-		.pwr.state = mode == DRM_MODE_DPMS_ON,
-	};
-	struct drm_device *dev = encoder->dev;
-	struct drm_encoder *partner;
 
-	nv_encoder->last_dpms = mode;
-
-	list_for_each_entry(partner, &dev->mode_config.encoder_list, head) {
-		struct nouveau_encoder *nv_partner = nouveau_encoder(partner);
-
-		if (partner->encoder_type != DRM_MODE_ENCODER_TMDS)
-			continue;
+	nvif_mthd(disp->disp, 0, &args, sizeof(args));
+}
 
-		if (nv_partner != nv_encoder &&
-		    nv_partner->dcb->or == nv_encoder->dcb->or) {
-			if (nv_partner->last_dpms == DRM_MODE_DPMS_ON)
-				return;
-			break;
-		}
-	}
+static void
+nv50_sor_update(struct nouveau_encoder *nv_encoder, u8 head,
+		struct drm_display_mode *mode, u8 proto, u8 depth)
+{
+	struct nv50_dmac *core = &nv50_mast(nv_encoder->base.base.dev)->base;
+	u32 *push;
 
-	if (nv_encoder->dcb->type == DCB_OUTPUT_DP) {
-		args.pwr.state = 1;
-		nvif_mthd(disp->disp, 0, &args, sizeof(args));
-		nvif_mthd(disp->disp, 0, &link, sizeof(link));
+	if (!mode) {
+		nv_encoder->ctrl &= ~BIT(head);
+		if (!(nv_encoder->ctrl & 0x0000000f))
+			nv_encoder->ctrl = 0;
 	} else {
-		nvif_mthd(disp->disp, 0, &args, sizeof(args));
+		nv_encoder->ctrl |= proto << 8;
+		nv_encoder->ctrl |= BIT(head);
 	}
-}
 
-static void
-nv50_sor_ctrl(struct nouveau_encoder *nv_encoder, u32 mask, u32 data)
-{
-	struct nv50_mast *mast = nv50_mast(nv_encoder->base.base.dev);
-	u32 temp = (nv_encoder->ctrl & ~mask) | (data & mask), *push;
-	if (temp != nv_encoder->ctrl && (push = evo_wait(mast, 2))) {
-		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
+	if ((push = evo_wait(core, 6))) {
+		if (core->base.user.oclass < GF110_DISP_CORE_CHANNEL_DMA) {
+			if (mode) {
+				if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+					nv_encoder->ctrl |= 0x00001000;
+				if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+					nv_encoder->ctrl |= 0x00002000;
+				nv_encoder->ctrl |= depth << 16;
+			}
 			evo_mthd(push, 0x0600 + (nv_encoder->or * 0x40), 1);
-			evo_data(push, (nv_encoder->ctrl = temp));
 		} else {
+			if (mode) {
+				u32 magic = 0x31ec6000 | (head << 25);
+				u32 syncs = 0x00000001;
+				if (mode->flags & DRM_MODE_FLAG_NHSYNC)
+					syncs |= 0x00000008;
+				if (mode->flags & DRM_MODE_FLAG_NVSYNC)
+					syncs |= 0x00000010;
+				if (mode->flags & DRM_MODE_FLAG_INTERLACE)
+					magic |= 0x00000001;
+
+				evo_mthd(push, 0x0404 + (head * 0x300), 2);
+				evo_data(push, syncs | (depth << 6));
+				evo_data(push, magic);
+			}
 			evo_mthd(push, 0x0200 + (nv_encoder->or * 0x20), 1);
-			evo_data(push, (nv_encoder->ctrl = temp));
 		}
-		evo_kick(push, mast);
+		evo_data(push, nv_encoder->ctrl);
+		evo_kick(push, core);
 	}
 }
 
 static void
-nv50_sor_disconnect(struct drm_encoder *encoder)
+nv50_sor_disable(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(nv_encoder->crtc);
 
-	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
 	nv_encoder->crtc = NULL;
 
 	if (nv_crtc) {
-		nv50_crtc_prepare(&nv_crtc->base);
-		nv50_sor_ctrl(nv_encoder, 1 << nv_crtc->index, 0);
-		nv50_audio_disconnect(encoder, nv_crtc);
-		nv50_hdmi_disconnect(&nv_encoder->base.base, nv_crtc);
-	}
-}
+		struct nvkm_i2c_aux *aux = nv_encoder->aux;
+		u8 pwr;
 
-static void
-nv50_sor_commit(struct drm_encoder *encoder)
-{
+		if (aux) {
+			int ret = nvkm_rdaux(aux, DP_SET_POWER, &pwr, 1);
+			if (ret == 0) {
+				pwr &= ~DP_SET_POWER_MASK;
+				pwr |=  DP_SET_POWER_D3;
+				nvkm_wraux(aux, DP_SET_POWER, &pwr, 1);
+			}
+		}
+
+		nv_encoder->update(nv_encoder, nv_crtc->index, NULL, 0, 0);
+		nv50_audio_disable(encoder, nv_crtc);
+		nv50_hdmi_disable(&nv_encoder->base.base, nv_crtc);
+	}
 }
 
 static void
-nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
-		  struct drm_display_mode *mode)
+nv50_sor_enable(struct drm_encoder *encoder)
 {
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
+	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
 	struct {
 		struct nv50_disp_mthd_v1 base;
 		struct nv50_disp_sor_lvds_script_v0 lvds;
@@ -1954,13 +3545,10 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
 		.base.hashm   = nv_encoder->dcb->hashm,
 	};
 	struct nv50_disp *disp = nv50_disp(encoder->dev);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
 	struct drm_device *dev = encoder->dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_connector *nv_connector;
 	struct nvbios *bios = &drm->vbios;
-	u32 mask, ctrl;
-	u8 owner = 1 << nv_crtc->index;
 	u8 proto = 0xf;
 	u8 depth = 0x0;
 
@@ -1985,7 +3573,7 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
 			proto = 0x2;
 		}
 
-		nv50_hdmi_mode_set(&nv_encoder->base.base, mode);
+		nv50_hdmi_enable(&nv_encoder->base.base, mode);
 		break;
 	case DCB_OUTPUT_LVDS:
 		proto = 0x0;
@@ -2019,94 +3607,60 @@ nv50_sor_mode_set(struct drm_encoder *encoder, struct drm_display_mode *umode,
 		nvif_mthd(disp->disp, 0, &lvds, sizeof(lvds));
 		break;
 	case DCB_OUTPUT_DP:
-		if (nv_connector->base.display_info.bpc == 6) {
-			nv_encoder->dp.datarate = mode->clock * 18 / 8;
+		if (nv_connector->base.display_info.bpc == 6)
 			depth = 0x2;
-		} else
-		if (nv_connector->base.display_info.bpc == 8) {
-			nv_encoder->dp.datarate = mode->clock * 24 / 8;
+		else
+		if (nv_connector->base.display_info.bpc == 8)
 			depth = 0x5;
-		} else {
-			nv_encoder->dp.datarate = mode->clock * 30 / 8;
+		else
 			depth = 0x6;
-		}
 
 		if (nv_encoder->dcb->sorconf.link & 1)
 			proto = 0x8;
 		else
 			proto = 0x9;
-		nv50_audio_mode_set(encoder, mode);
+
+		nv50_audio_enable(encoder, mode);
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
-	nv50_sor_dpms(&nv_encoder->base.base, DRM_MODE_DPMS_ON);
-
-	if (nv50_vers(mast) >= GF110_DISP) {
-		u32 *push = evo_wait(mast, 3);
-		if (push) {
-			u32 magic = 0x31ec6000 | (nv_crtc->index << 25);
-			u32 syncs = 0x00000001;
-
-			if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-				syncs |= 0x00000008;
-			if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-				syncs |= 0x00000010;
-
-			if (mode->flags & DRM_MODE_FLAG_INTERLACE)
-				magic |= 0x00000001;
-
-			evo_mthd(push, 0x0404 + (nv_crtc->index * 0x300), 2);
-			evo_data(push, syncs | (depth << 6));
-			evo_data(push, magic);
-			evo_kick(push, mast);
-		}
-
-		ctrl = proto << 8;
-		mask = 0x00000f00;
-	} else {
-		ctrl = (depth << 16) | (proto << 8);
-		if (mode->flags & DRM_MODE_FLAG_NHSYNC)
-			ctrl |= 0x00001000;
-		if (mode->flags & DRM_MODE_FLAG_NVSYNC)
-			ctrl |= 0x00002000;
-		mask = 0x000f3f00;
-	}
-
-	nv50_sor_ctrl(nv_encoder, mask | owner, ctrl | owner);
+	nv_encoder->update(nv_encoder, nv_crtc->index, mode, proto, depth);
 }
 
+static const struct drm_encoder_helper_funcs
+nv50_sor_help = {
+	.dpms = nv50_sor_dpms,
+	.atomic_check = nv50_outp_atomic_check,
+	.enable = nv50_sor_enable,
+	.disable = nv50_sor_disable,
+};
+
 static void
 nv50_sor_destroy(struct drm_encoder *encoder)
 {
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	nv50_mstm_del(&nv_encoder->dp.mstm);
 	drm_encoder_cleanup(encoder);
 	kfree(encoder);
 }
 
-static const struct drm_encoder_helper_funcs nv50_sor_hfunc = {
-	.dpms = nv50_sor_dpms,
-	.mode_fixup = nv50_encoder_mode_fixup,
-	.prepare = nv50_sor_disconnect,
-	.commit = nv50_sor_commit,
-	.mode_set = nv50_sor_mode_set,
-	.disable = nv50_sor_disconnect,
-	.get_crtc = nv50_display_crtc_get,
-};
-
-static const struct drm_encoder_funcs nv50_sor_func = {
+static const struct drm_encoder_funcs
+nv50_sor_func = {
 	.destroy = nv50_sor_destroy,
 };
 
 static int
 nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
+	struct nouveau_connector *nv_connector = nouveau_connector(connector);
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nouveau_encoder *nv_encoder;
 	struct drm_encoder *encoder;
-	int type;
+	int type, ret;
 
 	switch (dcbe->type) {
 	case DCB_OUTPUT_LVDS: type = DRM_MODE_ENCODER_LVDS; break;
@@ -2122,7 +3676,16 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 		return -ENOMEM;
 	nv_encoder->dcb = dcbe;
 	nv_encoder->or = ffs(dcbe->or) - 1;
-	nv_encoder->last_dpms = DRM_MODE_DPMS_OFF;
+	nv_encoder->update = nv50_sor_update;
+
+	encoder = to_drm_encoder(nv_encoder);
+	encoder->possible_crtcs = dcbe->heads;
+	encoder->possible_clones = 0;
+	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type,
+			 "sor-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_sor_help);
+
+	drm_mode_connector_attach_encoder(connector, encoder);
 
 	if (dcbe->type == DCB_OUTPUT_DP) {
 		struct nvkm_i2c_aux *aux =
@@ -2131,6 +3694,15 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 			nv_encoder->i2c = &aux->i2c;
 			nv_encoder->aux = aux;
 		}
+
+		/*TODO: Use DP Info Table to check for support. */
+		if (nv50_disp(encoder->dev)->disp->oclass >= GF110_DISP) {
+			ret = nv50_mstm_new(nv_encoder, &nv_connector->aux, 16,
+					    nv_connector->base.base.id,
+					    &nv_encoder->dp.mstm);
+			if (ret)
+				return ret;
+		}
 	} else {
 		struct nvkm_i2c_bus *bus =
 			nvkm_i2c_bus_find(i2c, dcbe->i2c_index);
@@ -2138,20 +3710,12 @@ nv50_sor_create(struct drm_connector *connector, struct dcb_output *dcbe)
 			nv_encoder->i2c = &bus->i2c;
 	}
 
-	encoder = to_drm_encoder(nv_encoder);
-	encoder->possible_crtcs = dcbe->heads;
-	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_sor_func, type, NULL);
-	drm_encoder_helper_add(encoder, &nv50_sor_hfunc);
-
-	drm_mode_connector_attach_encoder(connector, encoder);
 	return 0;
 }
 
 /******************************************************************************
  * PIOR
  *****************************************************************************/
-
 static void
 nv50_pior_dpms(struct drm_encoder *encoder, int mode)
 {
@@ -2172,30 +3736,48 @@ nv50_pior_dpms(struct drm_encoder *encoder, int mode)
 	nvif_mthd(disp->disp, 0, &args, sizeof(args));
 }
 
-static bool
-nv50_pior_mode_fixup(struct drm_encoder *encoder,
-		     const struct drm_display_mode *mode,
-		     struct drm_display_mode *adjusted_mode)
+static int
+nv50_pior_atomic_check(struct drm_encoder *encoder,
+		       struct drm_crtc_state *crtc_state,
+		       struct drm_connector_state *conn_state)
 {
-	if (!nv50_encoder_mode_fixup(encoder, mode, adjusted_mode))
-		return false;
-	adjusted_mode->clock *= 2;
-	return true;
+	int ret = nv50_outp_atomic_check(encoder, crtc_state, conn_state);
+	if (ret)
+		return ret;
+	crtc_state->adjusted_mode.clock *= 2;
+	return 0;
 }
 
 static void
-nv50_pior_commit(struct drm_encoder *encoder)
+nv50_pior_disable(struct drm_encoder *encoder)
 {
+	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
+	struct nv50_mast *mast = nv50_mast(encoder->dev);
+	const int or = nv_encoder->or;
+	u32 *push;
+
+	if (nv_encoder->crtc) {
+		push = evo_wait(mast, 4);
+		if (push) {
+			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
+				evo_mthd(push, 0x0700 + (or * 0x040), 1);
+				evo_data(push, 0x00000000);
+			}
+			evo_kick(push, mast);
+		}
+	}
+
+	nv_encoder->crtc = NULL;
 }
 
 static void
-nv50_pior_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
-		   struct drm_display_mode *adjusted_mode)
+nv50_pior_enable(struct drm_encoder *encoder)
 {
 	struct nv50_mast *mast = nv50_mast(encoder->dev);
 	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
 	struct nouveau_crtc *nv_crtc = nouveau_crtc(encoder->crtc);
 	struct nouveau_connector *nv_connector;
+	struct drm_display_mode *mode = &nv_crtc->base.state->adjusted_mode;
 	u8 owner = 1 << nv_crtc->index;
 	u8 proto, depth;
 	u32 *push;
@@ -2214,12 +3796,10 @@ nv50_pior_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 		proto = 0x0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 		break;
 	}
 
-	nv50_pior_dpms(encoder, DRM_MODE_DPMS_ON);
-
 	push = evo_wait(mast, 8);
 	if (push) {
 		if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
@@ -2238,29 +3818,13 @@ nv50_pior_mode_set(struct drm_encoder *encoder, struct drm_display_mode *mode,
 	nv_encoder->crtc = encoder->crtc;
 }
 
-static void
-nv50_pior_disconnect(struct drm_encoder *encoder)
-{
-	struct nouveau_encoder *nv_encoder = nouveau_encoder(encoder);
-	struct nv50_mast *mast = nv50_mast(encoder->dev);
-	const int or = nv_encoder->or;
-	u32 *push;
-
-	if (nv_encoder->crtc) {
-		nv50_crtc_prepare(nv_encoder->crtc);
-
-		push = evo_wait(mast, 4);
-		if (push) {
-			if (nv50_vers(mast) < GF110_DISP_CORE_CHANNEL_DMA) {
-				evo_mthd(push, 0x0700 + (or * 0x040), 1);
-				evo_data(push, 0x00000000);
-			}
-			evo_kick(push, mast);
-		}
-	}
-
-	nv_encoder->crtc = NULL;
-}
+static const struct drm_encoder_helper_funcs
+nv50_pior_help = {
+	.dpms = nv50_pior_dpms,
+	.atomic_check = nv50_pior_atomic_check,
+	.enable = nv50_pior_enable,
+	.disable = nv50_pior_disable,
+};
 
 static void
 nv50_pior_destroy(struct drm_encoder *encoder)
@@ -2269,17 +3833,8 @@ nv50_pior_destroy(struct drm_encoder *encoder)
 	kfree(encoder);
 }
 
-static const struct drm_encoder_helper_funcs nv50_pior_hfunc = {
-	.dpms = nv50_pior_dpms,
-	.mode_fixup = nv50_pior_mode_fixup,
-	.prepare = nv50_pior_disconnect,
-	.commit = nv50_pior_commit,
-	.mode_set = nv50_pior_mode_set,
-	.disable = nv50_pior_disconnect,
-	.get_crtc = nv50_display_crtc_get,
-};
-
-static const struct drm_encoder_funcs nv50_pior_func = {
+static const struct drm_encoder_funcs
+nv50_pior_func = {
 	.destroy = nv50_pior_destroy,
 };
 
@@ -2287,7 +3842,7 @@ static int
 nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 {
 	struct nouveau_drm *drm = nouveau_drm(connector->dev);
-	struct nvkm_i2c *i2c = nvxx_i2c(&drm->device);
+	struct nvkm_i2c *i2c = nvxx_i2c(&drm->client.device);
 	struct nvkm_i2c_bus *bus = NULL;
 	struct nvkm_i2c_aux *aux = NULL;
 	struct i2c_adapter *ddc;
@@ -2321,149 +3876,470 @@ nv50_pior_create(struct drm_connector *connector, struct dcb_output *dcbe)
 	encoder = to_drm_encoder(nv_encoder);
 	encoder->possible_crtcs = dcbe->heads;
 	encoder->possible_clones = 0;
-	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type, NULL);
-	drm_encoder_helper_add(encoder, &nv50_pior_hfunc);
+	drm_encoder_init(connector->dev, encoder, &nv50_pior_func, type,
+			 "pior-%04x-%04x", dcbe->hasht, dcbe->hashm);
+	drm_encoder_helper_add(encoder, &nv50_pior_help);
 
 	drm_mode_connector_attach_encoder(connector, encoder);
 	return 0;
 }
 
 /******************************************************************************
- * Framebuffer
+ * Atomic
  *****************************************************************************/
 
 static void
-nv50_fbdma_fini(struct nv50_fbdma *fbdma)
+nv50_disp_atomic_commit_core(struct nouveau_drm *drm, u32 interlock)
 {
-	int i;
-	for (i = 0; i < ARRAY_SIZE(fbdma->base); i++)
-		nvif_object_fini(&fbdma->base[i]);
-	nvif_object_fini(&fbdma->core);
-	list_del(&fbdma->head);
-	kfree(fbdma);
+	struct nv50_disp *disp = nv50_disp(drm->dev);
+	struct nv50_dmac *core = &disp->mast.base;
+	struct nv50_mstm *mstm;
+	struct drm_encoder *encoder;
+	u32 *push;
+
+	NV_ATOMIC(drm, "commit core %08x\n", interlock);
+
+	drm_for_each_encoder(encoder, drm->dev) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			mstm = nouveau_encoder(encoder)->dp.mstm;
+			if (mstm && mstm->modified)
+				nv50_mstm_prepare(mstm);
+		}
+	}
+
+	if ((push = evo_wait(core, 5))) {
+		evo_mthd(push, 0x0084, 1);
+		evo_data(push, 0x80000000);
+		evo_mthd(push, 0x0080, 2);
+		evo_data(push, interlock);
+		evo_data(push, 0x00000000);
+		nouveau_bo_wr32(disp->sync, 0, 0x00000000);
+		evo_kick(push, core);
+		if (nvif_msec(&drm->client.device, 2000ULL,
+			if (nouveau_bo_rd32(disp->sync, 0))
+				break;
+			usleep_range(1, 2);
+		) < 0)
+			NV_ERROR(drm, "EVO timeout\n");
+	}
+
+	drm_for_each_encoder(encoder, drm->dev) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			mstm = nouveau_encoder(encoder)->dp.mstm;
+			if (mstm && mstm->modified)
+				nv50_mstm_cleanup(mstm);
+		}
+	}
 }
 
-static int
-nv50_fbdma_init(struct drm_device *dev, u32 name, u64 offset, u64 length, u8 kind)
+static void
+nv50_disp_atomic_commit_tail(struct drm_atomic_state *state)
 {
+	struct drm_device *dev = state->dev;
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	struct drm_plane_state *plane_state;
+	struct drm_plane *plane;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nv50_disp *disp = nv50_disp(dev);
-	struct nv50_mast *mast = nv50_mast(dev);
-	struct __attribute__ ((packed)) {
-		struct nv_dma_v0 base;
-		union {
-			struct nv50_dma_v0 nv50;
-			struct gf100_dma_v0 gf100;
-			struct gf119_dma_v0 gf119;
-		};
-	} args = {};
-	struct nv50_fbdma *fbdma;
-	struct drm_crtc *crtc;
-	u32 size = sizeof(args.base);
-	int ret;
+	struct nv50_atom *atom = nv50_atom(state);
+	struct nv50_outp_atom *outp, *outt;
+	u32 interlock_core = 0;
+	u32 interlock_chan = 0;
+	int i;
+
+	NV_ATOMIC(drm, "commit %d %d\n", atom->lock_core, atom->flush_disable);
+	drm_atomic_helper_wait_for_fences(dev, state, false);
+	drm_atomic_helper_wait_for_dependencies(state);
+	drm_atomic_helper_update_legacy_modeset_state(dev, state);
+
+	if (atom->lock_core)
+		mutex_lock(&disp->mutex);
 
-	list_for_each_entry(fbdma, &disp->fbdma, head) {
-		if (fbdma->core.handle == name)
-			return 0;
+	/* Disable head(s). */
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct nv50_head_atom *asyh = nv50_head_atom(crtc->state);
+		struct nv50_head *head = nv50_head(crtc);
+
+		NV_ATOMIC(drm, "%s: clr %04x (set %04x)\n", crtc->name,
+			  asyh->clr.mask, asyh->set.mask);
+
+		if (asyh->clr.mask) {
+			nv50_head_flush_clr(head, asyh, atom->flush_disable);
+			interlock_core |= 1;
+		}
 	}
 
-	fbdma = kzalloc(sizeof(*fbdma), GFP_KERNEL);
-	if (!fbdma)
-		return -ENOMEM;
-	list_add(&fbdma->head, &disp->fbdma);
+	/* Disable plane(s). */
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
 
-	args.base.target = NV_DMA_V0_TARGET_VRAM;
-	args.base.access = NV_DMA_V0_ACCESS_RDWR;
-	args.base.start = offset;
-	args.base.limit = offset + length - 1;
+		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", plane->name,
+			  asyw->clr.mask, asyw->set.mask);
+		if (!asyw->clr.mask)
+			continue;
 
-	if (drm->device.info.chipset < 0x80) {
-		args.nv50.part = NV50_DMA_V0_PART_256;
-		size += sizeof(args.nv50);
-	} else
-	if (drm->device.info.chipset < 0xc0) {
-		args.nv50.part = NV50_DMA_V0_PART_256;
-		args.nv50.kind = kind;
-		size += sizeof(args.nv50);
-	} else
-	if (drm->device.info.chipset < 0xd0) {
-		args.gf100.kind = kind;
-		size += sizeof(args.gf100);
-	} else {
-		args.gf119.page = GF119_DMA_V0_PAGE_LP;
-		args.gf119.kind = kind;
-		size += sizeof(args.gf119);
+		interlock_chan |= nv50_wndw_flush_clr(wndw, interlock_core,
+						      atom->flush_disable,
+						      asyw);
+	}
+
+	/* Disable output path(s). */
+	list_for_each_entry(outp, &atom->outp, head) {
+		const struct drm_encoder_helper_funcs *help;
+		struct drm_encoder *encoder;
+
+		encoder = outp->encoder;
+		help = encoder->helper_private;
+
+		NV_ATOMIC(drm, "%s: clr %02x (set %02x)\n", encoder->name,
+			  outp->clr.mask, outp->set.mask);
+
+		if (outp->clr.mask) {
+			help->disable(encoder);
+			interlock_core |= 1;
+			if (outp->flush_disable) {
+				nv50_disp_atomic_commit_core(drm, interlock_chan);
+				interlock_core = 0;
+				interlock_chan = 0;
+			}
+		}
+	}
+
+	/* Flush disable. */
+	if (interlock_core) {
+		if (atom->flush_disable) {
+			nv50_disp_atomic_commit_core(drm, interlock_chan);
+			interlock_core = 0;
+			interlock_chan = 0;
+		}
+	}
+
+	/* Update output path(s). */
+	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
+		const struct drm_encoder_helper_funcs *help;
+		struct drm_encoder *encoder;
+
+		encoder = outp->encoder;
+		help = encoder->helper_private;
+
+		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", encoder->name,
+			  outp->set.mask, outp->clr.mask);
+
+		if (outp->set.mask) {
+			help->enable(encoder);
+			interlock_core = 1;
+		}
+
+		list_del(&outp->head);
+		kfree(outp);
 	}
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+	/* Update head(s). */
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		struct nv50_head_atom *asyh = nv50_head_atom(crtc->state);
 		struct nv50_head *head = nv50_head(crtc);
-		int ret = nvif_object_init(&head->sync.base.base.user, name,
-					   NV_DMA_IN_MEMORY, &args, size,
-					   &fbdma->base[head->base.index]);
-		if (ret) {
-			nv50_fbdma_fini(fbdma);
-			return ret;
+
+		NV_ATOMIC(drm, "%s: set %04x (clr %04x)\n", crtc->name,
+			  asyh->set.mask, asyh->clr.mask);
+
+		if (asyh->set.mask) {
+			nv50_head_flush_set(head, asyh);
+			interlock_core = 1;
 		}
 	}
 
-	ret = nvif_object_init(&mast->base.base.user, name, NV_DMA_IN_MEMORY,
-			       &args, size, &fbdma->core);
-	if (ret) {
-		nv50_fbdma_fini(fbdma);
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc->state->event)
+			drm_crtc_vblank_get(crtc);
+	}
+
+	/* Update plane(s). */
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+
+		NV_ATOMIC(drm, "%s: set %02x (clr %02x)\n", plane->name,
+			  asyw->set.mask, asyw->clr.mask);
+		if ( !asyw->set.mask &&
+		    (!asyw->clr.mask || atom->flush_disable))
+			continue;
+
+		interlock_chan |= nv50_wndw_flush_set(wndw, interlock_core, asyw);
+	}
+
+	/* Flush update. */
+	if (interlock_core) {
+		if (!interlock_chan && atom->state.legacy_cursor_update) {
+			u32 *push = evo_wait(&disp->mast, 2);
+			if (push) {
+				evo_mthd(push, 0x0080, 1);
+				evo_data(push, 0x00000000);
+				evo_kick(push, &disp->mast);
+			}
+		} else {
+			nv50_disp_atomic_commit_core(drm, interlock_chan);
+		}
+	}
+
+	if (atom->lock_core)
+		mutex_unlock(&disp->mutex);
+
+	/* Wait for HW to signal completion. */
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane->state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		int ret = nv50_wndw_wait_armed(wndw, asyw);
+		if (ret)
+			NV_ERROR(drm, "%s: timeout\n", plane->name);
+	}
+
+	for_each_crtc_in_state(state, crtc, crtc_state, i) {
+		if (crtc->state->event) {
+			unsigned long flags;
+			/* Get correct count/ts if racing with vblank irq */
+			drm_accurate_vblank_count(crtc);
+			spin_lock_irqsave(&crtc->dev->event_lock, flags);
+			drm_crtc_send_vblank_event(crtc, crtc->state->event);
+			spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
+			crtc->state->event = NULL;
+			drm_crtc_vblank_put(crtc);
+		}
+	}
+
+	drm_atomic_helper_commit_hw_done(state);
+	drm_atomic_helper_cleanup_planes(dev, state);
+	drm_atomic_helper_commit_cleanup_done(state);
+	drm_atomic_state_put(state);
+}
+
+static void
+nv50_disp_atomic_commit_work(struct work_struct *work)
+{
+	struct drm_atomic_state *state =
+		container_of(work, typeof(*state), commit_work);
+	nv50_disp_atomic_commit_tail(state);
+}
+
+static int
+nv50_disp_atomic_commit(struct drm_device *dev,
+			struct drm_atomic_state *state, bool nonblock)
+{
+	struct nouveau_drm *drm = nouveau_drm(dev);
+	struct nv50_disp *disp = nv50_disp(dev);
+	struct drm_plane_state *plane_state;
+	struct drm_plane *plane;
+	struct drm_crtc *crtc;
+	bool active = false;
+	int ret, i;
+
+	ret = pm_runtime_get_sync(dev->dev);
+	if (ret < 0 && ret != -EACCES)
 		return ret;
+
+	ret = drm_atomic_helper_setup_commit(state, nonblock);
+	if (ret)
+		goto done;
+
+	INIT_WORK(&state->commit_work, nv50_disp_atomic_commit_work);
+
+	ret = drm_atomic_helper_prepare_planes(dev, state);
+	if (ret)
+		goto done;
+
+	if (!nonblock) {
+		ret = drm_atomic_helper_wait_for_fences(dev, state, true);
+		if (ret)
+			goto done;
+	}
+
+	for_each_plane_in_state(state, plane, plane_state, i) {
+		struct nv50_wndw_atom *asyw = nv50_wndw_atom(plane_state);
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (asyw->set.image) {
+			asyw->ntfy.handle = wndw->dmac->sync.handle;
+			asyw->ntfy.offset = wndw->ntfy;
+			asyw->ntfy.awaken = false;
+			asyw->set.ntfy = true;
+			nouveau_bo_wr32(disp->sync, wndw->ntfy / 4, 0x00000000);
+			wndw->ntfy ^= 0x10;
+		}
+	}
+
+	drm_atomic_helper_swap_state(state, true);
+	drm_atomic_state_get(state);
+
+	if (nonblock)
+		queue_work(system_unbound_wq, &state->commit_work);
+	else
+		nv50_disp_atomic_commit_tail(state);
+
+	drm_for_each_crtc(crtc, dev) {
+		if (crtc->state->enable) {
+			if (!drm->have_disp_power_ref) {
+				drm->have_disp_power_ref = true;
+				return ret;
+			}
+			active = true;
+			break;
+		}
+	}
+
+	if (!active && drm->have_disp_power_ref) {
+		pm_runtime_put_autosuspend(dev->dev);
+		drm->have_disp_power_ref = false;
+	}
+
+done:
+	pm_runtime_put_autosuspend(dev->dev);
+	return ret;
+}
+
+static struct nv50_outp_atom *
+nv50_disp_outp_atomic_add(struct nv50_atom *atom, struct drm_encoder *encoder)
+{
+	struct nv50_outp_atom *outp;
+
+	list_for_each_entry(outp, &atom->outp, head) {
+		if (outp->encoder == encoder)
+			return outp;
+	}
+
+	outp = kzalloc(sizeof(*outp), GFP_KERNEL);
+	if (!outp)
+		return ERR_PTR(-ENOMEM);
+
+	list_add(&outp->head, &atom->outp);
+	outp->encoder = encoder;
+	return outp;
+}
+
+static int
+nv50_disp_outp_atomic_check_clr(struct nv50_atom *atom,
+				struct drm_connector *connector)
+{
+	struct drm_encoder *encoder = connector->state->best_encoder;
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	struct nv50_outp_atom *outp;
+
+	if (!(crtc = connector->state->crtc))
+		return 0;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(&atom->state, crtc);
+	if (crtc->state->active && drm_atomic_crtc_needs_modeset(crtc_state)) {
+		outp = nv50_disp_outp_atomic_add(atom, encoder);
+		if (IS_ERR(outp))
+			return PTR_ERR(outp);
+
+		if (outp->encoder->encoder_type == DRM_MODE_ENCODER_DPMST) {
+			outp->flush_disable = true;
+			atom->flush_disable = true;
+		}
+		outp->clr.ctrl = true;
+		atom->lock_core = true;
 	}
 
 	return 0;
 }
 
-static void
-nv50_fb_dtor(struct drm_framebuffer *fb)
+static int
+nv50_disp_outp_atomic_check_set(struct nv50_atom *atom,
+				struct drm_connector_state *connector_state)
 {
+	struct drm_encoder *encoder = connector_state->best_encoder;
+	struct drm_crtc_state *crtc_state;
+	struct drm_crtc *crtc;
+	struct nv50_outp_atom *outp;
+
+	if (!(crtc = connector_state->crtc))
+		return 0;
+
+	crtc_state = drm_atomic_get_existing_crtc_state(&atom->state, crtc);
+	if (crtc_state->active && drm_atomic_crtc_needs_modeset(crtc_state)) {
+		outp = nv50_disp_outp_atomic_add(atom, encoder);
+		if (IS_ERR(outp))
+			return PTR_ERR(outp);
+
+		outp->set.ctrl = true;
+		atom->lock_core = true;
+	}
+
+	return 0;
 }
 
 static int
-nv50_fb_ctor(struct drm_framebuffer *fb)
-{
-	struct nouveau_framebuffer *nv_fb = nouveau_framebuffer(fb);
-	struct nouveau_drm *drm = nouveau_drm(fb->dev);
-	struct nouveau_bo *nvbo = nv_fb->nvbo;
-	struct nv50_disp *disp = nv50_disp(fb->dev);
-	u8 kind = nouveau_bo_tile_layout(nvbo) >> 8;
-	u8 tile = nvbo->tile_mode;
-
-	if (drm->device.info.chipset >= 0xc0)
-		tile >>= 4; /* yep.. */
-
-	switch (fb->depth) {
-	case  8: nv_fb->r_format = 0x1e00; break;
-	case 15: nv_fb->r_format = 0xe900; break;
-	case 16: nv_fb->r_format = 0xe800; break;
-	case 24:
-	case 32: nv_fb->r_format = 0xcf00; break;
-	case 30: nv_fb->r_format = 0xd100; break;
-	default:
-		 NV_ERROR(drm, "unknown depth %d\n", fb->depth);
-		 return -EINVAL;
+nv50_disp_atomic_check(struct drm_device *dev, struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	struct drm_connector_state *connector_state;
+	struct drm_connector *connector;
+	int ret, i;
+
+	ret = drm_atomic_helper_check(dev, state);
+	if (ret)
+		return ret;
+
+	for_each_connector_in_state(state, connector, connector_state, i) {
+		ret = nv50_disp_outp_atomic_check_clr(atom, connector);
+		if (ret)
+			return ret;
+
+		ret = nv50_disp_outp_atomic_check_set(atom, connector_state);
+		if (ret)
+			return ret;
 	}
 
-	if (disp->disp->oclass < G82_DISP) {
-		nv_fb->r_pitch   = kind ? (((fb->pitches[0] / 4) << 4) | tile) :
-					    (fb->pitches[0] | 0x00100000);
-		nv_fb->r_format |= kind << 16;
-	} else
-	if (disp->disp->oclass < GF110_DISP) {
-		nv_fb->r_pitch  = kind ? (((fb->pitches[0] / 4) << 4) | tile) :
-					   (fb->pitches[0] | 0x00100000);
-	} else {
-		nv_fb->r_pitch  = kind ? (((fb->pitches[0] / 4) << 4) | tile) :
-					   (fb->pitches[0] | 0x01000000);
+	return 0;
+}
+
+static void
+nv50_disp_atomic_state_clear(struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	struct nv50_outp_atom *outp, *outt;
+
+	list_for_each_entry_safe(outp, outt, &atom->outp, head) {
+		list_del(&outp->head);
+		kfree(outp);
 	}
-	nv_fb->r_handle = 0xffff0000 | kind;
 
-	return nv50_fbdma_init(fb->dev, nv_fb->r_handle, 0,
-			       drm->device.info.ram_user, kind);
+	drm_atomic_state_default_clear(state);
+}
+
+static void
+nv50_disp_atomic_state_free(struct drm_atomic_state *state)
+{
+	struct nv50_atom *atom = nv50_atom(state);
+	drm_atomic_state_default_release(&atom->state);
+	kfree(atom);
 }
 
+static struct drm_atomic_state *
+nv50_disp_atomic_state_alloc(struct drm_device *dev)
+{
+	struct nv50_atom *atom;
+	if (!(atom = kzalloc(sizeof(*atom), GFP_KERNEL)) ||
+	    drm_atomic_state_init(dev, &atom->state) < 0) {
+		kfree(atom);
+		return NULL;
+	}
+	INIT_LIST_HEAD(&atom->outp);
+	return &atom->state;
+}
+
+static const struct drm_mode_config_funcs
+nv50_disp_func = {
+	.fb_create = nouveau_user_framebuffer_create,
+	.output_poll_changed = nouveau_fbcon_output_poll_changed,
+	.atomic_check = nv50_disp_atomic_check,
+	.atomic_commit = nv50_disp_atomic_commit,
+	.atomic_state_alloc = nv50_disp_atomic_state_alloc,
+	.atomic_state_clear = nv50_disp_atomic_state_clear,
+	.atomic_state_free = nv50_disp_atomic_state_free,
+};
+
 /******************************************************************************
  * Init
  *****************************************************************************/
@@ -2471,12 +4347,30 @@ nv50_fb_ctor(struct drm_framebuffer *fb)
 void
 nv50_display_fini(struct drm_device *dev)
 {
+	struct nouveau_encoder *nv_encoder;
+	struct drm_encoder *encoder;
+	struct drm_plane *plane;
+
+	drm_for_each_plane(plane, dev) {
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (plane->funcs != &nv50_wndw)
+			continue;
+		nv50_wndw_fini(wndw);
+	}
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			nv_encoder = nouveau_encoder(encoder);
+			nv50_mstm_fini(nv_encoder->dp.mstm);
+		}
+	}
 }
 
 int
 nv50_display_init(struct drm_device *dev)
 {
-	struct nv50_disp *disp = nv50_disp(dev);
+	struct drm_encoder *encoder;
+	struct drm_plane *plane;
 	struct drm_crtc *crtc;
 	u32 *push;
 
@@ -2484,16 +4378,35 @@ nv50_display_init(struct drm_device *dev)
 	if (!push)
 		return -EBUSY;
 
-	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-		struct nv50_sync *sync = nv50_sync(crtc);
-
-		nv50_crtc_lut_load(crtc);
-		nouveau_bo_wr32(disp->sync, sync->addr / 4, sync->data);
-	}
-
 	evo_mthd(push, 0x0088, 1);
 	evo_data(push, nv50_mast(dev)->base.sync.handle);
 	evo_kick(push, nv50_mast(dev));
+
+	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
+		if (encoder->encoder_type != DRM_MODE_ENCODER_DPMST) {
+			const struct drm_encoder_helper_funcs *help;
+			struct nouveau_encoder *nv_encoder;
+
+			nv_encoder = nouveau_encoder(encoder);
+			help = encoder->helper_private;
+			if (help && help->dpms)
+				help->dpms(encoder, DRM_MODE_DPMS_ON);
+
+			nv50_mstm_init(nv_encoder->dp.mstm);
+		}
+	}
+
+	drm_for_each_crtc(crtc, dev) {
+		nv50_head_lut_load(crtc);
+	}
+
+	drm_for_each_plane(plane, dev) {
+		struct nv50_wndw *wndw = nv50_wndw(plane);
+		if (plane->funcs != &nv50_wndw)
+			continue;
+		nv50_wndw_init(wndw);
+	}
+
 	return 0;
 }
 
@@ -2501,11 +4414,6 @@ void
 nv50_display_destroy(struct drm_device *dev)
 {
 	struct nv50_disp *disp = nv50_disp(dev);
-	struct nv50_fbdma *fbdma, *fbtmp;
-
-	list_for_each_entry_safe(fbdma, fbtmp, &disp->fbdma, head) {
-		nv50_fbdma_fini(fbdma);
-	}
 
 	nv50_dmac_destroy(&disp->mast.base, disp->disp);
 
@@ -2518,10 +4426,14 @@ nv50_display_destroy(struct drm_device *dev)
 	kfree(disp);
 }
 
+MODULE_PARM_DESC(atomic, "Expose atomic ioctl (default: disabled)");
+static int nouveau_atomic = 0;
+module_param_named(atomic, nouveau_atomic, int, 0400);
+
 int
 nv50_display_create(struct drm_device *dev)
 {
-	struct nvif_device *device = &nouveau_drm(dev)->device;
+	struct nvif_device *device = &nouveau_drm(dev)->client.device;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct dcb_table *dcb = &drm->vbios.dcb;
 	struct drm_connector *connector, *tmp;
@@ -2532,18 +4444,20 @@ nv50_display_create(struct drm_device *dev)
 	disp = kzalloc(sizeof(*disp), GFP_KERNEL);
 	if (!disp)
 		return -ENOMEM;
-	INIT_LIST_HEAD(&disp->fbdma);
+
+	mutex_init(&disp->mutex);
 
 	nouveau_display(dev)->priv = disp;
 	nouveau_display(dev)->dtor = nv50_display_destroy;
 	nouveau_display(dev)->init = nv50_display_init;
 	nouveau_display(dev)->fini = nv50_display_fini;
-	nouveau_display(dev)->fb_ctor = nv50_fb_ctor;
-	nouveau_display(dev)->fb_dtor = nv50_fb_dtor;
 	disp->disp = &nouveau_display(dev)->disp;
+	dev->mode_config.funcs = &nv50_disp_func;
+	if (nouveau_atomic)
+		dev->driver->driver_features |= DRIVER_ATOMIC;
 
 	/* small shared memory area we use for notifiers and semaphores */
-	ret = nouveau_bo_new(dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &disp->sync);
 	if (!ret) {
 		ret = nouveau_bo_pin(disp->sync, TTM_PL_FLAG_VRAM, true);
@@ -2572,7 +4486,7 @@ nv50_display_create(struct drm_device *dev)
 		crtcs = 2;
 
 	for (i = 0; i < crtcs; i++) {
-		ret = nv50_crtc_create(dev, i);
+		ret = nv50_head_create(dev, i);
 		if (ret)
 			goto out;
 	}
diff --git a/drivers/gpu/drm/nouveau/nv50_display.h b/drivers/gpu/drm/nouveau/nv50_display.h
index 70da347aa8c5..918187cee84b 100644
--- a/drivers/gpu/drm/nouveau/nv50_display.h
+++ b/drivers/gpu/drm/nouveau/nv50_display.h
@@ -35,11 +35,4 @@ int  nv50_display_create(struct drm_device *);
 void nv50_display_destroy(struct drm_device *);
 int  nv50_display_init(struct drm_device *);
 void nv50_display_fini(struct drm_device *);
-
-void nv50_display_flip_stop(struct drm_crtc *);
-int  nv50_display_flip_next(struct drm_crtc *, struct drm_framebuffer *,
-			    struct nouveau_channel *, u32 swap_interval);
-
-struct nouveau_bo *nv50_display_crtc_sema(struct drm_device *, int head);
-
 #endif /* __NV50_DISPLAY_H__ */
diff --git a/drivers/gpu/drm/nouveau/nv50_fbcon.c b/drivers/gpu/drm/nouveau/nv50_fbcon.c
index af3d3c49411a..327dcd7901ed 100644
--- a/drivers/gpu/drm/nouveau/nv50_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nv50_fbcon.c
@@ -30,7 +30,7 @@ int
 nv50_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -65,7 +65,7 @@ int
 nv50_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -93,7 +93,7 @@ int
 nv50_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
@@ -148,8 +148,8 @@ int
 nv50_fbcon_accel_init(struct fb_info *info)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_framebuffer *fb = &nfbdev->nouveau_fb;
-	struct drm_device *dev = nfbdev->dev;
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(nfbdev->helper.fb);
+	struct drm_device *dev = nfbdev->helper.dev;
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret, format;
diff --git a/drivers/gpu/drm/nouveau/nv50_fence.c b/drivers/gpu/drm/nouveau/nv50_fence.c
index 4d6f202b7770..a369d978e267 100644
--- a/drivers/gpu/drm/nouveau/nv50_fence.c
+++ b/drivers/gpu/drm/nouveau/nv50_fence.c
@@ -35,13 +35,12 @@
 static int
 nv50_fence_context_new(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->drm->dev;
 	struct nv10_fence_priv *priv = chan->drm->fence;
 	struct nv10_fence_chan *fctx;
-	struct ttm_mem_reg *mem = &priv->bo->bo.mem;
-	u32 start = mem->start * PAGE_SIZE;
-	u32 limit = start + mem->size - 1;
-	int ret, i;
+	struct ttm_mem_reg *reg = &priv->bo->bo.mem;
+	u32 start = reg->start * PAGE_SIZE;
+	u32 limit = start + reg->size - 1;
+	int ret;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
 	if (!fctx)
@@ -60,23 +59,6 @@ nv50_fence_context_new(struct nouveau_channel *chan)
 					.limit = limit,
 			       }, sizeof(struct nv_dma_v0),
 			       &fctx->sema);
-
-	/* dma objects for display sync channel semaphore blocks */
-	for (i = 0; !ret && i < dev->mode_config.num_crtc; i++) {
-		struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i);
-		u32 start = bo->bo.mem.start * PAGE_SIZE;
-		u32 limit = start + bo->bo.mem.size - 1;
-
-		ret = nvif_object_init(&chan->user, NvEvoSema0 + i,
-				       NV_DMA_IN_MEMORY, &(struct nv_dma_v0) {
-						.target = NV_DMA_V0_TARGET_VRAM,
-						.access = NV_DMA_V0_ACCESS_RDWR,
-						.start = start,
-						.limit = limit,
-				       }, sizeof(struct nv_dma_v0),
-				       &fctx->head[i]);
-	}
-
 	if (ret)
 		nv10_fence_context_del(chan);
 	return ret;
@@ -97,10 +79,10 @@ nv50_fence_create(struct nouveau_drm *drm)
 	priv->base.context_new = nv50_fence_context_new;
 	priv->base.context_del = nv10_fence_context_del;
 	priv->base.contexts = 127;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	spin_lock_init(&priv->lock);
 
-	ret = nouveau_bo_new(drm->dev, 4096, 0x1000, TTM_PL_FLAG_VRAM,
+	ret = nouveau_bo_new(&drm->client, 4096, 0x1000, TTM_PL_FLAG_VRAM,
 			     0, 0x0000, NULL, NULL, &priv->bo);
 	if (!ret) {
 		ret = nouveau_bo_pin(priv->bo, TTM_PL_FLAG_VRAM, false);
diff --git a/drivers/gpu/drm/nouveau/nv84_fence.c b/drivers/gpu/drm/nouveau/nv84_fence.c
index 18bde9d8e6d6..bd7a8a1e4ad9 100644
--- a/drivers/gpu/drm/nouveau/nv84_fence.c
+++ b/drivers/gpu/drm/nouveau/nv84_fence.c
@@ -28,13 +28,6 @@
 
 #include "nv50_display.h"
 
-u64
-nv84_fence_crtc(struct nouveau_channel *chan, int crtc)
-{
-	struct nv84_fence_chan *fctx = chan->fence;
-	return fctx->dispc_vma[crtc].offset;
-}
-
 static int
 nv84_fence_emit32(struct nouveau_channel *chan, u64 virtual, u32 sequence)
 {
@@ -110,19 +103,14 @@ nv84_fence_read(struct nouveau_channel *chan)
 static void
 nv84_fence_context_del(struct nouveau_channel *chan)
 {
-	struct drm_device *dev = chan->drm->dev;
 	struct nv84_fence_priv *priv = chan->drm->fence;
 	struct nv84_fence_chan *fctx = chan->fence;
-	int i;
-
-	for (i = 0; i < dev->mode_config.num_crtc; i++) {
-		struct nouveau_bo *bo = nv50_display_crtc_sema(dev, i);
-		nouveau_bo_vma_del(bo, &fctx->dispc_vma[i]);
-	}
 
 	nouveau_bo_wr32(priv->bo, chan->chid * 16 / 4, fctx->base.sequence);
+	mutex_lock(&priv->mutex);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma_gart);
 	nouveau_bo_vma_del(priv->bo, &fctx->vma);
+	mutex_unlock(&priv->mutex);
 	nouveau_fence_context_del(&fctx->base);
 	chan->fence = NULL;
 	nouveau_fence_context_free(&fctx->base);
@@ -134,7 +122,7 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	struct nouveau_cli *cli = (void *)chan->user.client;
 	struct nv84_fence_priv *priv = chan->drm->fence;
 	struct nv84_fence_chan *fctx;
-	int ret, i;
+	int ret;
 
 	fctx = chan->fence = kzalloc(sizeof(*fctx), GFP_KERNEL);
 	if (!fctx)
@@ -148,17 +136,13 @@ nv84_fence_context_new(struct nouveau_channel *chan)
 	fctx->base.sync32 = nv84_fence_sync32;
 	fctx->base.sequence = nv84_fence_read(chan);
 
+	mutex_lock(&priv->mutex);
 	ret = nouveau_bo_vma_add(priv->bo, cli->vm, &fctx->vma);
 	if (ret == 0) {
 		ret = nouveau_bo_vma_add(priv->bo_gart, cli->vm,
 					&fctx->vma_gart);
 	}
-
-	/* map display semaphore buffers into channel's vm */
-	for (i = 0; !ret && i < chan->drm->dev->mode_config.num_crtc; i++) {
-		struct nouveau_bo *bo = nv50_display_crtc_sema(chan->drm->dev, i);
-		ret = nouveau_bo_vma_add(bo, cli->vm, &fctx->dispc_vma[i]);
-	}
+	mutex_unlock(&priv->mutex);
 
 	if (ret)
 		nv84_fence_context_del(chan);
@@ -213,7 +197,7 @@ nv84_fence_destroy(struct nouveau_drm *drm)
 int
 nv84_fence_create(struct nouveau_drm *drm)
 {
-	struct nvkm_fifo *fifo = nvxx_fifo(&drm->device);
+	struct nvkm_fifo *fifo = nvxx_fifo(&drm->client.device);
 	struct nv84_fence_priv *priv;
 	u32 domain;
 	int ret;
@@ -229,18 +213,20 @@ nv84_fence_create(struct nouveau_drm *drm)
 	priv->base.context_del = nv84_fence_context_del;
 
 	priv->base.contexts = fifo->nr;
-	priv->base.context_base = fence_context_alloc(priv->base.contexts);
+	priv->base.context_base = dma_fence_context_alloc(priv->base.contexts);
 	priv->base.uevent = true;
 
+	mutex_init(&priv->mutex);
+
 	/* Use VRAM if there is any ; otherwise fallback to system memory */
-	domain = drm->device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
+	domain = drm->client.device.info.ram_size != 0 ? TTM_PL_FLAG_VRAM :
 			 /*
 			  * fences created in sysmem must be non-cached or we
 			  * will lose CPU/GPU coherency!
 			  */
 			 TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED;
-	ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0, domain, 0,
-			     0, NULL, NULL, &priv->bo);
+	ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
+			     domain, 0, 0, NULL, NULL, &priv->bo);
 	if (ret == 0) {
 		ret = nouveau_bo_pin(priv->bo, domain, false);
 		if (ret == 0) {
@@ -253,7 +239,7 @@ nv84_fence_create(struct nouveau_drm *drm)
 	}
 
 	if (ret == 0)
-		ret = nouveau_bo_new(drm->dev, 16 * priv->base.contexts, 0,
+		ret = nouveau_bo_new(&drm->client, 16 * priv->base.contexts, 0,
 				     TTM_PL_FLAG_TT | TTM_PL_FLAG_UNCACHED, 0,
 				     0, NULL, NULL, &priv->bo_gart);
 	if (ret == 0) {
diff --git a/drivers/gpu/drm/nouveau/nvc0_fbcon.c b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
index 054b6a056d99..90f27bfa381f 100644
--- a/drivers/gpu/drm/nouveau/nvc0_fbcon.c
+++ b/drivers/gpu/drm/nouveau/nvc0_fbcon.c
@@ -30,7 +30,7 @@ int
 nvc0_fbcon_fillrect(struct fb_info *info, const struct fb_fillrect *rect)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -65,7 +65,7 @@ int
 nvc0_fbcon_copyarea(struct fb_info *info, const struct fb_copyarea *region)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret;
 
@@ -93,7 +93,7 @@ int
 nvc0_fbcon_imageblit(struct fb_info *info, const struct fb_image *image)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct nouveau_drm *drm = nouveau_drm(nfbdev->dev);
+	struct nouveau_drm *drm = nouveau_drm(nfbdev->helper.dev);
 	struct nouveau_channel *chan = drm->channel;
 	uint32_t dwords, *data = (uint32_t *)image->data;
 	uint32_t mask = ~(~0 >> (32 - info->var.bits_per_pixel));
@@ -148,8 +148,8 @@ int
 nvc0_fbcon_accel_init(struct fb_info *info)
 {
 	struct nouveau_fbdev *nfbdev = info->par;
-	struct drm_device *dev = nfbdev->dev;
-	struct nouveau_framebuffer *fb = &nfbdev->nouveau_fb;
+	struct drm_device *dev = nfbdev->helper.dev;
+	struct nouveau_framebuffer *fb = nouveau_framebuffer(nfbdev->helper.fb);
 	struct nouveau_drm *drm = nouveau_drm(dev);
 	struct nouveau_channel *chan = drm->channel;
 	int ret, format;
diff --git a/drivers/gpu/drm/nouveau/nvif/Kbuild b/drivers/gpu/drm/nouveau/nvif/Kbuild
index ff8ed3a04d06..067b5e9f5ec1 100644
--- a/drivers/gpu/drm/nouveau/nvif/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvif/Kbuild
@@ -1,4 +1,5 @@
 nvif-y := nvif/object.o
 nvif-y += nvif/client.o
 nvif-y += nvif/device.o
+nvif-y += nvif/driver.o
 nvif-y += nvif/notify.o
diff --git a/drivers/gpu/drm/nouveau/nvif/client.c b/drivers/gpu/drm/nouveau/nvif/client.c
index 1ee9294eca2e..12db54965c20 100644
--- a/drivers/gpu/drm/nouveau/nvif/client.c
+++ b/drivers/gpu/drm/nouveau/nvif/client.c
@@ -26,6 +26,9 @@
 #include <nvif/driver.h>
 #include <nvif/ioctl.h>
 
+#include <nvif/class.h>
+#include <nvif/if0000.h>
+
 int
 nvif_client_ioctl(struct nvif_client *client, void *data, u32 size)
 {
@@ -47,37 +50,29 @@ nvif_client_resume(struct nvif_client *client)
 void
 nvif_client_fini(struct nvif_client *client)
 {
+	nvif_object_fini(&client->object);
 	if (client->driver) {
-		client->driver->fini(client->object.priv);
+		if (client->driver->fini)
+			client->driver->fini(client->object.priv);
 		client->driver = NULL;
-		client->object.client = NULL;
-		nvif_object_fini(&client->object);
 	}
 }
 
-const struct nvif_driver *
-nvif_drivers[] = {
-#ifdef __KERNEL__
-	&nvif_driver_nvkm,
-#else
-	&nvif_driver_drm,
-	&nvif_driver_lib,
-	&nvif_driver_null,
-#endif
-	NULL
-};
-
 int
-nvif_client_init(const char *driver, const char *name, u64 device,
-		 const char *cfg, const char *dbg, struct nvif_client *client)
+nvif_client_init(struct nvif_client *parent, const char *name, u64 device,
+		 struct nvif_client *client)
 {
+	struct nvif_client_v0 args = { .device = device };
 	struct {
 		struct nvif_ioctl_v0 ioctl;
 		struct nvif_ioctl_nop_v0 nop;
-	} args = {};
-	int ret, i;
+	} nop = {};
+	int ret;
 
-	ret = nvif_object_init(NULL, 0, 0, NULL, 0, &client->object);
+	strncpy(args.name, name, sizeof(args.name));
+	ret = nvif_object_init(parent != client ? &parent->object : NULL,
+			       0, NVIF_CLASS_CLIENT, &args, sizeof(args),
+			       &client->object);
 	if (ret)
 		return ret;
 
@@ -85,19 +80,11 @@ nvif_client_init(const char *driver, const char *name, u64 device,
 	client->object.handle = ~0;
 	client->route = NVIF_IOCTL_V0_ROUTE_NVIF;
 	client->super = true;
-
-	for (i = 0, ret = -EINVAL; (client->driver = nvif_drivers[i]); i++) {
-		if (!driver || !strcmp(client->driver->name, driver)) {
-			ret = client->driver->init(name, device, cfg, dbg,
-						  &client->object.priv);
-			if (!ret || driver)
-				break;
-		}
-	}
+	client->driver = parent->driver;
 
 	if (ret == 0) {
-		ret = nvif_client_ioctl(client, &args, sizeof(args));
-		client->version = args.nop.version;
+		ret = nvif_client_ioctl(client, &nop, sizeof(nop));
+		client->version = nop.nop.version;
 	}
 
 	if (ret)
diff --git a/drivers/gpu/drm/nouveau/nvif/driver.c b/drivers/gpu/drm/nouveau/nvif/driver.c
new file mode 100644
index 000000000000..701330956e33
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvif/driver.c
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs
+ */
+#include <nvif/driver.h>
+#include <nvif/client.h>
+
+static const struct nvif_driver *
+nvif_driver[] = {
+#ifdef __KERNEL__
+	&nvif_driver_nvkm,
+#else
+	&nvif_driver_drm,
+	&nvif_driver_lib,
+	&nvif_driver_null,
+#endif
+	NULL
+};
+
+int
+nvif_driver_init(const char *drv, const char *cfg, const char *dbg,
+		 const char *name, u64 device, struct nvif_client *client)
+{
+	int ret = -EINVAL, i;
+
+	for (i = 0; (client->driver = nvif_driver[i]); i++) {
+		if (!drv || !strcmp(client->driver->name, drv)) {
+			ret = client->driver->init(name, device, cfg, dbg,
+						   &client->object.priv);
+			if (ret == 0)
+				break;
+			client->driver->fini(client->object.priv);
+		}
+	}
+
+	if (ret == 0)
+		ret = nvif_client_init(client, name, device, client);
+	return ret;
+}
diff --git a/drivers/gpu/drm/nouveau/nvif/notify.c b/drivers/gpu/drm/nouveau/nvif/notify.c
index b0787ff833ef..278b3933dc96 100644
--- a/drivers/gpu/drm/nouveau/nvif/notify.c
+++ b/drivers/gpu/drm/nouveau/nvif/notify.c
@@ -155,10 +155,8 @@ nvif_notify_fini(struct nvif_notify *notify)
 	int ret = nvif_notify_put(notify);
 	if (ret >= 0 && object) {
 		ret = nvif_object_ioctl(object, &args, sizeof(args), NULL);
-		if (ret == 0) {
-			notify->object = NULL;
-			kfree((void *)notify->data);
-		}
+		notify->object = NULL;
+		kfree((void *)notify->data);
 	}
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/Kbuild b/drivers/gpu/drm/nouveau/nvkm/Kbuild
index 2832147b676c..e664378f6eda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/Kbuild
@@ -1,3 +1,4 @@
 include $(src)/nvkm/core/Kbuild
+include $(src)/nvkm/falcon/Kbuild
 include $(src)/nvkm/subdev/Kbuild
 include $(src)/nvkm/engine/Kbuild
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/client.c b/drivers/gpu/drm/nouveau/nvkm/core/client.c
index e1943910858e..0d3a896892b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/client.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/client.c
@@ -31,6 +31,43 @@
 #include <nvif/if0000.h>
 #include <nvif/unpack.h>
 
+static int
+nvkm_uclient_new(const struct nvkm_oclass *oclass, void *argv, u32 argc,
+		 struct nvkm_object **pobject)
+{
+	union {
+		struct nvif_client_v0 v0;
+	} *args = argv;
+	struct nvkm_client *client;
+	int ret = -ENOSYS;
+
+	if (!(ret = nvif_unpack(ret, &argv, &argc, args->v0, 0, 0, false))){
+		args->v0.name[sizeof(args->v0.name) - 1] = 0;
+		ret = nvkm_client_new(args->v0.name, args->v0.device, NULL,
+				      NULL, oclass->client->ntfy, &client);
+		if (ret)
+			return ret;
+	} else
+		return ret;
+
+	client->object.client = oclass->client;
+	client->object.handle = oclass->handle;
+	client->object.route  = oclass->route;
+	client->object.token  = oclass->token;
+	client->object.object = oclass->object;
+	client->debug = oclass->client->debug;
+	*pobject = &client->object;
+	return 0;
+}
+
+const struct nvkm_sclass
+nvkm_uclient_sclass = {
+	.oclass = NVIF_CLASS_CLIENT,
+	.minver = 0,
+	.maxver = 0,
+	.ctor = nvkm_uclient_new,
+};
+
 struct nvkm_client_notify {
 	struct nvkm_client *client;
 	struct nvkm_notify n;
@@ -138,17 +175,30 @@ nvkm_client_notify_new(struct nvkm_object *object,
 	return ret;
 }
 
+static const struct nvkm_object_func nvkm_client;
+struct nvkm_client *
+nvkm_client_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_client);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_client(object);
+}
+
 static int
-nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
+nvkm_client_mthd_devlist(struct nvkm_client *client, void *data, u32 size)
 {
 	union {
-		struct nv_client_devlist_v0 v0;
+		struct nvif_client_devlist_v0 v0;
 	} *args = data;
 	int ret = -ENOSYS;
 
-	nvif_ioctl(object, "client devlist size %d\n", size);
+	nvif_ioctl(&client->object, "client devlist size %d\n", size);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
-		nvif_ioctl(object, "client devlist vers %d count %d\n",
+		nvif_ioctl(&client->object, "client devlist vers %d count %d\n",
 			   args->v0.version, args->v0.count);
 		if (size == sizeof(args->v0.device[0]) * args->v0.count) {
 			ret = nvkm_device_list(args->v0.device, args->v0.count);
@@ -167,9 +217,10 @@ nvkm_client_mthd_devlist(struct nvkm_object *object, void *data, u32 size)
 static int
 nvkm_client_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
+	struct nvkm_client *client = nvkm_client(object);
 	switch (mthd) {
-	case NV_CLIENT_DEVLIST:
-		return nvkm_client_mthd_devlist(object, data, size);
+	case NVIF_CLIENT_V0_DEVLIST:
+		return nvkm_client_mthd_devlist(client, data, size);
 	default:
 		break;
 	}
@@ -190,7 +241,8 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	const struct nvkm_sclass *sclass;
 
 	switch (index) {
-	case 0: sclass = &nvkm_udevice_sclass; break;
+	case 0: sclass = &nvkm_uclient_sclass; break;
+	case 1: sclass = &nvkm_udevice_sclass; break;
 	default:
 		return -EINVAL;
 	}
@@ -200,110 +252,54 @@ nvkm_client_child_get(struct nvkm_object *object, int index,
 	return 0;
 }
 
-static const struct nvkm_object_func
-nvkm_client_object_func = {
-	.mthd = nvkm_client_mthd,
-	.sclass = nvkm_client_child_get,
-};
-
-void
-nvkm_client_remove(struct nvkm_client *client, struct nvkm_object *object)
-{
-	if (!RB_EMPTY_NODE(&object->node))
-		rb_erase(&object->node, &client->objroot);
-}
-
-bool
-nvkm_client_insert(struct nvkm_client *client, struct nvkm_object *object)
-{
-	struct rb_node **ptr = &client->objroot.rb_node;
-	struct rb_node *parent = NULL;
-
-	while (*ptr) {
-		struct nvkm_object *this =
-			container_of(*ptr, typeof(*this), node);
-		parent = *ptr;
-		if (object->object < this->object)
-			ptr = &parent->rb_left;
-		else
-		if (object->object > this->object)
-			ptr = &parent->rb_right;
-		else
-			return false;
-	}
-
-	rb_link_node(&object->node, parent, ptr);
-	rb_insert_color(&object->node, &client->objroot);
-	return true;
-}
-
-struct nvkm_object *
-nvkm_client_search(struct nvkm_client *client, u64 handle)
-{
-	struct rb_node *node = client->objroot.rb_node;
-	while (node) {
-		struct nvkm_object *object =
-			container_of(node, typeof(*object), node);
-		if (handle < object->object)
-			node = node->rb_left;
-		else
-		if (handle > object->object)
-			node = node->rb_right;
-		else
-			return object;
-	}
-	return NULL;
-}
-
-int
-nvkm_client_fini(struct nvkm_client *client, bool suspend)
+static int
+nvkm_client_fini(struct nvkm_object *object, bool suspend)
 {
-	struct nvkm_object *object = &client->object;
+	struct nvkm_client *client = nvkm_client(object);
 	const char *name[2] = { "fini", "suspend" };
 	int i;
 	nvif_debug(object, "%s notify\n", name[suspend]);
 	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
 		nvkm_client_notify_put(client, i);
-	return nvkm_object_fini(&client->object, suspend);
-}
-
-int
-nvkm_client_init(struct nvkm_client *client)
-{
-	return nvkm_object_init(&client->object);
+	return 0;
 }
 
-void
-nvkm_client_del(struct nvkm_client **pclient)
+static void *
+nvkm_client_dtor(struct nvkm_object *object)
 {
-	struct nvkm_client *client = *pclient;
+	struct nvkm_client *client = nvkm_client(object);
 	int i;
-	if (client) {
-		nvkm_client_fini(client, false);
-		for (i = 0; i < ARRAY_SIZE(client->notify); i++)
-			nvkm_client_notify_del(client, i);
-		nvkm_object_dtor(&client->object);
-		kfree(*pclient);
-		*pclient = NULL;
-	}
+	for (i = 0; i < ARRAY_SIZE(client->notify); i++)
+		nvkm_client_notify_del(client, i);
+	return client;
 }
 
+static const struct nvkm_object_func
+nvkm_client = {
+	.dtor = nvkm_client_dtor,
+	.fini = nvkm_client_fini,
+	.mthd = nvkm_client_mthd,
+	.sclass = nvkm_client_child_get,
+};
+
 int
 nvkm_client_new(const char *name, u64 device, const char *cfg,
-		const char *dbg, struct nvkm_client **pclient)
+		const char *dbg,
+		int (*ntfy)(const void *, u32, const void *, u32),
+		struct nvkm_client **pclient)
 {
-	struct nvkm_oclass oclass = {};
+	struct nvkm_oclass oclass = { .base = nvkm_uclient_sclass };
 	struct nvkm_client *client;
 
 	if (!(client = *pclient = kzalloc(sizeof(*client), GFP_KERNEL)))
 		return -ENOMEM;
 	oclass.client = client;
 
-	nvkm_object_ctor(&nvkm_client_object_func, &oclass, &client->object);
+	nvkm_object_ctor(&nvkm_client, &oclass, &client->object);
 	snprintf(client->name, sizeof(client->name), "%s", name);
 	client->device = device;
 	client->debug = nvkm_dbgopt(dbg, "CLIENT");
 	client->objroot = RB_ROOT;
-	client->dmaroot = RB_ROOT;
+	client->ntfy = ntfy;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/engine.c b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
index ee8e5831fe37..b6c916954a10 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/engine.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/engine.c
@@ -27,6 +27,14 @@
 
 #include <subdev/fb.h>
 
+bool
+nvkm_engine_chsw_load(struct nvkm_engine *engine)
+{
+	if (engine->func->chsw_load)
+		return engine->func->chsw_load(engine);
+	return false;
+}
+
 void
 nvkm_engine_unref(struct nvkm_engine **pengine)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
index 34ecd4a7e0c1..058ff46b5f16 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/firmware.c
@@ -20,6 +20,7 @@
  * DEALINGS IN THE SOFTWARE.
  */
 #include <core/device.h>
+#include <core/firmware.h>
 
 /**
  * nvkm_firmware_get - load firmware from the official nvidia/chip/ directory
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
index b0db51847c36..be19bbe56bba 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/ioctl.c
@@ -29,7 +29,8 @@
 #include <nvif/ioctl.h>
 
 static int
-nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_nop(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_nop_v0 v0;
@@ -46,7 +47,8 @@ nvkm_ioctl_nop(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_sclass(struct nvkm_client *client,
+		  struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_sclass_v0 v0;
@@ -78,12 +80,12 @@ nvkm_ioctl_sclass(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
+nvkm_ioctl_new(struct nvkm_client *client,
+	       struct nvkm_object *parent, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_new_v0 v0;
 	} *args = data;
-	struct nvkm_client *client = parent->client;
 	struct nvkm_object *object = NULL;
 	struct nvkm_oclass oclass;
 	int ret = -ENOSYS, i = 0;
@@ -104,9 +106,11 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 
 	do {
 		memset(&oclass, 0x00, sizeof(oclass));
-		oclass.client = client;
 		oclass.handle = args->v0.handle;
+		oclass.route  = args->v0.route;
+		oclass.token  = args->v0.token;
 		oclass.object = args->v0.object;
+		oclass.client = client;
 		oclass.parent = parent;
 		ret = parent->func->sclass(parent, i++, &oclass);
 		if (ret)
@@ -125,10 +129,7 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 		ret = nvkm_object_init(object);
 		if (ret == 0) {
 			list_add(&object->head, &parent->tree);
-			object->route = args->v0.route;
-			object->token = args->v0.token;
-			object->object = args->v0.object;
-			if (nvkm_client_insert(client, object)) {
+			if (nvkm_object_insert(object)) {
 				client->data = object;
 				return 0;
 			}
@@ -142,7 +143,8 @@ nvkm_ioctl_new(struct nvkm_object *parent, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_del(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_del none;
@@ -156,11 +158,12 @@ nvkm_ioctl_del(struct nvkm_object *object, void *data, u32 size)
 		nvkm_object_del(&object);
 	}
 
-	return ret;
+	return ret ? ret : 1;
 }
 
 static int
-nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_mthd(struct nvkm_client *client,
+		struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_mthd_v0 v0;
@@ -179,7 +182,8 @@ nvkm_ioctl_mthd(struct nvkm_object *object, void *data, u32 size)
 
 
 static int
-nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_rd(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_rd_v0 v0;
@@ -218,7 +222,8 @@ nvkm_ioctl_rd(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_wr(struct nvkm_client *client,
+	      struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_wr_v0 v0;
@@ -246,7 +251,8 @@ nvkm_ioctl_wr(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_map(struct nvkm_client *client,
+	       struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_map_v0 v0;
@@ -264,7 +270,8 @@ nvkm_ioctl_map(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_unmap(struct nvkm_client *client,
+		 struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_unmap none;
@@ -280,7 +287,8 @@ nvkm_ioctl_unmap(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_new(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
 	union {
 		struct nvif_ioctl_ntfy_new_v0 v0;
@@ -306,9 +314,9 @@ nvkm_ioctl_ntfy_new(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_del(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_del_v0 v0;
 	} *args = data;
@@ -325,9 +333,9 @@ nvkm_ioctl_ntfy_del(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_get(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_get_v0 v0;
 	} *args = data;
@@ -344,9 +352,9 @@ nvkm_ioctl_ntfy_get(struct nvkm_object *object, void *data, u32 size)
 }
 
 static int
-nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
+nvkm_ioctl_ntfy_put(struct nvkm_client *client,
+		    struct nvkm_object *object, void *data, u32 size)
 {
-	struct nvkm_client *client = object->client;
 	union {
 		struct nvif_ioctl_ntfy_put_v0 v0;
 	} *args = data;
@@ -364,7 +372,7 @@ nvkm_ioctl_ntfy_put(struct nvkm_object *object, void *data, u32 size)
 
 static struct {
 	int version;
-	int (*func)(struct nvkm_object *, void *, u32);
+	int (*func)(struct nvkm_client *, struct nvkm_object *, void *, u32);
 }
 nvkm_ioctl_v0[] = {
 	{ 0x00, nvkm_ioctl_nop },
@@ -389,13 +397,10 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 	struct nvkm_object *object;
 	int ret;
 
-	if (handle)
-		object = nvkm_client_search(client, handle);
-	else
-		object = &client->object;
-	if (unlikely(!object)) {
+	object = nvkm_object_search(client, handle, NULL);
+	if (IS_ERR(object)) {
 		nvif_ioctl(&client->object, "object not found\n");
-		return -ENOENT;
+		return PTR_ERR(object);
 	}
 
 	if (owner != NVIF_IOCTL_V0_OWNER_ANY && owner != object->route) {
@@ -407,7 +412,7 @@ nvkm_ioctl_path(struct nvkm_client *client, u64 handle, u32 type,
 
 	if (ret = -EINVAL, type < ARRAY_SIZE(nvkm_ioctl_v0)) {
 		if (nvkm_ioctl_v0[type].version == 0)
-			ret = nvkm_ioctl_v0[type].func(object, data, size);
+			ret = nvkm_ioctl_v0[type].func(client, object, data, size);
 	}
 
 	return ret;
@@ -436,12 +441,13 @@ nvkm_ioctl(struct nvkm_client *client, bool supervisor,
 				      &args->v0.route, &args->v0.token);
 	}
 
-	nvif_ioctl(object, "return %d\n", ret);
-	if (hack) {
-		*hack = client->data;
-		client->data = NULL;
+	if (ret != 1) {
+		nvif_ioctl(object, "return %d\n", ret);
+		if (hack) {
+			*hack = client->data;
+			client->data = NULL;
+		}
 	}
 
-	client->super = false;
 	return ret;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/mm.c b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
index 09a1eee8fd33..fd19d652a7ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/mm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/mm.c
@@ -147,6 +147,7 @@ nvkm_mm_head(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
@@ -225,6 +226,7 @@ nvkm_mm_tail(struct nvkm_mm *mm, u8 heap, u8 type, u32 size_max, u32 size_min,
 		if (!this)
 			return -ENOMEM;
 
+		this->next = NULL;
 		this->type = type;
 		list_del(&this->fl_entry);
 		*pnode = this;
diff --git a/drivers/gpu/drm/nouveau/nvkm/core/object.c b/drivers/gpu/drm/nouveau/nvkm/core/object.c
index 67aa7223dcd7..89d2e9da11c7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/core/object.c
+++ b/drivers/gpu/drm/nouveau/nvkm/core/object.c
@@ -25,6 +25,65 @@
 #include <core/client.h>
 #include <core/engine.h>
 
+struct nvkm_object *
+nvkm_object_search(struct nvkm_client *client, u64 handle,
+		   const struct nvkm_object_func *func)
+{
+	struct nvkm_object *object;
+
+	if (handle) {
+		struct rb_node *node = client->objroot.rb_node;
+		while (node) {
+			object = rb_entry(node, typeof(*object), node);
+			if (handle < object->object)
+				node = node->rb_left;
+			else
+			if (handle > object->object)
+				node = node->rb_right;
+			else
+				goto done;
+		}
+		return ERR_PTR(-ENOENT);
+	} else {
+		object = &client->object;
+	}
+
+done:
+	if (unlikely(func && object->func != func))
+		return ERR_PTR(-EINVAL);
+	return object;
+}
+
+void
+nvkm_object_remove(struct nvkm_object *object)
+{
+	if (!RB_EMPTY_NODE(&object->node))
+		rb_erase(&object->node, &object->client->objroot);
+}
+
+bool
+nvkm_object_insert(struct nvkm_object *object)
+{
+	struct rb_node **ptr = &object->client->objroot.rb_node;
+	struct rb_node *parent = NULL;
+
+	while (*ptr) {
+		struct nvkm_object *this = rb_entry(*ptr, typeof(*this), node);
+		parent = *ptr;
+		if (object->object < this->object)
+			ptr = &parent->rb_left;
+		else
+		if (object->object > this->object)
+			ptr = &parent->rb_right;
+		else
+			return false;
+	}
+
+	rb_link_node(&object->node, parent, ptr);
+	rb_insert_color(&object->node, &object->client->objroot);
+	return true;
+}
+
 int
 nvkm_object_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
@@ -214,7 +273,7 @@ nvkm_object_del(struct nvkm_object **pobject)
 	struct nvkm_object *object = *pobject;
 	if (object && !WARN_ON(!object->func)) {
 		*pobject = nvkm_object_dtor(object);
-		nvkm_client_remove(object->client, object);
+		nvkm_object_remove(object);
 		list_del(&object->head);
 		kfree(*pobject);
 		*pobject = NULL;
@@ -230,6 +289,9 @@ nvkm_object_ctor(const struct nvkm_object_func *func,
 	object->engine = nvkm_engine_ref(oclass->engine);
 	object->oclass = oclass->base.oclass;
 	object->handle = oclass->handle;
+	object->route  = oclass->route;
+	object->token  = oclass->token;
+	object->object = oclass->object;
 	INIT_LIST_HEAD(&object->head);
 	INIT_LIST_HEAD(&object->tree);
 	RB_CLEAR_NODE(&object->node);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
index a4458a8eb30a..255d81ccf916 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/Kbuild
@@ -4,4 +4,4 @@ nvkm-y += nvkm/engine/ce/gk104.o
 nvkm-y += nvkm/engine/ce/gm107.o
 nvkm-y += nvkm/engine/ce/gm200.o
 nvkm-y += nvkm/engine/ce/gp100.o
-nvkm-y += nvkm/engine/ce/gp104.o
+nvkm-y += nvkm/engine/ce/gp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h
index 05bb65608dfe..d9ca9636a3e3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gf100.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf100_ce_data[] = {
+static uint32_t gf100_ce_data[] = {
 /* 0x0000: ctx_object */
 	0x00000000,
 /* 0x0004: ctx_query_address_high */
@@ -171,7 +171,7 @@ uint32_t gf100_ce_data[] = {
 	0x00000800,
 };
 
-uint32_t gf100_ce_code[] = {
+static uint32_t gf100_ce_code[] = {
 /* 0x0000: main */
 	0x04fe04bd,
 	0x3517f000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h
index 972281d10f38..f0a1cf31c7ca 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/fuc/gt215.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gt215_ce_data[] = {
+static uint32_t gt215_ce_data[] = {
 /* 0x0000: ctx_object */
 	0x00000000,
 /* 0x0004: ctx_dma */
@@ -183,7 +183,7 @@ uint32_t gt215_ce_data[] = {
 	0x00000800,
 };
 
-uint32_t gt215_ce_code[] = {
+static uint32_t gt215_ce_code[] = {
 /* 0x0000: main */
 	0x04fe04bd,
 	0x3517f000,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp102.c
index 20e019788a53..985c8f653874 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/ce/gp102.c
@@ -27,7 +27,7 @@
 #include <nvif/class.h>
 
 static const struct nvkm_engine_func
-gp104_ce = {
+gp102_ce = {
 	.intr = gp100_ce_intr,
 	.sclass = {
 		{ -1, -1, PASCAL_DMA_COPY_B },
@@ -37,8 +37,8 @@ gp104_ce = {
 };
 
 int
-gp104_ce_new(struct nvkm_device *device, int index,
+gp102_ce_new(struct nvkm_device *device, int index,
 	     struct nvkm_engine **pengine)
 {
-	return nvkm_engine_new_(&gp104_ce, device, index, true, pengine);
+	return nvkm_engine_new_(&gp102_ce, device, index, true, pengine);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
index 7218a067a6c5..3b86a7399567 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/base.c
@@ -714,7 +714,7 @@ nv4a_chipset = {
 	.i2c = nv04_i2c_new,
 	.imem = nv40_instmem_new,
 	.mc = nv44_mc_new,
-	.mmu = nv44_mmu_new,
+	.mmu = nv04_mmu_new,
 	.pci = nv40_pci_new,
 	.therm = nv40_therm_new,
 	.timer = nv41_timer_new,
@@ -993,7 +993,7 @@ nv92_chipset = {
 	.mc = g84_mc_new,
 	.mmu = nv50_mmu_new,
 	.mxm = nv50_mxm_new,
-	.pci = g84_pci_new,
+	.pci = g92_pci_new,
 	.therm = g84_therm_new,
 	.timer = nv41_timer_new,
 	.volt = nv40_volt_new,
@@ -1357,7 +1357,7 @@ nvc0_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.ce[1] = gf100_ce_new,
 	.disp = gt215_disp_new,
@@ -1394,7 +1394,7 @@ nvc1_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.disp = gt215_disp_new,
 	.dma = gf100_dma_new,
@@ -1430,7 +1430,7 @@ nvc3_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.disp = gt215_disp_new,
 	.dma = gf100_dma_new,
@@ -1466,7 +1466,7 @@ nvc4_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.ce[1] = gf100_ce_new,
 	.disp = gt215_disp_new,
@@ -1503,7 +1503,7 @@ nvc8_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.ce[1] = gf100_ce_new,
 	.disp = gt215_disp_new,
@@ -1540,7 +1540,7 @@ nvce_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.ce[1] = gf100_ce_new,
 	.disp = gt215_disp_new,
@@ -1577,7 +1577,7 @@ nvcf_chipset = {
 	.pmu = gf100_pmu_new,
 	.therm = gt215_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.disp = gt215_disp_new,
 	.dma = gf100_dma_new,
@@ -1612,6 +1612,7 @@ nvd7_chipset = {
 	.pci = gf106_pci_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.disp = gf119_disp_new,
 	.dma = gf119_dma_new,
@@ -1647,7 +1648,7 @@ nvd9_chipset = {
 	.pmu = gf119_pmu_new,
 	.therm = gf119_therm_new,
 	.timer = nv41_timer_new,
-	.volt = nv40_volt_new,
+	.volt = gf100_volt_new,
 	.ce[0] = gf100_ce_new,
 	.disp = gf119_disp_new,
 	.dma = gf119_dma_new,
@@ -1851,7 +1852,7 @@ nvf1_chipset = {
 	.fb = gk104_fb_new,
 	.fuse = gf100_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gf119_i2c_new,
+	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1965,7 +1966,7 @@ nv117_chipset = {
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gf119_i2c_new,
+	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -1999,7 +2000,7 @@ nv118_chipset = {
 	.fb = gm107_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
-	.i2c = gf119_i2c_new,
+	.i2c = gk104_i2c_new,
 	.ibus = gk104_ibus_new,
 	.iccsense = gf100_iccsense_new,
 	.imem = nv50_instmem_new,
@@ -2130,13 +2131,14 @@ nv12b_chipset = {
 	.bar = gk20a_bar_new,
 	.bus = gf100_bus_new,
 	.clk = gm20b_clk_new,
-	.fb = gk20a_fb_new,
+	.fb = gm20b_fb_new,
 	.fuse = gm107_fuse_new,
 	.ibus = gk20a_ibus_new,
 	.imem = gk20a_instmem_new,
 	.ltc = gm200_ltc_new,
 	.mc = gk20a_mc_new,
 	.mmu = gf100_mmu_new,
+	.pmu = gm20b_pmu_new,
 	.secboot = gm20b_secboot_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
@@ -2166,6 +2168,7 @@ nv130_chipset = {
 	.mmu = gf100_mmu_new,
 	.secboot = gm200_secboot_new,
 	.pci = gp100_pci_new,
+	.pmu = gp100_pmu_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
 	.ce[0] = gp100_ce_new,
@@ -2182,13 +2185,100 @@ nv130_chipset = {
 };
 
 static const struct nvkm_device_chip
+nv132_chipset = {
+	.name = "GP102",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp102_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp102_ce_new,
+	.ce[1] = gp102_ce_new,
+	.ce[2] = gp102_ce_new,
+	.ce[3] = gp102_ce_new,
+	.disp = gp102_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
+static const struct nvkm_device_chip
 nv134_chipset = {
 	.name = "GP104",
 	.bar = gf100_bar_new,
 	.bios = nvkm_bios_new,
 	.bus = gf100_bus_new,
 	.devinit = gm200_devinit_new,
-	.fb = gp104_fb_new,
+	.fb = gp102_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp102_ce_new,
+	.ce[1] = gp102_ce_new,
+	.ce[2] = gp102_ce_new,
+	.ce[3] = gp102_ce_new,
+	.disp = gp102_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
+static const struct nvkm_device_chip
+nv136_chipset = {
+	.name = "GP106",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp102_fb_new,
+	.fuse = gm107_fuse_new,
+	.gpio = gk104_gpio_new,
+	.i2c = gm200_i2c_new,
+	.ibus = gm200_ibus_new,
+	.imem = nv50_instmem_new,
+	.ltc = gp100_ltc_new,
+	.mc = gp100_mc_new,
+	.mmu = gf100_mmu_new,
+	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
+	.timer = gk20a_timer_new,
+	.top = gk104_top_new,
+	.ce[0] = gp102_ce_new,
+	.ce[1] = gp102_ce_new,
+	.ce[2] = gp102_ce_new,
+	.ce[3] = gp102_ce_new,
+	.disp = gp102_disp_new,
+	.dma = gf119_dma_new,
+	.fifo = gp100_fifo_new,
+};
+
+static const struct nvkm_device_chip
+nv137_chipset = {
+	.name = "GP107",
+	.bar = gf100_bar_new,
+	.bios = nvkm_bios_new,
+	.bus = gf100_bus_new,
+	.devinit = gm200_devinit_new,
+	.fb = gp102_fb_new,
 	.fuse = gm107_fuse_new,
 	.gpio = gk104_gpio_new,
 	.i2c = gm200_i2c_new,
@@ -2198,13 +2288,14 @@ nv134_chipset = {
 	.mc = gp100_mc_new,
 	.mmu = gf100_mmu_new,
 	.pci = gp100_pci_new,
+	.pmu = gp102_pmu_new,
 	.timer = gk20a_timer_new,
 	.top = gk104_top_new,
-	.ce[0] = gp104_ce_new,
-	.ce[1] = gp104_ce_new,
-	.ce[2] = gp104_ce_new,
-	.ce[3] = gp104_ce_new,
-	.disp = gp104_disp_new,
+	.ce[0] = gp102_ce_new,
+	.ce[1] = gp102_ce_new,
+	.ce[2] = gp102_ce_new,
+	.ce[3] = gp102_ce_new,
+	.disp = gp102_disp_new,
 	.dma = gf119_dma_new,
 	.fifo = gp100_fifo_new,
 };
@@ -2643,7 +2734,10 @@ nvkm_device_ctor(const struct nvkm_device_func *func,
 		case 0x126: device->chip = &nv126_chipset; break;
 		case 0x12b: device->chip = &nv12b_chipset; break;
 		case 0x130: device->chip = &nv130_chipset; break;
+		case 0x132: device->chip = &nv132_chipset; break;
 		case 0x134: device->chip = &nv134_chipset; break;
+		case 0x136: device->chip = &nv136_chipset; break;
+		case 0x137: device->chip = &nv137_chipset; break;
 		default:
 			nvdev_error(device, "unknown chipset (%08x)\n", boot0);
 			goto done;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
index 62ad0300cfa5..74a1ffa425f7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/pci.c
@@ -1665,14 +1665,31 @@ nvkm_device_pci_new(struct pci_dev *pci_dev, const char *cfg, const char *dbg,
 	*pdevice = &pdev->device;
 	pdev->pdev = pci_dev;
 
-	return nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev,
-				pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE :
-				pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ?
-				NVKM_DEVICE_AGP : NVKM_DEVICE_PCI,
-				(u64)pci_domain_nr(pci_dev->bus) << 32 |
-				     pci_dev->bus->number << 16 |
-				     PCI_SLOT(pci_dev->devfn) << 8 |
-				     PCI_FUNC(pci_dev->devfn), name,
-				cfg, dbg, detect, mmio, subdev_mask,
-				&pdev->device);
+	ret = nvkm_device_ctor(&nvkm_device_pci_func, quirk, &pci_dev->dev,
+			       pci_is_pcie(pci_dev) ? NVKM_DEVICE_PCIE :
+			       pci_find_capability(pci_dev, PCI_CAP_ID_AGP) ?
+			       NVKM_DEVICE_AGP : NVKM_DEVICE_PCI,
+			       (u64)pci_domain_nr(pci_dev->bus) << 32 |
+				    pci_dev->bus->number << 16 |
+				    PCI_SLOT(pci_dev->devfn) << 8 |
+				    PCI_FUNC(pci_dev->devfn), name,
+			       cfg, dbg, detect, mmio, subdev_mask,
+			       &pdev->device);
+
+	if (ret)
+		return ret;
+
+	/*
+	 * Set a preliminary DMA mask based on the .dma_bits member of the
+	 * MMU subdevice. This allows other subdevices to create DMA mappings
+	 * in their init() or oneinit() methods, which may be called before the
+	 * TTM layer sets the DMA mask definitively.
+	 * This is necessary for platforms where the default DMA mask of 32
+	 * does not cover any system memory, i.e., when all RAM is > 4 GB.
+	 */
+	if (pdev->device.mmu)
+		dma_set_mask_and_coherent(&pci_dev->dev,
+				DMA_BIT_MASK(pdev->device.mmu->dma_bits));
+
+	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
index 9b638bd905ff..f2bc0b7d9b93 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/tegra.c
@@ -102,7 +102,7 @@ nvkm_device_tegra_probe_iommu(struct nvkm_device_tegra *tdev)
 
 	if (iommu_present(&platform_bus_type)) {
 		tdev->iommu.domain = iommu_domain_alloc(&platform_bus_type);
-		if (IS_ERR(tdev->iommu.domain))
+		if (!tdev->iommu.domain)
 			goto error;
 
 		/*
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
index 79a8f71cf788..513ee6b79553 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/device/user.c
@@ -326,7 +326,7 @@ nvkm_udevice = {
 	.sclass = nvkm_udevice_child_get,
 };
 
-int
+static int
 nvkm_udevice_new(const struct nvkm_oclass *oclass, void *data, u32 size,
 		 struct nvkm_object **pobject)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
index 77a52b54a31e..fa05d16ae948 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/Kbuild
@@ -11,7 +11,7 @@ nvkm-y += nvkm/engine/disp/gk110.o
 nvkm-y += nvkm/engine/disp/gm107.o
 nvkm-y += nvkm/engine/disp/gm200.o
 nvkm-y += nvkm/engine/disp/gp100.o
-nvkm-y += nvkm/engine/disp/gp104.o
+nvkm-y += nvkm/engine/disp/gp102.o
 
 nvkm-y += nvkm/engine/disp/outp.o
 nvkm-y += nvkm/engine/disp/outpdp.o
@@ -48,14 +48,14 @@ nvkm-y += nvkm/engine/disp/rootgk110.o
 nvkm-y += nvkm/engine/disp/rootgm107.o
 nvkm-y += nvkm/engine/disp/rootgm200.o
 nvkm-y += nvkm/engine/disp/rootgp100.o
-nvkm-y += nvkm/engine/disp/rootgp104.o
+nvkm-y += nvkm/engine/disp/rootgp102.o
 
 nvkm-y += nvkm/engine/disp/channv50.o
 nvkm-y += nvkm/engine/disp/changf119.o
 
 nvkm-y += nvkm/engine/disp/dmacnv50.o
 nvkm-y += nvkm/engine/disp/dmacgf119.o
-nvkm-y += nvkm/engine/disp/dmacgp104.o
+nvkm-y += nvkm/engine/disp/dmacgp102.o
 
 nvkm-y += nvkm/engine/disp/basenv50.o
 nvkm-y += nvkm/engine/disp/baseg84.o
@@ -64,7 +64,7 @@ nvkm-y += nvkm/engine/disp/basegt215.o
 nvkm-y += nvkm/engine/disp/basegf119.o
 nvkm-y += nvkm/engine/disp/basegk104.o
 nvkm-y += nvkm/engine/disp/basegk110.o
-nvkm-y += nvkm/engine/disp/basegp104.o
+nvkm-y += nvkm/engine/disp/basegp102.o
 
 nvkm-y += nvkm/engine/disp/corenv50.o
 nvkm-y += nvkm/engine/disp/coreg84.o
@@ -77,7 +77,7 @@ nvkm-y += nvkm/engine/disp/coregk110.o
 nvkm-y += nvkm/engine/disp/coregm107.o
 nvkm-y += nvkm/engine/disp/coregm200.o
 nvkm-y += nvkm/engine/disp/coregp100.o
-nvkm-y += nvkm/engine/disp/coregp104.o
+nvkm-y += nvkm/engine/disp/coregp102.o
 
 nvkm-y += nvkm/engine/disp/ovlynv50.o
 nvkm-y += nvkm/engine/disp/ovlyg84.o
@@ -85,7 +85,7 @@ nvkm-y += nvkm/engine/disp/ovlygt200.o
 nvkm-y += nvkm/engine/disp/ovlygt215.o
 nvkm-y += nvkm/engine/disp/ovlygf119.o
 nvkm-y += nvkm/engine/disp/ovlygk104.o
-nvkm-y += nvkm/engine/disp/ovlygp104.o
+nvkm-y += nvkm/engine/disp/ovlygp102.o
 
 nvkm-y += nvkm/engine/disp/piocnv50.o
 nvkm-y += nvkm/engine/disp/piocgf119.o
@@ -95,9 +95,11 @@ nvkm-y += nvkm/engine/disp/cursg84.o
 nvkm-y += nvkm/engine/disp/cursgt215.o
 nvkm-y += nvkm/engine/disp/cursgf119.o
 nvkm-y += nvkm/engine/disp/cursgk104.o
+nvkm-y += nvkm/engine/disp/cursgp102.o
 
 nvkm-y += nvkm/engine/disp/oimmnv50.o
 nvkm-y += nvkm/engine/disp/oimmg84.o
 nvkm-y += nvkm/engine/disp/oimmgt215.o
 nvkm-y += nvkm/engine/disp/oimmgf119.o
 nvkm-y += nvkm/engine/disp/oimmgk104.o
+nvkm-y += nvkm/engine/disp/oimmgp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c
index 51688e37c54e..8a3cdeef8d2c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/basegp102.c
@@ -27,12 +27,12 @@
 #include <nvif/class.h>
 
 const struct nv50_disp_dmac_oclass
-gp104_disp_base_oclass = {
+gp102_disp_base_oclass = {
 	.base.oclass = GK110_DISP_BASE_CHANNEL_DMA,
 	.base.minver = 0,
 	.base.maxver = 0,
 	.ctor = nv50_disp_base_new,
-	.func = &gp104_disp_dmac_func,
+	.func = &gp102_disp_dmac_func,
 	.mthd = &gf119_disp_base_chan_mthd,
 	.chid = 1,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
index dd2953bc9264..524a24eae1a0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.c
@@ -82,7 +82,7 @@ nv50_disp_chan_mthd(struct nv50_disp_chan *chan, int debug)
 
 			if (mthd->addr) {
 				snprintf(cname_, sizeof(cname_), "%s %d",
-					 mthd->name, chan->chid);
+					 mthd->name, chan->chid.user);
 				cname = cname_;
 			}
 
@@ -139,7 +139,7 @@ nv50_disp_chan_uevent_ctor(struct nvkm_object *object, void *data, u32 size,
 	if (!(ret = nvif_unvers(ret, &data, &size, args->none))) {
 		notify->size  = sizeof(struct nvif_notify_uevent_rep);
 		notify->types = 1;
-		notify->index = chan->chid;
+		notify->index = chan->chid.user;
 		return 0;
 	}
 
@@ -153,27 +153,27 @@ nv50_disp_chan_uevent = {
 	.fini = nv50_disp_chan_uevent_fini,
 };
 
-int
+static int
 nv50_disp_chan_rd32(struct nvkm_object *object, u64 addr, u32 *data)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_device *device = disp->base.engine.subdev.device;
-	*data = nvkm_rd32(device, 0x640000 + (chan->chid * 0x1000) + addr);
+	*data = nvkm_rd32(device, 0x640000 + (chan->chid.user * 0x1000) + addr);
 	return 0;
 }
 
-int
+static int
 nv50_disp_chan_wr32(struct nvkm_object *object, u64 addr, u32 data)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_device *device = disp->base.engine.subdev.device;
-	nvkm_wr32(device, 0x640000 + (chan->chid * 0x1000) + addr, data);
+	nvkm_wr32(device, 0x640000 + (chan->chid.user * 0x1000) + addr, data);
 	return 0;
 }
 
-int
+static int
 nv50_disp_chan_ntfy(struct nvkm_object *object, u32 type,
 		    struct nvkm_event **pevent)
 {
@@ -189,14 +189,14 @@ nv50_disp_chan_ntfy(struct nvkm_object *object, u32 type,
 	return -EINVAL;
 }
 
-int
+static int
 nv50_disp_chan_map(struct nvkm_object *object, u64 *addr, u32 *size)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_device *device = disp->base.engine.subdev.device;
 	*addr = device->func->resource_addr(device, 0) +
-		0x640000 + (chan->chid * 0x1000);
+		0x640000 + (chan->chid.user * 0x1000);
 	*size = 0x001000;
 	return 0;
 }
@@ -243,8 +243,8 @@ nv50_disp_chan_dtor(struct nvkm_object *object)
 {
 	struct nv50_disp_chan *chan = nv50_disp_chan(object);
 	struct nv50_disp *disp = chan->root->disp;
-	if (chan->chid >= 0)
-		disp->chan[chan->chid] = NULL;
+	if (chan->chid.user >= 0)
+		disp->chan[chan->chid.user] = NULL;
 	return chan->func->dtor ? chan->func->dtor(chan) : chan;
 }
 
@@ -263,7 +263,7 @@ nv50_disp_chan = {
 int
 nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func,
 		    const struct nv50_disp_chan_mthd *mthd,
-		    struct nv50_disp_root *root, int chid, int head,
+		    struct nv50_disp_root *root, int ctrl, int user, int head,
 		    const struct nvkm_oclass *oclass,
 		    struct nv50_disp_chan *chan)
 {
@@ -273,21 +273,22 @@ nv50_disp_chan_ctor(const struct nv50_disp_chan_func *func,
 	chan->func = func;
 	chan->mthd = mthd;
 	chan->root = root;
-	chan->chid = chid;
+	chan->chid.ctrl = ctrl;
+	chan->chid.user = user;
 	chan->head = head;
 
-	if (disp->chan[chan->chid]) {
-		chan->chid = -1;
+	if (disp->chan[chan->chid.user]) {
+		chan->chid.user = -1;
 		return -EBUSY;
 	}
-	disp->chan[chan->chid] = chan;
+	disp->chan[chan->chid.user] = chan;
 	return 0;
 }
 
 int
 nv50_disp_chan_new_(const struct nv50_disp_chan_func *func,
 		    const struct nv50_disp_chan_mthd *mthd,
-		    struct nv50_disp_root *root, int chid, int head,
+		    struct nv50_disp_root *root, int ctrl, int user, int head,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
@@ -297,5 +298,6 @@ nv50_disp_chan_new_(const struct nv50_disp_chan_func *func,
 		return -ENOMEM;
 	*pobject = &chan->object;
 
-	return nv50_disp_chan_ctor(func, mthd, root, chid, head, oclass, chan);
+	return nv50_disp_chan_ctor(func, mthd, root, ctrl, user,
+				   head, oclass, chan);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
index f5f683d9fd20..737b38f6fbd2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/channv50.h
@@ -7,7 +7,11 @@ struct nv50_disp_chan {
 	const struct nv50_disp_chan_func *func;
 	const struct nv50_disp_chan_mthd *mthd;
 	struct nv50_disp_root *root;
-	int chid;
+
+	struct {
+		int ctrl;
+		int user;
+	} chid;
 	int head;
 
 	struct nvkm_object object;
@@ -25,11 +29,11 @@ struct nv50_disp_chan_func {
 
 int nv50_disp_chan_ctor(const struct nv50_disp_chan_func *,
 			const struct nv50_disp_chan_mthd *,
-			struct nv50_disp_root *, int chid, int head,
+			struct nv50_disp_root *, int ctrl, int user, int head,
 			const struct nvkm_oclass *, struct nv50_disp_chan *);
 int nv50_disp_chan_new_(const struct nv50_disp_chan_func *,
 			const struct nv50_disp_chan_mthd *,
-			struct nv50_disp_root *, int chid, int head,
+			struct nv50_disp_root *, int ctrl, int user, int head,
 			const struct nvkm_oclass *, struct nvkm_object **);
 
 extern const struct nv50_disp_chan_func nv50_disp_pioc_func;
@@ -90,13 +94,16 @@ extern const struct nv50_disp_chan_mthd gk104_disp_ovly_chan_mthd;
 struct nv50_disp_pioc_oclass {
 	int (*ctor)(const struct nv50_disp_chan_func *,
 		    const struct nv50_disp_chan_mthd *,
-		    struct nv50_disp_root *, int chid,
+		    struct nv50_disp_root *, int ctrl, int user,
 		    const struct nvkm_oclass *, void *data, u32 size,
 		    struct nvkm_object **);
 	struct nvkm_sclass base;
 	const struct nv50_disp_chan_func *func;
 	const struct nv50_disp_chan_mthd *mthd;
-	int chid;
+	struct {
+		int ctrl;
+		int user;
+	} chid;
 };
 
 extern const struct nv50_disp_pioc_oclass nv50_disp_oimm_oclass;
@@ -114,15 +121,17 @@ extern const struct nv50_disp_pioc_oclass gf119_disp_curs_oclass;
 extern const struct nv50_disp_pioc_oclass gk104_disp_oimm_oclass;
 extern const struct nv50_disp_pioc_oclass gk104_disp_curs_oclass;
 
+extern const struct nv50_disp_pioc_oclass gp102_disp_oimm_oclass;
+extern const struct nv50_disp_pioc_oclass gp102_disp_curs_oclass;
 
 int nv50_disp_curs_new(const struct nv50_disp_chan_func *,
 		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int chid,
+		       struct nv50_disp_root *, int ctrl, int user,
 		       const struct nvkm_oclass *, void *data, u32 size,
 		       struct nvkm_object **);
 int nv50_disp_oimm_new(const struct nv50_disp_chan_func *,
 		       const struct nv50_disp_chan_mthd *,
-		       struct nv50_disp_root *, int chid,
+		       struct nv50_disp_root *, int ctrl, int user,
 		       const struct nvkm_oclass *, void *data, u32 size,
 		       struct nvkm_object **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
index 019379a3a01c..c65c9f3ff69f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coreg94.c
@@ -26,7 +26,7 @@
 
 #include <nvif/class.h>
 
-const struct nv50_disp_mthd_list
+static const struct nv50_disp_mthd_list
 g94_disp_core_mthd_sor = {
 	.mthd = 0x0040,
 	.addr = 0x000008,
@@ -43,8 +43,8 @@ g94_disp_core_chan_mthd = {
 	.prev = 0x000004,
 	.data = {
 		{ "Global", 1, &nv50_disp_core_mthd_base },
-		{    "DAC", 3, &g84_disp_core_mthd_dac  },
-		{    "SOR", 4, &g94_disp_core_mthd_sor  },
+		{    "DAC", 3, &g84_disp_core_mthd_dac },
+		{    "SOR", 4, &g94_disp_core_mthd_sor },
 		{   "PIOR", 3, &nv50_disp_core_mthd_pior },
 		{   "HEAD", 2, &g84_disp_core_mthd_head },
 		{}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c
index 6922f4007b61..b0df4b752b8c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/coregp102.c
@@ -29,7 +29,7 @@
 #include <nvif/class.h>
 
 static int
-gp104_disp_core_init(struct nv50_disp_dmac *chan)
+gp102_disp_core_init(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
@@ -59,20 +59,20 @@ gp104_disp_core_init(struct nv50_disp_dmac *chan)
 	return 0;
 }
 
-const struct nv50_disp_dmac_func
-gp104_disp_core_func = {
-	.init = gp104_disp_core_init,
+static const struct nv50_disp_dmac_func
+gp102_disp_core_func = {
+	.init = gp102_disp_core_init,
 	.fini = gf119_disp_core_fini,
 	.bind = gf119_disp_dmac_bind,
 };
 
 const struct nv50_disp_dmac_oclass
-gp104_disp_core_oclass = {
-	.base.oclass = GP104_DISP_CORE_CHANNEL_DMA,
+gp102_disp_core_oclass = {
+	.base.oclass = GP102_DISP_CORE_CHANNEL_DMA,
 	.base.minver = 0,
 	.base.maxver = 0,
 	.ctor = nv50_disp_core_new,
-	.func = &gp104_disp_core_func,
+	.func = &gp102_disp_core_func,
 	.mthd = &gk104_disp_core_chan_mthd,
 	.chid = 0,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c
index dd99fc7060b1..fa781b5a7e07 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursg84.c
@@ -33,5 +33,5 @@ g84_disp_curs_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_curs_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 7,
+	.chid = { 7, 7 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
index 2a1574e06ad6..2be6fb052c65 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgf119.c
@@ -33,5 +33,5 @@ gf119_disp_curs_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_curs_new,
 	.func = &gf119_disp_pioc_func,
-	.chid = 13,
+	.chid = { 13, 13 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c
index 28e8f06c9472..2a99db4bf8f8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgk104.c
@@ -33,5 +33,5 @@ gk104_disp_curs_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_curs_new,
 	.func = &gf119_disp_pioc_func,
-	.chid = 13,
+	.chid = { 13, 13 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c
new file mode 100644
index 000000000000..e958210d8105
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgp102.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "channv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_pioc_oclass
+gp102_disp_curs_oclass = {
+	.base.oclass = GK104_DISP_CURSOR,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_curs_new,
+	.func = &gf119_disp_pioc_func,
+	.chid = { 13, 17 },
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c
index d8a4b9ca139c..00a7f3564450 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursgt215.c
@@ -33,5 +33,5 @@ gt215_disp_curs_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_curs_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 7,
+	.chid = { 7, 7 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
index 8b1320499a0f..82ff82d8c1ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/cursnv50.c
@@ -33,7 +33,7 @@
 int
 nv50_disp_curs_new(const struct nv50_disp_chan_func *func,
 		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int chid,
+		   struct nv50_disp_root *root, int ctrl, int user,
 		   const struct nvkm_oclass *oclass, void *data, u32 size,
 		   struct nvkm_object **pobject)
 {
@@ -54,7 +54,7 @@ nv50_disp_curs_new(const struct nv50_disp_chan_func *func,
 	} else
 		return ret;
 
-	return nv50_disp_chan_new_(func, mthd, root, chid + head,
+	return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head,
 				   head, oclass, pobject);
 }
 
@@ -65,5 +65,5 @@ nv50_disp_curs_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_curs_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 7,
+	.chid = { 7, 7 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
index a57f7cef307a..ce7cd74fbd5d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgf119.c
@@ -32,8 +32,8 @@ gf119_disp_dmac_bind(struct nv50_disp_dmac *chan,
 		     struct nvkm_object *object, u32 handle)
 {
 	return nvkm_ramht_insert(chan->base.root->ramht, object,
-				 chan->base.chid, -9, handle,
-				 chan->base.chid << 27 | 0x00000001);
+				 chan->base.chid.user, -9, handle,
+				 chan->base.chid.user << 27 | 0x00000001);
 }
 
 void
@@ -42,22 +42,23 @@ gf119_disp_dmac_fini(struct nv50_disp_dmac *chan)
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->base.chid;
+	int ctrl = chan->base.chid.ctrl;
+	int user = chan->base.chid.user;
 
 	/* deactivate channel */
-	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00001010, 0x00001000);
-	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000003, 0x00000000);
+	nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00001010, 0x00001000);
+	nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000003, 0x00000000);
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x001e0000))
+		if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x001e0000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d fini: %08x\n", chid,
-			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d fini: %08x\n", user,
+			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 	}
 
 	/* disable error reporting and completion notification */
-	nvkm_mask(device, 0x610090, 0x00000001 << chid, 0x00000000);
-	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000000);
+	nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000);
+	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000);
 }
 
 static int
@@ -66,26 +67,27 @@ gf119_disp_dmac_init(struct nv50_disp_dmac *chan)
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->base.chid;
+	int ctrl = chan->base.chid.ctrl;
+	int user = chan->base.chid.user;
 
 	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
 
 	/* initialise channel for dma command submission */
-	nvkm_wr32(device, 0x610494 + (chid * 0x0010), chan->push);
-	nvkm_wr32(device, 0x610498 + (chid * 0x0010), 0x00010000);
-	nvkm_wr32(device, 0x61049c + (chid * 0x0010), 0x00000001);
-	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010);
-	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
-	nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013);
+	nvkm_wr32(device, 0x610494 + (ctrl * 0x0010), chan->push);
+	nvkm_wr32(device, 0x610498 + (ctrl * 0x0010), 0x00010000);
+	nvkm_wr32(device, 0x61049c + (ctrl * 0x0010), 0x00000001);
+	nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000);
+	nvkm_wr32(device, 0x610490 + (ctrl * 0x0010), 0x00000013);
 
 	/* wait for it to go inactive */
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000))
+		if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x80000000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d init: %08x\n", chid,
-			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d init: %08x\n", user,
+			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c
index ad24c2c57696..cdead9500343 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacgp102.c
@@ -27,31 +27,32 @@
 #include <subdev/timer.h>
 
 static int
-gp104_disp_dmac_init(struct nv50_disp_dmac *chan)
+gp102_disp_dmac_init(struct nv50_disp_dmac *chan)
 {
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->base.chid;
+	int ctrl = chan->base.chid.ctrl;
+	int user = chan->base.chid.user;
 
 	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
 
 	/* initialise channel for dma command submission */
-	nvkm_wr32(device, 0x611494 + (chid * 0x0010), chan->push);
-	nvkm_wr32(device, 0x611498 + (chid * 0x0010), 0x00010000);
-	nvkm_wr32(device, 0x61149c + (chid * 0x0010), 0x00000001);
-	nvkm_mask(device, 0x610490 + (chid * 0x0010), 0x00000010, 0x00000010);
-	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
-	nvkm_wr32(device, 0x610490 + (chid * 0x0010), 0x00000013);
+	nvkm_wr32(device, 0x611494 + (ctrl * 0x0010), chan->push);
+	nvkm_wr32(device, 0x611498 + (ctrl * 0x0010), 0x00010000);
+	nvkm_wr32(device, 0x61149c + (ctrl * 0x0010), 0x00000001);
+	nvkm_mask(device, 0x610490 + (ctrl * 0x0010), 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000);
+	nvkm_wr32(device, 0x610490 + (ctrl * 0x0010), 0x00000013);
 
 	/* wait for it to go inactive */
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x80000000))
+		if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x80000000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d init: %08x\n", chid,
-			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d init: %08x\n", user,
+			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
@@ -59,8 +60,8 @@ gp104_disp_dmac_init(struct nv50_disp_dmac *chan)
 }
 
 const struct nv50_disp_dmac_func
-gp104_disp_dmac_func = {
-	.init = gp104_disp_dmac_init,
+gp102_disp_dmac_func = {
+	.init = gp102_disp_dmac_init,
 	.fini = gf119_disp_dmac_fini,
 	.bind = gf119_disp_dmac_bind,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
index 9c6645a357b9..070ec5e18fdb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.c
@@ -137,7 +137,6 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 		    const struct nvkm_oclass *oclass,
 		    struct nvkm_object **pobject)
 {
-	struct nvkm_device *device = root->disp->base.engine.subdev.device;
 	struct nvkm_client *client = oclass->client;
 	struct nvkm_dmaobj *dmaobj;
 	struct nv50_disp_dmac *chan;
@@ -149,13 +148,13 @@ nv50_disp_dmac_new_(const struct nv50_disp_dmac_func *func,
 	chan->func = func;
 
 	ret = nv50_disp_chan_ctor(&nv50_disp_dmac_func_, mthd, root,
-				  chid, head, oclass, &chan->base);
+				  chid, chid, head, oclass, &chan->base);
 	if (ret)
 		return ret;
 
-	dmaobj = nvkm_dma_search(device->dma, client, push);
-	if (!dmaobj)
-		return -ENOENT;
+	dmaobj = nvkm_dmaobj_search(client, push);
+	if (IS_ERR(dmaobj))
+		return PTR_ERR(dmaobj);
 
 	if (dmaobj->limit - dmaobj->start != 0xfff)
 		return -EINVAL;
@@ -179,9 +178,9 @@ nv50_disp_dmac_bind(struct nv50_disp_dmac *chan,
 		    struct nvkm_object *object, u32 handle)
 {
 	return nvkm_ramht_insert(chan->base.root->ramht, object,
-				 chan->base.chid, -10, handle,
-				 chan->base.chid << 28 |
-				 chan->base.chid);
+				 chan->base.chid.user, -10, handle,
+				 chan->base.chid.user << 28 |
+				 chan->base.chid.user);
 }
 
 static void
@@ -190,21 +189,22 @@ nv50_disp_dmac_fini(struct nv50_disp_dmac *chan)
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->base.chid;
+	int ctrl = chan->base.chid.ctrl;
+	int user = chan->base.chid.user;
 
 	/* deactivate channel */
-	nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00001010, 0x00001000);
-	nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00000003, 0x00000000);
+	nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00001010, 0x00001000);
+	nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00000003, 0x00000000);
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x001e0000))
+		if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x001e0000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d fini timeout, %08x\n", chid,
-			   nvkm_rd32(device, 0x610200 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d fini timeout, %08x\n", user,
+			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 	}
 
 	/* disable error reporting and completion notifications */
-	nvkm_mask(device, 0x610028, 0x00010001 << chid, 0x00000000 << chid);
+	nvkm_mask(device, 0x610028, 0x00010001 << user, 0x00000000 << user);
 }
 
 static int
@@ -213,26 +213,27 @@ nv50_disp_dmac_init(struct nv50_disp_dmac *chan)
 	struct nv50_disp *disp = chan->base.root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->base.chid;
+	int ctrl = chan->base.chid.ctrl;
+	int user = chan->base.chid.user;
 
 	/* enable error reporting */
-	nvkm_mask(device, 0x610028, 0x00010000 << chid, 0x00010000 << chid);
+	nvkm_mask(device, 0x610028, 0x00010000 << user, 0x00010000 << user);
 
 	/* initialise channel for dma command submission */
-	nvkm_wr32(device, 0x610204 + (chid * 0x0010), chan->push);
-	nvkm_wr32(device, 0x610208 + (chid * 0x0010), 0x00010000);
-	nvkm_wr32(device, 0x61020c + (chid * 0x0010), chid);
-	nvkm_mask(device, 0x610200 + (chid * 0x0010), 0x00000010, 0x00000010);
-	nvkm_wr32(device, 0x640000 + (chid * 0x1000), 0x00000000);
-	nvkm_wr32(device, 0x610200 + (chid * 0x0010), 0x00000013);
+	nvkm_wr32(device, 0x610204 + (ctrl * 0x0010), chan->push);
+	nvkm_wr32(device, 0x610208 + (ctrl * 0x0010), 0x00010000);
+	nvkm_wr32(device, 0x61020c + (ctrl * 0x0010), ctrl);
+	nvkm_mask(device, 0x610200 + (ctrl * 0x0010), 0x00000010, 0x00000010);
+	nvkm_wr32(device, 0x640000 + (ctrl * 0x1000), 0x00000000);
+	nvkm_wr32(device, 0x610200 + (ctrl * 0x0010), 0x00000013);
 
 	/* wait for it to go inactive */
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x80000000))
+		if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x80000000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d init timeout, %08x\n", chid,
-			   nvkm_rd32(device, 0x610200 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d init timeout, %08x\n", user,
+			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
index 43ac05857853..ea4a0d062e31 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dmacnv50.h
@@ -30,7 +30,7 @@ int gf119_disp_dmac_bind(struct nv50_disp_dmac *, struct nvkm_object *, u32);
 extern const struct nv50_disp_dmac_func gf119_disp_core_func;
 void gf119_disp_core_fini(struct nv50_disp_dmac *);
 
-extern const struct nv50_disp_dmac_func gp104_disp_dmac_func;
+extern const struct nv50_disp_dmac_func gp102_disp_dmac_func;
 
 struct nv50_disp_dmac_oclass {
 	int (*ctor)(const struct nv50_disp_dmac_func *,
@@ -95,7 +95,7 @@ extern const struct nv50_disp_dmac_oclass gm200_disp_core_oclass;
 
 extern const struct nv50_disp_dmac_oclass gp100_disp_core_oclass;
 
-extern const struct nv50_disp_dmac_oclass gp104_disp_core_oclass;
-extern const struct nv50_disp_dmac_oclass gp104_disp_base_oclass;
-extern const struct nv50_disp_dmac_oclass gp104_disp_ovly_oclass;
+extern const struct nv50_disp_dmac_oclass gp102_disp_core_oclass;
+extern const struct nv50_disp_dmac_oclass gp102_disp_base_oclass;
+extern const struct nv50_disp_dmac_oclass gp102_disp_ovly_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
index 9688970eca47..4a93ceb850ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.c
@@ -319,9 +319,8 @@ static const struct dp_rates {
 };
 
 void
-nvkm_dp_train(struct work_struct *w)
+nvkm_dp_train(struct nvkm_output_dp *outp)
 {
-	struct nvkm_output_dp *outp = container_of(w, typeof(*outp), lt.work);
 	struct nv50_disp *disp = nv50_disp(outp->base.disp);
 	const struct dp_rates *cfg = nvkm_dp_rates;
 	struct dp_state _dp = {
@@ -353,9 +352,6 @@ nvkm_dp_train(struct work_struct *w)
 	}
 	cfg--;
 
-	/* disable link interrupt handling during link training */
-	nvkm_notify_put(&outp->irq);
-
 	/* ensure sink is not in a low-power state */
 	if (!nvkm_rdaux(outp->aux, DPCD_SC00, &pwr, 1)) {
 		if ((pwr & DPCD_SC00_SET_POWER) != DPCD_SC00_SET_POWER_D0) {
@@ -400,9 +396,6 @@ nvkm_dp_train(struct work_struct *w)
 
 	dp_link_train_fini(dp);
 
-	/* signal completion and enable link interrupt handling */
 	OUTP_DBG(&outp->base, "training complete");
 	atomic_set(&outp->lt.done, 1);
-	wake_up(&outp->lt.wait);
-	nvkm_notify_get(&outp->irq);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
index 6e10c5e0ef11..baf1dd9ff975 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/dport.h
@@ -1,6 +1,6 @@
 #ifndef __NVKM_DISP_DPORT_H__
 #define __NVKM_DISP_DPORT_H__
-#include <core/os.h>
+struct nvkm_output_dp;
 
 /* DPCD Receiver Capabilities */
 #define DPCD_RC00_DPCD_REV                                              0x00000
@@ -77,5 +77,5 @@
 #define DPCD_SC00_SET_POWER_D0                                             0x01
 #define DPCD_SC00_SET_POWER_D3                                             0x03
 
-void nvkm_dp_train(struct work_struct *);
+void nvkm_dp_train(struct nvkm_output_dp *);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
index 29e84b241cca..7b346ccc38b7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gf119.c
@@ -203,17 +203,20 @@ gf119_disp_intr_unk2_0(struct nv50_disp *disp, int head)
 	/* see note in nv50_disp_intr_unk20_0() */
 	if (outp && outp->info.type == DCB_OUTPUT_DP) {
 		struct nvkm_output_dp *outpdp = nvkm_output_dp(outp);
-		struct nvbios_init init = {
-			.subdev = subdev,
-			.bios = subdev->device->bios,
-			.outp = &outp->info,
-			.crtc = head,
-			.offset = outpdp->info.script[4],
-			.execute = 1,
-		};
+		if (!outpdp->lt.mst) {
+			struct nvbios_init init = {
+				.subdev = subdev,
+				.bios = subdev->device->bios,
+				.outp = &outp->info,
+				.crtc = head,
+				.offset = outpdp->info.script[4],
+				.execute = 1,
+			};
 
-		nvbios_exec(&init);
-		atomic_set(&outpdp->lt.done, 0);
+			nvkm_notify_put(&outpdp->irq);
+			nvbios_exec(&init);
+			atomic_set(&outpdp->lt.done, 0);
+		}
 	}
 }
 
@@ -314,7 +317,7 @@ gf119_disp_intr_unk2_2(struct nv50_disp *disp, int head)
 			break;
 		}
 
-		if (nvkm_output_dp_train(outp, pclk, true))
+		if (nvkm_output_dp_train(outp, pclk))
 			OUTP_ERR(outp, "link not trained before attach");
 	} else {
 		if (disp->func->sor.magic)
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c
index 3bf3380336e4..f5d613f82709 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/gp102.c
@@ -25,7 +25,7 @@
 #include "rootnv50.h"
 
 static void
-gp104_disp_intr_error(struct nv50_disp *disp, int chid)
+gp102_disp_intr_error(struct nv50_disp *disp, int chid)
 {
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
@@ -51,12 +51,12 @@ gp104_disp_intr_error(struct nv50_disp *disp, int chid)
 }
 
 static const struct nv50_disp_func
-gp104_disp = {
+gp102_disp = {
 	.intr = gf119_disp_intr,
-	.intr_error = gp104_disp_intr_error,
+	.intr_error = gp102_disp_intr_error,
 	.uevent = &gf119_disp_chan_uevent,
 	.super = gf119_disp_intr_supervisor,
-	.root = &gp104_disp_root_oclass,
+	.root = &gp102_disp_root_oclass,
 	.head.vblank_init = gf119_disp_vblank_init,
 	.head.vblank_fini = gf119_disp_vblank_fini,
 	.head.scanoutpos = gf119_disp_root_scanoutpos,
@@ -75,7 +75,7 @@ gp104_disp = {
 };
 
 int
-gp104_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
+gp102_disp_new(struct nvkm_device *device, int index, struct nvkm_disp **pdisp)
 {
-	return gf119_disp_new_(&gp104_disp, device, index, pdisp);
+	return gf119_disp_new_(&gp102_disp, device, index, pdisp);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
index 6f0436df0219..f8f2f16c22a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/hdagt215.c
@@ -59,7 +59,7 @@ gt215_hda_eld(NV50_DISP_MTHD_V1)
 			);
 		}
 		for (i = 0; i < size; i++)
-			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[0]);
+			nvkm_wr32(device, 0x61c440 + soff, (i << 8) | args->v0.data[i]);
 		for (; i < 0x60; i++)
 			nvkm_wr32(device, 0x61c440 + soff, (i << 8));
 		nvkm_mask(device, 0x61c448 + soff, 0x80000003, 0x80000003);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
index fbb8c7dc18fd..0db8efbf1c2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/nv50.c
@@ -433,8 +433,6 @@ nv50_disp_dptmds_war(struct nvkm_device *device)
 	case 0x94:
 	case 0x96:
 	case 0x98:
-	case 0xaa:
-	case 0xac:
 		return true;
 	default:
 		break;
@@ -590,6 +588,7 @@ nv50_disp_intr_unk20_0(struct nv50_disp *disp, int head)
 			.execute = 1,
 		};
 
+		nvkm_notify_put(&outpdp->irq);
 		nvbios_exec(&init);
 		atomic_set(&outpdp->lt.done, 0);
 	}
@@ -779,7 +778,7 @@ nv50_disp_intr_unk20_2(struct nv50_disp *disp, int head)
 			break;
 		}
 
-		if (nvkm_output_dp_train(outp, datarate / soff, true))
+		if (nvkm_output_dp_train(outp, datarate / soff))
 			OUTP_ERR(outp, "link not trained before attach");
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c
index 54a4ae8d66c6..5ad5d0f5db05 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmg84.c
@@ -33,5 +33,5 @@ g84_disp_oimm_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_oimm_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 5,
+	.chid = { 5, 5 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
index c658db54afc5..1f9fd3403f07 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgf119.c
@@ -33,5 +33,5 @@ gf119_disp_oimm_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_oimm_new,
 	.func = &gf119_disp_pioc_func,
-	.chid = 9,
+	.chid = { 9, 9 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c
index b1fde8c125d6..0c09fe85e952 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgk104.c
@@ -33,5 +33,5 @@ gk104_disp_oimm_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_oimm_new,
 	.func = &gf119_disp_pioc_func,
-	.chid = 9,
+	.chid = { 9, 9 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c
new file mode 100644
index 000000000000..abf82365c671
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgp102.c
@@ -0,0 +1,37 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "channv50.h"
+#include "rootnv50.h"
+
+#include <nvif/class.h>
+
+const struct nv50_disp_pioc_oclass
+gp102_disp_oimm_oclass = {
+	.base.oclass = GK104_DISP_OVERLAY,
+	.base.minver = 0,
+	.base.maxver = 0,
+	.ctor = nv50_disp_oimm_new,
+	.func = &gf119_disp_pioc_func,
+	.chid = { 9, 13 },
+};
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c
index f4e7eb3d1177..1281db28aebd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmgt215.c
@@ -33,5 +33,5 @@ gt215_disp_oimm_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_oimm_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 5,
+	.chid = { 5, 5 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
index 3940b9c966ec..07540f3d32dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/oimmnv50.c
@@ -33,7 +33,7 @@
 int
 nv50_disp_oimm_new(const struct nv50_disp_chan_func *func,
 		   const struct nv50_disp_chan_mthd *mthd,
-		   struct nv50_disp_root *root, int chid,
+		   struct nv50_disp_root *root, int ctrl, int user,
 		   const struct nvkm_oclass *oclass, void *data, u32 size,
 		   struct nvkm_object **pobject)
 {
@@ -54,7 +54,7 @@ nv50_disp_oimm_new(const struct nv50_disp_chan_func *func,
 	} else
 		return ret;
 
-	return nv50_disp_chan_new_(func, mthd, root, chid + head,
+	return nv50_disp_chan_new_(func, mthd, root, ctrl + head, user + head,
 				   head, oclass, pobject);
 }
 
@@ -65,5 +65,5 @@ nv50_disp_oimm_oclass = {
 	.base.maxver = 0,
 	.ctor = nv50_disp_oimm_new,
 	.func = &nv50_disp_pioc_func,
-	.chid = 5,
+	.chid = { 5, 5 },
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.c
index 3b7a9e7a1ea8..de36f73b14dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.c
@@ -31,7 +31,7 @@
 #include <nvif/event.h>
 
 int
-nvkm_output_dp_train(struct nvkm_output *base, u32 datarate, bool wait)
+nvkm_output_dp_train(struct nvkm_output *base, u32 datarate)
 {
 	struct nvkm_output_dp *outp = nvkm_output_dp(base);
 	bool retrain = true;
@@ -39,6 +39,8 @@ nvkm_output_dp_train(struct nvkm_output *base, u32 datarate, bool wait)
 	u32 linkrate;
 	int ret, i;
 
+	mutex_lock(&outp->mutex);
+
 	/* check that the link is trained at a high enough rate */
 	ret = nvkm_rdaux(outp->aux, DPCD_LC00_LINK_BW_SET, link, 2);
 	if (ret) {
@@ -88,19 +90,10 @@ done:
 			outp->dpcd[DPCD_RC02] =
 				outp->base.info.dpconf.link_nr;
 		}
-		atomic_set(&outp->lt.done, 0);
-		schedule_work(&outp->lt.work);
-	} else {
-		nvkm_notify_get(&outp->irq);
-	}
-
-	if (wait) {
-		if (!wait_event_timeout(outp->lt.wait,
-					atomic_read(&outp->lt.done),
-					msecs_to_jiffies(2000)))
-			ret = -ETIMEDOUT;
+		nvkm_dp_train(outp);
 	}
 
+	mutex_unlock(&outp->mutex);
 	return ret;
 }
 
@@ -118,7 +111,7 @@ nvkm_output_dp_enable(struct nvkm_output_dp *outp, bool enable)
 
 		if (!nvkm_rdaux(aux, DPCD_RC00_DPCD_REV, outp->dpcd,
 				sizeof(outp->dpcd))) {
-			nvkm_output_dp_train(&outp->base, 0, true);
+			nvkm_output_dp_train(&outp->base, 0);
 			return;
 		}
 	}
@@ -165,10 +158,10 @@ nvkm_output_dp_irq(struct nvkm_notify *notify)
 	};
 
 	OUTP_DBG(&outp->base, "IRQ: %d", line->mask);
-	nvkm_output_dp_train(&outp->base, 0, true);
+	nvkm_output_dp_train(&outp->base, 0);
 
 	nvkm_event_send(&disp->hpd, rep.mask, conn->index, &rep, sizeof(rep));
-	return NVKM_NOTIFY_DROP;
+	return NVKM_NOTIFY_KEEP;
 }
 
 static void
@@ -177,7 +170,6 @@ nvkm_output_dp_fini(struct nvkm_output *base)
 	struct nvkm_output_dp *outp = nvkm_output_dp(base);
 	nvkm_notify_put(&outp->hpd);
 	nvkm_notify_put(&outp->irq);
-	flush_work(&outp->lt.work);
 	nvkm_output_dp_enable(outp, false);
 }
 
@@ -187,6 +179,7 @@ nvkm_output_dp_init(struct nvkm_output *base)
 	struct nvkm_output_dp *outp = nvkm_output_dp(base);
 	nvkm_notify_put(&outp->base.conn->hpd);
 	nvkm_output_dp_enable(outp, true);
+	nvkm_notify_get(&outp->irq);
 	nvkm_notify_get(&outp->hpd);
 }
 
@@ -238,11 +231,6 @@ nvkm_output_dp_ctor(const struct nvkm_output_dp_func *func,
 	OUTP_DBG(&outp->base, "bios dp %02x %02x %02x %02x",
 		 outp->version, hdr, cnt, len);
 
-	/* link training */
-	INIT_WORK(&outp->lt.work, nvkm_dp_train);
-	init_waitqueue_head(&outp->lt.wait);
-	atomic_set(&outp->lt.done, 0);
-
 	/* link maintenance */
 	ret = nvkm_notify_init(NULL, &i2c->event, nvkm_output_dp_irq, true,
 			       &(struct nvkm_i2c_ntfy_req) {
@@ -257,6 +245,9 @@ nvkm_output_dp_ctor(const struct nvkm_output_dp_func *func,
 		return ret;
 	}
 
+	mutex_init(&outp->mutex);
+	atomic_set(&outp->lt.done, 0);
+
 	/* hotplug detect, replaces gpio-based mechanism with aux events */
 	ret = nvkm_notify_init(NULL, &i2c->event, nvkm_output_dp_hpd, true,
 			       &(struct nvkm_i2c_ntfy_req) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
index 4e983f6d7032..3c83a561cd88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/outpdp.h
@@ -29,10 +29,10 @@ struct nvkm_output_dp {
 	bool present;
 	u8 dpcd[16];
 
+	struct mutex mutex;
 	struct {
-		struct work_struct work;
-		wait_queue_head_t wait;
 		atomic_t done;
+		bool mst;
 	} lt;
 };
 
@@ -41,9 +41,11 @@ struct nvkm_output_dp_func {
 	int (*lnk_pwr)(struct nvkm_output_dp *, int nr);
 	int (*lnk_ctl)(struct nvkm_output_dp *, int nr, int bw, bool ef);
 	int (*drv_ctl)(struct nvkm_output_dp *, int ln, int vs, int pe, int pc);
+	void (*vcpi)(struct nvkm_output_dp *, int head, u8 start_slot,
+		     u8 num_slots, u16 pbn, u16 aligned_pbn);
 };
 
-int nvkm_output_dp_train(struct nvkm_output *, u32 rate, bool wait);
+int nvkm_output_dp_train(struct nvkm_output *, u32 rate);
 
 int nvkm_output_dp_ctor(const struct nvkm_output_dp_func *, struct nvkm_disp *,
 			int index, struct dcb_output *, struct nvkm_i2c_aux *,
@@ -63,6 +65,7 @@ int gf119_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
 int gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *, int, int, bool);
 int gf119_sor_dp_drv_ctl(struct nvkm_output_dp *, int, int, int, int);
+void gf119_sor_dp_vcpi(struct nvkm_output_dp *, int, u8, u8, u16, u16);
 
 int gm107_sor_dp_new(struct nvkm_disp *, int, struct dcb_output *,
 		     struct nvkm_output **);
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c
index 97e2dd2d908e..589bd2f12b41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/ovlygp102.c
@@ -27,12 +27,12 @@
 #include <nvif/class.h>
 
 const struct nv50_disp_dmac_oclass
-gp104_disp_ovly_oclass = {
+gp102_disp_ovly_oclass = {
 	.base.oclass = GK104_DISP_OVERLAY_CONTROL_DMA,
 	.base.minver = 0,
 	.base.maxver = 0,
 	.ctor = nv50_disp_ovly_new,
-	.func = &gp104_disp_dmac_func,
+	.func = &gp102_disp_dmac_func,
 	.mthd = &gk104_disp_ovly_chan_mthd,
 	.chid = 5,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
index a625a9876e34..0abaa6431943 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocgf119.c
@@ -32,20 +32,21 @@ gf119_disp_pioc_fini(struct nv50_disp_chan *chan)
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->chid;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
-	nvkm_mask(device, 0x610490 + (chid * 0x10), 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x610490 + (ctrl * 0x10), 0x00000001, 0x00000000);
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610490 + (chid * 0x10)) & 0x00030000))
+		if (!(nvkm_rd32(device, 0x610490 + (ctrl * 0x10)) & 0x00030000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d fini: %08x\n", chid,
-			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d fini: %08x\n", user,
+			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 	}
 
 	/* disable error reporting and completion notification */
-	nvkm_mask(device, 0x610090, 0x00000001 << chid, 0x00000000);
-	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000000);
+	nvkm_mask(device, 0x610090, 0x00000001 << user, 0x00000000);
+	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000000);
 }
 
 static int
@@ -54,20 +55,21 @@ gf119_disp_pioc_init(struct nv50_disp_chan *chan)
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->chid;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
 	/* enable error reporting */
-	nvkm_mask(device, 0x6100a0, 0x00000001 << chid, 0x00000001 << chid);
+	nvkm_mask(device, 0x6100a0, 0x00000001 << user, 0x00000001 << user);
 
 	/* activate channel */
-	nvkm_wr32(device, 0x610490 + (chid * 0x10), 0x00000001);
+	nvkm_wr32(device, 0x610490 + (ctrl * 0x10), 0x00000001);
 	if (nvkm_msec(device, 2000,
-		u32 tmp = nvkm_rd32(device, 0x610490 + (chid * 0x10));
+		u32 tmp = nvkm_rd32(device, 0x610490 + (ctrl * 0x10));
 		if ((tmp & 0x00030000) == 0x00010000)
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d init: %08x\n", chid,
-			   nvkm_rd32(device, 0x610490 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d init: %08x\n", user,
+			   nvkm_rd32(device, 0x610490 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
index 9d2618dacf20..0211e0e8a35f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/piocnv50.c
@@ -32,15 +32,16 @@ nv50_disp_pioc_fini(struct nv50_disp_chan *chan)
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->chid;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
-	nvkm_mask(device, 0x610200 + (chid * 0x10), 0x00000001, 0x00000000);
+	nvkm_mask(device, 0x610200 + (ctrl * 0x10), 0x00000001, 0x00000000);
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x00030000))
+		if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x00030000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d timeout: %08x\n", chid,
-			   nvkm_rd32(device, 0x610200 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d timeout: %08x\n", user,
+			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 	}
 }
 
@@ -50,26 +51,27 @@ nv50_disp_pioc_init(struct nv50_disp_chan *chan)
 	struct nv50_disp *disp = chan->root->disp;
 	struct nvkm_subdev *subdev = &disp->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	int chid = chan->chid;
+	int ctrl = chan->chid.ctrl;
+	int user = chan->chid.user;
 
-	nvkm_wr32(device, 0x610200 + (chid * 0x10), 0x00002000);
+	nvkm_wr32(device, 0x610200 + (ctrl * 0x10), 0x00002000);
 	if (nvkm_msec(device, 2000,
-		if (!(nvkm_rd32(device, 0x610200 + (chid * 0x10)) & 0x00030000))
+		if (!(nvkm_rd32(device, 0x610200 + (ctrl * 0x10)) & 0x00030000))
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d timeout0: %08x\n", chid,
-			   nvkm_rd32(device, 0x610200 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d timeout0: %08x\n", user,
+			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
-	nvkm_wr32(device, 0x610200 + (chid * 0x10), 0x00000001);
+	nvkm_wr32(device, 0x610200 + (ctrl * 0x10), 0x00000001);
 	if (nvkm_msec(device, 2000,
-		u32 tmp = nvkm_rd32(device, 0x610200 + (chid * 0x10));
+		u32 tmp = nvkm_rd32(device, 0x610200 + (ctrl * 0x10));
 		if ((tmp & 0x00030000) == 0x00010000)
 			break;
 	) < 0) {
-		nvkm_error(subdev, "ch %d timeout1: %08x\n", chid,
-			   nvkm_rd32(device, 0x610200 + (chid * 0x10)));
+		nvkm_error(subdev, "ch %d timeout1: %08x\n", user,
+			   nvkm_rd32(device, 0x610200 + (ctrl * 0x10)));
 		return -EBUSY;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c
index 8443e04dc626..37122ca579ad 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootgp102.c
@@ -27,32 +27,32 @@
 #include <nvif/class.h>
 
 static const struct nv50_disp_root_func
-gp104_disp_root = {
+gp102_disp_root = {
 	.init = gf119_disp_root_init,
 	.fini = gf119_disp_root_fini,
 	.dmac = {
-		&gp104_disp_core_oclass,
-		&gp104_disp_base_oclass,
-		&gp104_disp_ovly_oclass,
+		&gp102_disp_core_oclass,
+		&gp102_disp_base_oclass,
+		&gp102_disp_ovly_oclass,
 	},
 	.pioc = {
-		&gk104_disp_oimm_oclass,
-		&gk104_disp_curs_oclass,
+		&gp102_disp_oimm_oclass,
+		&gp102_disp_curs_oclass,
 	},
 };
 
 static int
-gp104_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
+gp102_disp_root_new(struct nvkm_disp *disp, const struct nvkm_oclass *oclass,
 		    void *data, u32 size, struct nvkm_object **pobject)
 {
-	return nv50_disp_root_new_(&gp104_disp_root, disp, oclass,
+	return nv50_disp_root_new_(&gp102_disp_root, disp, oclass,
 				   data, size, pobject);
 }
 
 const struct nvkm_disp_oclass
-gp104_disp_root_oclass = {
-	.base.oclass = GP104_DISP,
+gp102_disp_root_oclass = {
+	.base.oclass = GP102_DISP,
 	.base.minver = -1,
 	.base.maxver = -1,
-	.ctor = gp104_disp_root_new,
+	.ctor = gp102_disp_root_new,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
index 2f9cecd81d04..e70dc6a9ff7d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.c
@@ -66,7 +66,7 @@ nv50_disp_root_scanoutpos(NV50_DISP_MTHD_V0)
 	return 0;
 }
 
-int
+static int
 nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 {
 	union {
@@ -173,13 +173,56 @@ nv50_disp_root_mthd_(struct nvkm_object *object, u32 mthd, void *data, u32 size)
 				return 0;
 			} else
 			if (args->v0.state != 0) {
-				nvkm_output_dp_train(&outpdp->base, 0, true);
+				nvkm_output_dp_train(&outpdp->base, 0);
 				return 0;
 			}
 		} else
 			return ret;
 	}
 		break;
+	case NV50_DISP_MTHD_V1_SOR_DP_MST_LINK: {
+		struct nvkm_output_dp *outpdp = nvkm_output_dp(outp);
+		union {
+			struct nv50_disp_sor_dp_mst_link_v0 v0;
+		} *args = data;
+		int ret = -ENOSYS;
+		nvif_ioctl(object, "disp sor dp mst link size %d\n", size);
+		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+			nvif_ioctl(object, "disp sor dp mst link vers %d state %d\n",
+				   args->v0.version, args->v0.state);
+			if (outpdp->lt.mst != !!args->v0.state) {
+				outpdp->lt.mst = !!args->v0.state;
+				atomic_set(&outpdp->lt.done, 0);
+				nvkm_output_dp_train(&outpdp->base, 0);
+			}
+			return 0;
+		} else
+			return ret;
+	}
+		break;
+	case NV50_DISP_MTHD_V1_SOR_DP_MST_VCPI: {
+		struct nvkm_output_dp *outpdp = nvkm_output_dp(outp);
+		union {
+			struct nv50_disp_sor_dp_mst_vcpi_v0 v0;
+		} *args = data;
+		int ret = -ENOSYS;
+		nvif_ioctl(object, "disp sor dp mst vcpi size %d\n", size);
+		if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
+			nvif_ioctl(object, "disp sor dp mst vcpi vers %d "
+					   "slot %02x/%02x pbn %04x/%04x\n",
+				   args->v0.version, args->v0.start_slot,
+				   args->v0.num_slots, args->v0.pbn,
+				   args->v0.aligned_pbn);
+			if (!outpdp->func->vcpi)
+				return -ENODEV;
+			outpdp->func->vcpi(outpdp, head, args->v0.start_slot,
+					   args->v0.num_slots, args->v0.pbn,
+					   args->v0.aligned_pbn);
+			return 0;
+		} else
+			return ret;
+	}
+		break;
 	case NV50_DISP_MTHD_V1_PIOR_PWR:
 		if (!func->pior.power)
 			return -ENODEV;
@@ -207,8 +250,8 @@ nv50_disp_root_pioc_new_(const struct nvkm_oclass *oclass,
 {
 	const struct nv50_disp_pioc_oclass *sclass = oclass->priv;
 	struct nv50_disp_root *root = nv50_disp_root(oclass->parent);
-	return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid,
-			    oclass, data, size, pobject);
+	return sclass->ctor(sclass->func, sclass->mthd, root, sclass->chid.ctrl,
+			    sclass->chid.user, oclass, data, size, pobject);
 }
 
 static int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
index ad00f1724b72..b147cf5b3518 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/rootnv50.h
@@ -41,5 +41,5 @@ extern const struct nvkm_disp_oclass gk110_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm107_disp_root_oclass;
 extern const struct nvkm_disp_oclass gm200_disp_root_oclass;
 extern const struct nvkm_disp_oclass gp100_disp_root_oclass;
-extern const struct nvkm_disp_oclass gp104_disp_root_oclass;
+extern const struct nvkm_disp_oclass gp102_disp_root_oclass;
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
index 1bb9d661e9b3..627b9ee1ddd2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorg94.c
@@ -39,21 +39,6 @@ g94_sor_loff(struct nvkm_output_dp *outp)
 }
 
 /*******************************************************************************
- * TMDS/LVDS
- ******************************************************************************/
-static const struct nvkm_output_func
-g94_sor_output_func = {
-};
-
-int
-g94_sor_output_new(struct nvkm_disp *disp, int index,
-		   struct dcb_output *dcbE, struct nvkm_output **poutp)
-{
-	return nvkm_output_new_(&g94_sor_output_func, disp,
-				index, dcbE, poutp);
-}
-
-/*******************************************************************************
  * DisplayPort
  ******************************************************************************/
 u32
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
index 49bd5da194e1..6ffdaa65aa77 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgf119.c
@@ -56,11 +56,13 @@ gf119_sor_dp_lnk_ctl(struct nvkm_output_dp *outp, int nr, int bw, bool ef)
 
 	clksor |= bw << 18;
 	dpctrl |= ((1 << nr) - 1) << 16;
+	if (outp->lt.mst)
+		dpctrl |= 0x40000000;
 	if (ef)
 		dpctrl |= 0x00004000;
 
 	nvkm_mask(device, 0x612300 + soff, 0x007c0000, clksor);
-	nvkm_mask(device, 0x61c10c + loff, 0x001f4000, dpctrl);
+	nvkm_mask(device, 0x61c10c + loff, 0x401f4000, dpctrl);
 	return 0;
 }
 
@@ -101,12 +103,24 @@ gf119_sor_dp_drv_ctl(struct nvkm_output_dp *outp,
 	return 0;
 }
 
+void
+gf119_sor_dp_vcpi(struct nvkm_output_dp *outp, int head, u8 slot,
+		  u8 slot_nr, u16 pbn, u16 aligned)
+{
+	struct nvkm_device *device = outp->base.disp->engine.subdev.device;
+	const u32 hoff = head * 0x800;
+
+	nvkm_mask(device, 0x616588 + hoff, 0x00003f3f, (slot_nr << 8) | slot);
+	nvkm_mask(device, 0x61658c + hoff, 0xffffffff, (aligned << 16) | pbn);
+}
+
 static const struct nvkm_output_dp_func
 gf119_sor_dp_func = {
 	.pattern = gf119_sor_dp_pattern,
 	.lnk_pwr = g94_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gf119_sor_dp_drv_ctl,
+	.vcpi = gf119_sor_dp_vcpi,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
index 37790b2617c5..4cf8ad4d18ab 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm107.c
@@ -43,6 +43,7 @@ gm107_sor_dp_func = {
 	.lnk_pwr = g94_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gf119_sor_dp_drv_ctl,
+	.vcpi = gf119_sor_dp_vcpi,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
index c44fa7ea672a..81b788fa61be 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/disp/sorgm200.c
@@ -120,6 +120,7 @@ gm200_sor_dp_func = {
 	.lnk_pwr = gm200_sor_dp_lnk_pwr,
 	.lnk_ctl = gf119_sor_dp_lnk_ctl,
 	.drv_ctl = gm200_sor_dp_drv_ctl,
+	.vcpi = gf119_sor_dp_vcpi,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
index f11ebdd16c77..11b7b8fd5dda 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/base.c
@@ -28,24 +28,6 @@
 
 #include <nvif/class.h>
 
-struct nvkm_dmaobj *
-nvkm_dma_search(struct nvkm_dma *dma, struct nvkm_client *client, u64 object)
-{
-	struct rb_node *node = client->dmaroot.rb_node;
-	while (node) {
-		struct nvkm_dmaobj *dmaobj =
-			container_of(node, typeof(*dmaobj), rb);
-		if (object < dmaobj->handle)
-			node = node->rb_left;
-		else
-		if (object > dmaobj->handle)
-			node = node->rb_right;
-		else
-			return dmaobj;
-	}
-	return NULL;
-}
-
 static int
 nvkm_dma_oclass_new(struct nvkm_device *device,
 		    const struct nvkm_oclass *oclass, void *data, u32 size,
@@ -53,34 +35,12 @@ nvkm_dma_oclass_new(struct nvkm_device *device,
 {
 	struct nvkm_dma *dma = nvkm_dma(oclass->engine);
 	struct nvkm_dmaobj *dmaobj = NULL;
-	struct nvkm_client *client = oclass->client;
-	struct rb_node **ptr = &client->dmaroot.rb_node;
-	struct rb_node *parent = NULL;
 	int ret;
 
 	ret = dma->func->class_new(dma, oclass, data, size, &dmaobj);
 	if (dmaobj)
 		*pobject = &dmaobj->object;
-	if (ret)
-		return ret;
-
-	dmaobj->handle = oclass->object;
-
-	while (*ptr) {
-		struct nvkm_dmaobj *obj = container_of(*ptr, typeof(*obj), rb);
-		parent = *ptr;
-		if (dmaobj->handle < obj->handle)
-			ptr = &parent->rb_left;
-		else
-		if (dmaobj->handle > obj->handle)
-			ptr = &parent->rb_right;
-		else
-			return -EEXIST;
-	}
-
-	rb_link_node(&dmaobj->rb, parent, ptr);
-	rb_insert_color(&dmaobj->rb, &client->dmaroot);
-	return 0;
+	return ret;
 }
 
 static const struct nvkm_device_oclass
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
index 13c661b1ef14..d20cc0681a88 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/dma/user.c
@@ -31,6 +31,19 @@
 #include <nvif/cl0002.h>
 #include <nvif/unpack.h>
 
+static const struct nvkm_object_func nvkm_dmaobj_func;
+struct nvkm_dmaobj *
+nvkm_dmaobj_search(struct nvkm_client *client, u64 handle)
+{
+	struct nvkm_object *object;
+
+	object = nvkm_object_search(client, handle, &nvkm_dmaobj_func);
+	if (IS_ERR(object))
+		return (void *)object;
+
+	return nvkm_dmaobj(object);
+}
+
 static int
 nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 		 int align, struct nvkm_gpuobj **pgpuobj)
@@ -42,10 +55,7 @@ nvkm_dmaobj_bind(struct nvkm_object *base, struct nvkm_gpuobj *gpuobj,
 static void *
 nvkm_dmaobj_dtor(struct nvkm_object *base)
 {
-	struct nvkm_dmaobj *dmaobj = nvkm_dmaobj(base);
-	if (!RB_EMPTY_NODE(&dmaobj->rb))
-		rb_erase(&dmaobj->rb, &dmaobj->object.client->dmaroot);
-	return dmaobj;
+	return nvkm_dmaobj(base);
 }
 
 static const struct nvkm_object_func
@@ -74,7 +84,6 @@ nvkm_dmaobj_ctor(const struct nvkm_dmaobj_func *func, struct nvkm_dma *dma,
 	nvkm_object_ctor(&nvkm_dmaobj_func, oclass, &dmaobj->object);
 	dmaobj->func = func;
 	dmaobj->dma = dma;
-	RB_CLEAR_NODE(&dmaobj->rb);
 
 	nvif_ioctl(parent, "create dma size %d\n", *psize);
 	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, true))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
index 1c9682ae3a6b..660ca7aa95ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/base.c
@@ -32,6 +32,17 @@
 #include <nvif/unpack.h>
 
 void
+nvkm_fifo_recover_chan(struct nvkm_fifo *fifo, int chid)
+{
+	unsigned long flags;
+	if (WARN_ON(!fifo->func->recover_chan))
+		return;
+	spin_lock_irqsave(&fifo->lock, flags);
+	fifo->func->recover_chan(fifo, chid);
+	spin_unlock_irqrestore(&fifo->lock, flags);
+}
+
+void
 nvkm_fifo_pause(struct nvkm_fifo *fifo, unsigned long *flags)
 {
 	return fifo->func->pause(fifo, flags);
@@ -55,19 +66,29 @@ nvkm_fifo_chan_put(struct nvkm_fifo *fifo, unsigned long flags,
 }
 
 struct nvkm_fifo_chan *
-nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *fifo, u64 inst)
 {
 	struct nvkm_fifo_chan *chan;
-	unsigned long flags;
-	spin_lock_irqsave(&fifo->lock, flags);
 	list_for_each_entry(chan, &fifo->chan, head) {
 		if (chan->inst->addr == inst) {
 			list_del(&chan->head);
 			list_add(&chan->head, &fifo->chan);
-			*rflags = flags;
 			return chan;
 		}
 	}
+	return NULL;
+}
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst(struct nvkm_fifo *fifo, u64 inst, unsigned long *rflags)
+{
+	struct nvkm_fifo_chan *chan;
+	unsigned long flags;
+	spin_lock_irqsave(&fifo->lock, flags);
+	if ((chan = nvkm_fifo_chan_inst_locked(fifo, inst))) {
+		*rflags = flags;
+		return chan;
+	}
 	spin_unlock_irqrestore(&fifo->lock, flags);
 	return NULL;
 }
@@ -90,9 +111,34 @@ nvkm_fifo_chan_chid(struct nvkm_fifo *fifo, int chid, unsigned long *rflags)
 	return NULL;
 }
 
+void
+nvkm_fifo_kevent(struct nvkm_fifo *fifo, int chid)
+{
+	nvkm_event_send(&fifo->kevent, 1, chid, NULL, 0);
+}
+
 static int
-nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
-		     struct nvkm_notify *notify)
+nvkm_fifo_kevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
+{
+	struct nvkm_fifo_chan *chan = nvkm_fifo_chan(object);
+	if (size == 0) {
+		notify->size  = 0;
+		notify->types = 1;
+		notify->index = chan->chid;
+		return 0;
+	}
+	return -ENOSYS;
+}
+
+static const struct nvkm_event_func
+nvkm_fifo_kevent_func = {
+	.ctor = nvkm_fifo_kevent_ctor,
+};
+
+static int
+nvkm_fifo_cevent_ctor(struct nvkm_object *object, void *data, u32 size,
+		      struct nvkm_notify *notify)
 {
 	if (size == 0) {
 		notify->size  = 0;
@@ -104,10 +150,16 @@ nvkm_fifo_event_ctor(struct nvkm_object *object, void *data, u32 size,
 }
 
 static const struct nvkm_event_func
-nvkm_fifo_event_func = {
-	.ctor = nvkm_fifo_event_ctor,
+nvkm_fifo_cevent_func = {
+	.ctor = nvkm_fifo_cevent_ctor,
 };
 
+void
+nvkm_fifo_cevent(struct nvkm_fifo *fifo)
+{
+	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+}
+
 static void
 nvkm_fifo_uevent_fini(struct nvkm_event *event, int type, int index)
 {
@@ -241,6 +293,7 @@ nvkm_fifo_dtor(struct nvkm_engine *engine)
 	void *data = fifo;
 	if (fifo->func->dtor)
 		data = fifo->func->dtor(fifo);
+	nvkm_event_fini(&fifo->kevent);
 	nvkm_event_fini(&fifo->cevent);
 	nvkm_event_fini(&fifo->uevent);
 	return data;
@@ -283,5 +336,9 @@ nvkm_fifo_ctor(const struct nvkm_fifo_func *func, struct nvkm_device *device,
 			return ret;
 	}
 
-	return nvkm_event_init(&nvkm_fifo_event_func, 1, 1, &fifo->cevent);
+	ret = nvkm_event_init(&nvkm_fifo_cevent_func, 1, 1, &fifo->cevent);
+	if (ret)
+		return ret;
+
+	return nvkm_event_init(&nvkm_fifo_kevent_func, 1, nr, &fifo->kevent);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
index dc6d4678f228..fab760ae922f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.c
@@ -371,9 +371,9 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 
 	/* allocate push buffer ctxdma instance */
 	if (push) {
-		dmaobj = nvkm_dma_search(device->dma, oclass->client, push);
-		if (!dmaobj)
-			return -ENOENT;
+		dmaobj = nvkm_dmaobj_search(client, push);
+		if (IS_ERR(dmaobj))
+			return PTR_ERR(dmaobj);
 
 		ret = nvkm_object_bind(&dmaobj->object, chan->inst, -16,
 				       &chan->push);
@@ -410,6 +410,6 @@ nvkm_fifo_chan_ctor(const struct nvkm_fifo_chan_func *func,
 		     base + user * chan->chid;
 	chan->size = user;
 
-	nvkm_event_send(&fifo->cevent, 1, 0, NULL, 0);
+	nvkm_fifo_cevent(fifo);
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
index 55dc415c5c08..d8019bdacd61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chan.h
@@ -29,5 +29,5 @@ struct nvkm_fifo_chan_oclass {
 	struct nvkm_sclass base;
 };
 
-int g84_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
+int gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *, u32, struct nvkm_event **);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
index aeb3387a3fb0..61797c4dd07a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/chang84.c
@@ -30,12 +30,12 @@
 
 #include <nvif/cl826e.h>
 
-int
+static int
 g84_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
 		   struct nvkm_event **pevent)
 {
 	switch (type) {
-	case G82_CHANNEL_DMA_V0_NTFY_UEVENT:
+	case NV826E_V0_NTFY_NON_STALL_INTERRUPT:
 		*pevent = &chan->fifo->uevent;
 		return 0;
 	default:
@@ -129,7 +129,7 @@ g84_fifo_chan_engine_fini(struct nvkm_fifo_chan *base,
 }
 
 
-int
+static int
 g84_fifo_chan_engine_init(struct nvkm_fifo_chan *base,
 			  struct nvkm_engine *engine)
 {
@@ -170,7 +170,7 @@ g84_fifo_chan_engine_ctor(struct nvkm_fifo_chan *base,
 	return nvkm_object_bind(object, NULL, 0, &chan->engn[engn]);
 }
 
-int
+static int
 g84_fifo_chan_object_ctor(struct nvkm_fifo_chan *base,
 			  struct nvkm_object *object)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
index 352a0baec84d..cd468ab1db12 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gf100.c
@@ -68,7 +68,14 @@ gf100_fifo_runlist_commit(struct gf100_fifo *fifo)
 	}
 	nvkm_done(cur);
 
-	target = (nvkm_memory_target(cur) == NVKM_MEM_TARGET_HOST) ? 0x3 : 0x0;
+	switch (nvkm_memory_target(cur)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		mutex_unlock(&subdev->mutex);
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(cur) >> 12) |
 				    (target << 28));
@@ -180,8 +187,10 @@ gf100_fifo_recover(struct gf100_fifo *fifo, struct nvkm_engine *engine,
 	list_del_init(&chan->head);
 	chan->killed = true;
 
-	fifo->recover.mask |= 1ULL << engine->subdev.index;
+	if (engine != &fifo->base.engine)
+		fifo->recover.mask |= 1ULL << engine->subdev.index;
 	schedule_work(&fifo->recover.work);
+	nvkm_fifo_kevent(&fifo->base, chid);
 }
 
 static const struct nvkm_enum
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
index 103c0afaaa6d..3a24788c3185 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
@@ -27,11 +27,71 @@
 #include <core/client.h>
 #include <core/gpuobj.h>
 #include <subdev/bar.h>
+#include <subdev/timer.h>
 #include <subdev/top.h>
 #include <engine/sw.h>
 
 #include <nvif/class.h>
 
+struct gk104_fifo_engine_status {
+	bool busy;
+	bool faulted;
+	bool chsw;
+	bool save;
+	bool load;
+	struct {
+		bool tsg;
+		u32 id;
+	} prev, next, *chan;
+};
+
+static void
+gk104_fifo_engine_status(struct gk104_fifo *fifo, int engn,
+			 struct gk104_fifo_engine_status *status)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
+
+	status->busy     = !!(stat & 0x80000000);
+	status->faulted  = !!(stat & 0x40000000);
+	status->next.tsg = !!(stat & 0x10000000);
+	status->next.id  =   (stat & 0x0fff0000) >> 16;
+	status->chsw     = !!(stat & 0x00008000);
+	status->save     = !!(stat & 0x00004000);
+	status->load     = !!(stat & 0x00002000);
+	status->prev.tsg = !!(stat & 0x00001000);
+	status->prev.id  =   (stat & 0x00000fff);
+	status->chan     = NULL;
+
+	if (status->busy && status->chsw) {
+		if (status->load && status->save) {
+			if (engine && nvkm_engine_chsw_load(engine))
+				status->chan = &status->next;
+			else
+				status->chan = &status->prev;
+		} else
+		if (status->load) {
+			status->chan = &status->next;
+		} else {
+			status->chan = &status->prev;
+		}
+	} else
+	if (status->load) {
+		status->chan = &status->prev;
+	}
+
+	nvkm_debug(subdev, "engine %02d: busy %d faulted %d chsw %d "
+			   "save %d load %d %sid %d%s-> %sid %d%s\n",
+		   engn, status->busy, status->faulted,
+		   status->chsw, status->save, status->load,
+		   status->prev.tsg ? "tsg" : "ch", status->prev.id,
+		   status->chan == &status->prev ? "*" : " ",
+		   status->next.tsg ? "tsg" : "ch", status->next.id,
+		   status->chan == &status->next ? "*" : " ");
+}
+
 static int
 gk104_fifo_class_get(struct nvkm_fifo *base, int index,
 		     const struct nvkm_fifo_chan_oclass **psclass)
@@ -83,10 +143,13 @@ gk104_fifo_runlist_commit(struct gk104_fifo *fifo, int runl)
 	}
 	nvkm_done(mem);
 
-	if (nvkm_memory_target(mem) == NVKM_MEM_TARGET_VRAM)
-		target = 0;
-	else
-		target = 3;
+	switch (nvkm_memory_target(mem)) {
+	case NVKM_MEM_TARGET_VRAM: target = 0; break;
+	case NVKM_MEM_TARGET_NCOH: target = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
 
 	nvkm_wr32(device, 0x002270, (nvkm_memory_addr(mem) >> 12) |
 				    (target << 28));
@@ -149,31 +212,137 @@ gk104_fifo_recover_work(struct work_struct *w)
 	nvkm_mask(device, 0x002630, runm, 0x00000000);
 }
 
+static void gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn);
+
 static void
-gk104_fifo_recover(struct gk104_fifo *fifo, struct nvkm_engine *engine,
-		   struct gk104_fifo_chan *chan)
+gk104_fifo_recover_runl(struct gk104_fifo *fifo, int runl)
 {
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
-	u32 chid = chan->base.chid;
-	int engn;
+	const u32 runm = BIT(runl);
 
-	nvkm_error(subdev, "%s engine fault on channel %d, recovering...\n",
-		   nvkm_subdev_name[engine->subdev.index], chid);
 	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.runm & runm)
+		return;
+	fifo->recover.runm |= runm;
 
-	nvkm_mask(device, 0x800004 + (chid * 0x08), 0x00000800, 0x00000800);
-	list_del_init(&chan->head);
-	chan->killed = true;
+	/* Block runlist to prevent channel assignment(s) from changing. */
+	nvkm_mask(device, 0x002630, runm, runm);
 
-	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		if (fifo->engine[engn].engine == engine) {
-			fifo->recover.engm |= BIT(engn);
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "runlist %d: scheduled for recovery\n", runl);
+	schedule_work(&fifo->recover.work);
+}
+
+static void
+gk104_fifo_recover_chan(struct nvkm_fifo *base, int chid)
+{
+	struct gk104_fifo *fifo = gk104_fifo(base);
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32  stat = nvkm_rd32(device, 0x800004 + (chid * 0x08));
+	const u32  runl = (stat & 0x000f0000) >> 16;
+	const bool used = (stat & 0x00000001);
+	unsigned long engn, engm = fifo->runlist[runl].engm;
+	struct gk104_fifo_chan *chan;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (!used)
+		return;
+
+	/* Lookup SW state for channel, and mark it as dead. */
+	list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
+		if (chan->base.chid == chid) {
+			list_del_init(&chan->head);
+			chan->killed = true;
+			nvkm_fifo_kevent(&fifo->base, chid);
 			break;
 		}
 	}
 
-	fifo->recover.runm |= BIT(chan->runl);
+	/* Disable channel. */
+	nvkm_wr32(device, 0x800004 + (chid * 0x08), stat | 0x00000800);
+	nvkm_warn(subdev, "channel %d: killed\n", chid);
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Schedule recovery for any engines the channel is on. */
+	for_each_set_bit(engn, &engm, fifo->engine_nr) {
+		struct gk104_fifo_engine_status status;
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.chan || status.chan->id != chid)
+			continue;
+		gk104_fifo_recover_engn(fifo, engn);
+	}
+}
+
+static void
+gk104_fifo_recover_engn(struct gk104_fifo *fifo, int engn)
+{
+	struct nvkm_engine *engine = fifo->engine[engn].engine;
+	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const u32 runl = fifo->engine[engn].runl;
+	const u32 engm = BIT(engn);
+	struct gk104_fifo_engine_status status;
+	int mmui = -1;
+
+	assert_spin_locked(&fifo->base.lock);
+	if (fifo->recover.engm & engm)
+		return;
+	fifo->recover.engm |= engm;
+
+	/* Block channel assignments from changing during recovery. */
+	gk104_fifo_recover_runl(fifo, runl);
+
+	/* Determine which channel (if any) is currently on the engine. */
+	gk104_fifo_engine_status(fifo, engn, &status);
+	if (status.chan) {
+		/* The channel is not longer viable, kill it. */
+		gk104_fifo_recover_chan(&fifo->base, status.chan->id);
+	}
+
+	/* Determine MMU fault ID for the engine, if we're not being
+	 * called from the fault handler already.
+	 */
+	if (!status.faulted && engine) {
+		mmui = nvkm_top_fault_id(device, engine->subdev.index);
+		if (mmui < 0) {
+			const struct nvkm_enum *en = fifo->func->fault.engine;
+			for (; en && en->name; en++) {
+				if (en->data2 == engine->subdev.index) {
+					mmui = en->value;
+					break;
+				}
+			}
+		}
+		WARN_ON(mmui < 0);
+	}
+
+	/* Trigger a MMU fault for the engine.
+	 *
+	 * No good idea why this is needed, but nvgpu does something similar,
+	 * and it makes recovery from CTXSW_TIMEOUT a lot more reliable.
+	 */
+	if (mmui >= 0) {
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000100 | mmui);
+
+		/* Wait for fault to trigger. */
+		nvkm_msec(device, 2000,
+			gk104_fifo_engine_status(fifo, engn, &status);
+			if (status.faulted)
+				break;
+		);
+
+		/* Release MMU fault trigger, and ACK the fault. */
+		nvkm_wr32(device, 0x002a30 + (engn * 0x04), 0x00000000);
+		nvkm_wr32(device, 0x00259c, BIT(mmui));
+		nvkm_wr32(device, 0x002100, 0x10000000);
+	}
+
+	/* Schedule recovery. */
+	nvkm_warn(subdev, "engine %d: scheduled for recovery\n", engn);
 	schedule_work(&fifo->recover.work);
 }
 
@@ -211,34 +380,30 @@ static void
 gk104_fifo_intr_sched_ctxsw(struct gk104_fifo *fifo)
 {
 	struct nvkm_device *device = fifo->base.engine.subdev.device;
-	struct gk104_fifo_chan *chan;
-	unsigned long flags;
+	unsigned long flags, engm = 0;
 	u32 engn;
 
+	/* We need to ACK the SCHED_ERROR here, and prevent it reasserting,
+	 * as MMU_FAULT cannot be triggered while it's pending.
+	 */
 	spin_lock_irqsave(&fifo->base.lock, flags);
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000000);
+	nvkm_wr32(device, 0x002100, 0x00000100);
+
 	for (engn = 0; engn < fifo->engine_nr; engn++) {
-		struct nvkm_engine *engine = fifo->engine[engn].engine;
-		int runl = fifo->engine[engn].runl;
-		u32 stat = nvkm_rd32(device, 0x002640 + (engn * 0x08));
-		u32 busy = (stat & 0x80000000);
-		u32 next = (stat & 0x0fff0000) >> 16;
-		u32 chsw = (stat & 0x00008000);
-		u32 save = (stat & 0x00004000);
-		u32 load = (stat & 0x00002000);
-		u32 prev = (stat & 0x00000fff);
-		u32 chid = load ? next : prev;
-		(void)save;
-
-		if (!busy || !chsw)
+		struct gk104_fifo_engine_status status;
+
+		gk104_fifo_engine_status(fifo, engn, &status);
+		if (!status.busy || !status.chsw)
 			continue;
 
-		list_for_each_entry(chan, &fifo->runlist[runl].chan, head) {
-			if (chan->base.chid == chid && engine) {
-				gk104_fifo_recover(fifo, engine, chan);
-				break;
-			}
-		}
+		engm |= BIT(engn);
 	}
+
+	for_each_set_bit(engn, &engm, fifo->engine_nr)
+		gk104_fifo_recover_engn(fifo, engn);
+
+	nvkm_mask(device, 0x002140, 0x00000100, 0x00000100);
 	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
@@ -301,6 +466,7 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 	struct nvkm_fifo_chan *chan;
 	unsigned long flags;
 	char gpcid[8] = "", en[16] = "";
+	int engn;
 
 	er = nvkm_enum_find(fifo->func->fault.reason, reason);
 	eu = nvkm_enum_find(fifo->func->fault.engine, unit);
@@ -342,7 +508,8 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		snprintf(en, sizeof(en), "%s", eu->name);
 	}
 
-	chan = nvkm_fifo_chan_inst(&fifo->base, (u64)inst << 12, &flags);
+	spin_lock_irqsave(&fifo->base.lock, flags);
+	chan = nvkm_fifo_chan_inst_locked(&fifo->base, (u64)inst << 12);
 
 	nvkm_error(subdev,
 		   "%s fault at %010llx engine %02x [%s] client %02x [%s%s] "
@@ -353,9 +520,23 @@ gk104_fifo_intr_fault(struct gk104_fifo *fifo, int unit)
 		   (u64)inst << 12,
 		   chan ? chan->object.client->name : "unknown");
 
-	if (engine && chan)
-		gk104_fifo_recover(fifo, engine, (void *)chan);
-	nvkm_fifo_chan_put(&fifo->base, flags, &chan);
+
+	/* Kill the channel that caused the fault. */
+	if (chan)
+		gk104_fifo_recover_chan(&fifo->base, chan->chid);
+
+	/* Channel recovery will probably have already done this for the
+	 * correct engine(s), but just in case we can't find the channel
+	 * information...
+	 */
+	for (engn = 0; engn < fifo->engine_nr && engine; engn++) {
+		if (fifo->engine[engn].engine == engine) {
+			gk104_fifo_recover_engn(fifo, engn);
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&fifo->base.lock, flags);
 }
 
 static const struct nvkm_bitfield gk104_fifo_pbdma_intr_0[] = {
@@ -716,6 +897,7 @@ gk104_fifo_ = {
 	.intr = gk104_fifo_intr,
 	.uevent_init = gk104_fifo_uevent_init,
 	.uevent_fini = gk104_fifo_uevent_fini,
+	.recover_chan = gk104_fifo_recover_chan,
 	.class_get = gk104_fifo_class_get,
 };
 
@@ -743,14 +925,14 @@ gk104_fifo_fault_engine[] = {
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
 	{ 0x06, "SCHED" },
-	{ 0x07, "HOST0" },
-	{ 0x08, "HOST1" },
-	{ 0x09, "HOST2" },
-	{ 0x0a, "HOST3" },
-	{ 0x0b, "HOST4" },
-	{ 0x0c, "HOST5" },
-	{ 0x0d, "HOST6" },
-	{ 0x0e, "HOST7" },
+	{ 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO },
+	{ 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO },
+	{ 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO },
 	{ 0x0f, "HOSTSR" },
 	{ 0x10, "MSVLD", NULL, NVKM_ENGINE_MSVLD },
 	{ 0x11, "MSPPP", NULL, NVKM_ENGINE_MSPPP },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
index bd1ff877aa06..29c080683b32 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gm107.c
@@ -32,14 +32,14 @@ gm107_fifo_fault_engine[] = {
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
 	{ 0x06, "SCHED" },
-	{ 0x07, "HOST0" },
-	{ 0x08, "HOST1" },
-	{ 0x09, "HOST2" },
-	{ 0x0a, "HOST3" },
-	{ 0x0b, "HOST4" },
-	{ 0x0c, "HOST5" },
-	{ 0x0d, "HOST6" },
-	{ 0x0e, "HOST7" },
+	{ 0x07, "HOST0", NULL, NVKM_ENGINE_FIFO },
+	{ 0x08, "HOST1", NULL, NVKM_ENGINE_FIFO },
+	{ 0x09, "HOST2", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0a, "HOST3", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0b, "HOST4", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0c, "HOST5", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0d, "HOST6", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0e, "HOST7", NULL, NVKM_ENGINE_FIFO },
 	{ 0x0f, "HOSTSR" },
 	{ 0x13, "PERF" },
 	{ 0x17, "PMU" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
index eff83f7fb705..b2635aea9f6e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gp100.c
@@ -30,17 +30,17 @@ gp100_fifo_fault_engine[] = {
 	{ 0x03, "IFB", NULL, NVKM_ENGINE_IFB },
 	{ 0x04, "BAR1", NULL, NVKM_SUBDEV_BAR },
 	{ 0x05, "BAR2", NULL, NVKM_SUBDEV_INSTMEM },
-	{ 0x06, "HOST0" },
-	{ 0x07, "HOST1" },
-	{ 0x08, "HOST2" },
-	{ 0x09, "HOST3" },
-	{ 0x0a, "HOST4" },
-	{ 0x0b, "HOST5" },
-	{ 0x0c, "HOST6" },
-	{ 0x0d, "HOST7" },
-	{ 0x0e, "HOST8" },
-	{ 0x0f, "HOST9" },
-	{ 0x10, "HOST10" },
+	{ 0x06, "HOST0", NULL, NVKM_ENGINE_FIFO },
+	{ 0x07, "HOST1", NULL, NVKM_ENGINE_FIFO },
+	{ 0x08, "HOST2", NULL, NVKM_ENGINE_FIFO },
+	{ 0x09, "HOST3", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0a, "HOST4", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0b, "HOST5", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0c, "HOST6", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0d, "HOST7", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0e, "HOST8", NULL, NVKM_ENGINE_FIFO },
+	{ 0x0f, "HOST9", NULL, NVKM_ENGINE_FIFO },
+	{ 0x10, "HOST10", NULL, NVKM_ENGINE_FIFO },
 	{ 0x13, "PERF" },
 	{ 0x17, "PMU" },
 	{ 0x18, "PTP" },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
index cbc67f262322..f9e0377d3d24 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogf100.c
@@ -32,6 +32,23 @@
 #include <nvif/cl906f.h>
 #include <nvif/unpack.h>
 
+int
+gf100_fifo_chan_ntfy(struct nvkm_fifo_chan *chan, u32 type,
+		     struct nvkm_event **pevent)
+{
+	switch (type) {
+	case NV906F_V0_NTFY_NON_STALL_INTERRUPT:
+		*pevent = &chan->fifo->uevent;
+		return 0;
+	case NV906F_V0_NTFY_KILLED:
+		*pevent = &chan->fifo->kevent;
+		return 0;
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 static u32
 gf100_fifo_gpfifo_engine_addr(struct nvkm_engine *engine)
 {
@@ -60,6 +77,7 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 	struct nvkm_gpuobj *inst = chan->base.inst;
 	int ret = 0;
 
+	mutex_lock(&subdev->mutex);
 	nvkm_wr32(device, 0x002634, chan->base.chid);
 	if (nvkm_msec(device, 2000,
 		if (nvkm_rd32(device, 0x002634) == chan->base.chid)
@@ -67,10 +85,12 @@ gf100_fifo_gpfifo_engine_fini(struct nvkm_fifo_chan *base,
 	) < 0) {
 		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
 			   chan->base.chid, chan->base.object.client->name);
-		ret = -EBUSY;
-		if (suspend)
-			return ret;
+		ret = -ETIMEDOUT;
 	}
+	mutex_unlock(&subdev->mutex);
+
+	if (ret && suspend)
+		return ret;
 
 	if (offset) {
 		nvkm_kmap(inst);
@@ -181,7 +201,7 @@ gf100_fifo_gpfifo_func = {
 	.dtor = gf100_fifo_gpfifo_dtor,
 	.init = gf100_fifo_gpfifo_init,
 	.fini = gf100_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gf100_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gf100_fifo_gpfifo_engine_dtor,
 	.engine_init = gf100_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
index ed4351032ed6..8abf6f8ef445 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/gpfifogk104.c
@@ -40,7 +40,9 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 	struct nvkm_subdev *subdev = &fifo->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_client *client = chan->base.object.client;
+	int ret = 0;
 
+	mutex_lock(&subdev->mutex);
 	nvkm_wr32(device, 0x002634, chan->base.chid);
 	if (nvkm_msec(device, 2000,
 		if (!(nvkm_rd32(device, 0x002634) & 0x00100000))
@@ -48,10 +50,11 @@ gk104_fifo_gpfifo_kick(struct gk104_fifo_chan *chan)
 	) < 0) {
 		nvkm_error(subdev, "channel %d [%s] kick timeout\n",
 			   chan->base.chid, client->name);
-		return -EBUSY;
+		nvkm_fifo_recover_chan(&fifo->base, chan->base.chid);
+		ret = -ETIMEDOUT;
 	}
-
-	return 0;
+	mutex_unlock(&subdev->mutex);
+	return ret;
 }
 
 static u32
@@ -211,7 +214,7 @@ gk104_fifo_gpfifo_func = {
 	.dtor = gk104_fifo_gpfifo_dtor,
 	.init = gk104_fifo_gpfifo_init,
 	.fini = gk104_fifo_gpfifo_fini,
-	.ntfy = g84_fifo_chan_ntfy,
+	.ntfy = gf100_fifo_chan_ntfy,
 	.engine_ctor = gk104_fifo_gpfifo_engine_ctor,
 	.engine_dtor = gk104_fifo_gpfifo_engine_dtor,
 	.engine_init = gk104_fifo_gpfifo_engine_init,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
index f6dfb37d9429..f889b13b5e41 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/fifo/priv.h
@@ -6,6 +6,12 @@
 int nvkm_fifo_ctor(const struct nvkm_fifo_func *, struct nvkm_device *,
 		   int index, int nr, struct nvkm_fifo *);
 void nvkm_fifo_uevent(struct nvkm_fifo *);
+void nvkm_fifo_cevent(struct nvkm_fifo *);
+void nvkm_fifo_kevent(struct nvkm_fifo *, int chid);
+void nvkm_fifo_recover_chan(struct nvkm_fifo *, int chid);
+
+struct nvkm_fifo_chan *
+nvkm_fifo_chan_inst_locked(struct nvkm_fifo *, u64 inst);
 
 struct nvkm_fifo_chan_oclass;
 struct nvkm_fifo_func {
@@ -18,6 +24,7 @@ struct nvkm_fifo_func {
 	void (*start)(struct nvkm_fifo *, unsigned long *);
 	void (*uevent_init)(struct nvkm_fifo *);
 	void (*uevent_fini)(struct nvkm_fifo *);
+	void (*recover_chan)(struct nvkm_fifo *, int chid);
 	int (*class_get)(struct nvkm_fifo *, int index,
 			 const struct nvkm_fifo_chan_oclass **);
 	const struct nvkm_fifo_chan_oclass *chan[];
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
index 467065d1b4e6..cd8cf6f7024c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/base.c
@@ -25,6 +25,15 @@
 
 #include <engine/fifo.h>
 
+static bool
+nvkm_gr_chsw_load(struct nvkm_engine *engine)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->chsw_load)
+		return gr->func->chsw_load(gr);
+	return false;
+}
+
 static void
 nvkm_gr_tile(struct nvkm_engine *engine, int region, struct nvkm_fb_tile *tile)
 {
@@ -106,6 +115,15 @@ nvkm_gr_init(struct nvkm_engine *engine)
 	return gr->func->init(gr);
 }
 
+static int
+nvkm_gr_fini(struct nvkm_engine *engine, bool suspend)
+{
+	struct nvkm_gr *gr = nvkm_gr(engine);
+	if (gr->func->fini)
+		return gr->func->fini(gr, suspend);
+	return 0;
+}
+
 static void *
 nvkm_gr_dtor(struct nvkm_engine *engine)
 {
@@ -120,8 +138,10 @@ nvkm_gr = {
 	.dtor = nvkm_gr_dtor,
 	.oneinit = nvkm_gr_oneinit,
 	.init = nvkm_gr_init,
+	.fini = nvkm_gr_fini,
 	.intr = nvkm_gr_intr,
 	.tile = nvkm_gr_tile,
+	.chsw_load = nvkm_gr_chsw_load,
 	.fifo.cclass = nvkm_gr_cclass_new,
 	.fifo.sclass = nvkm_gr_oclass_get,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
index c925ade5880e..74a64e3fd59a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgf117.c
@@ -218,7 +218,7 @@ gf117_grctx_generate_attrib(struct gf100_grctx *info)
 	}
 }
 
-void
+static void
 gf117_grctx_generate_main(struct gf100_gr *gr, struct gf100_grctx *info)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
index 6d3c5011e18c..4c4b5ab6e46d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxgm107.c
@@ -933,7 +933,7 @@ gm107_grctx_generate_attrib(struct gf100_grctx *info)
 	}
 }
 
-void
+static void
 gm107_grctx_generate_tpcid(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
index 1e13278cf306..c8bb9191f9a2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/ctxnv50.c
@@ -106,6 +106,7 @@
 #define CP_SEEK_2      0x00c800ff
 
 #include "ctxnv40.h"
+#include "nv50.h"
 
 #include <subdev/fb.h>
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
index 8cb240b65ec2..12a703fe355d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf100.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf100_grgpc_data[] = {
+static uint32_t gf100_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x00000064,
 /* 0x0004: gpc_mmio_list_tail */
@@ -36,7 +36,7 @@ uint32_t gf100_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gf100_grgpc_code[] = {
+static uint32_t gf100_grgpc_code[] = {
 	0x03a10ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
index 550d6ba0933b..ffbfc51200f1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgf117.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf117_grgpc_data[] = {
+static uint32_t gf117_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x0000006c,
 /* 0x0004: gpc_mmio_list_tail */
@@ -40,7 +40,7 @@ uint32_t gf117_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gf117_grgpc_code[] = {
+static uint32_t gf117_grgpc_code[] = {
 	0x03a10ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
index 271b59d365e5..357f662de571 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk104.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gk104_grgpc_data[] = {
+static uint32_t gk104_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x0000006c,
 /* 0x0004: gpc_mmio_list_tail */
@@ -40,7 +40,7 @@ uint32_t gk104_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gk104_grgpc_code[] = {
+static uint32_t gk104_grgpc_code[] = {
 	0x03a10ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
index 73b4a32c5d29..4ffc8212a85c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk110.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gk110_grgpc_data[] = {
+static uint32_t gk110_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x0000006c,
 /* 0x0004: gpc_mmio_list_tail */
@@ -40,7 +40,7 @@ uint32_t gk110_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gk110_grgpc_code[] = {
+static uint32_t gk110_grgpc_code[] = {
 	0x03a10ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
index 018169818317..09196206c9bc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgk208.fuc5.h
@@ -1,4 +1,4 @@
-uint32_t gk208_grgpc_data[] = {
+static uint32_t gk208_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x0000006c,
 /* 0x0004: gpc_mmio_list_tail */
@@ -40,7 +40,7 @@ uint32_t gk208_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gk208_grgpc_code[] = {
+static uint32_t gk208_grgpc_code[] = {
 	0x03140ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
index eca007f03fa9..6d7d004363d9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/gpcgm107.fuc5.h
@@ -1,4 +1,4 @@
-uint32_t gm107_grgpc_data[] = {
+static uint32_t gm107_grgpc_data[] = {
 /* 0x0000: gpc_mmio_list_head */
 	0x0000006c,
 /* 0x0004: gpc_mmio_list_tail */
@@ -40,7 +40,7 @@ uint32_t gm107_grgpc_data[] = {
 	0x00000000,
 };
 
-uint32_t gm107_grgpc_code[] = {
+static uint32_t gm107_grgpc_code[] = {
 	0x03410ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
index 8015b40a61d6..7538404b8b13 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf100.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf100_grhub_data[] = {
+static uint32_t gf100_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gf100_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gf100_grhub_code[] = {
+static uint32_t gf100_grhub_code[] = {
 	0x039b0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
index 2af90ec6852a..ce000a47ec6d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgf117.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf117_grhub_data[] = {
+static uint32_t gf117_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gf117_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gf117_grhub_code[] = {
+static uint32_t gf117_grhub_code[] = {
 	0x039b0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
index e8b8c1c94700..1f26cb6a233c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk104.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gk104_grhub_data[] = {
+static uint32_t gk104_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gk104_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gk104_grhub_code[] = {
+static uint32_t gk104_grhub_code[] = {
 	0x039b0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
index f4ed2fb6f714..70436d93efe3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk110.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gk110_grhub_data[] = {
+static uint32_t gk110_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gk110_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gk110_grhub_code[] = {
+static uint32_t gk110_grhub_code[] = {
 	0x039b0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
index ed488973c117..e0933a07426a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgk208.fuc5.h
@@ -1,4 +1,4 @@
-uint32_t gk208_grhub_data[] = {
+static uint32_t gk208_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gk208_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gk208_grhub_code[] = {
+static uint32_t gk208_grhub_code[] = {
 	0x030e0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
index 5c9051839557..9b432823bcbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/fuc/hubgm107.fuc5.h
@@ -1,4 +1,4 @@
-uint32_t gm107_grhub_data[] = {
+static uint32_t gm107_grhub_data[] = {
 /* 0x0000: hub_mmio_list_head */
 	0x00000300,
 /* 0x0004: hub_mmio_list_tail */
@@ -205,7 +205,7 @@ uint32_t gm107_grhub_data[] = {
 	0x0417e91c,
 };
 
-uint32_t gm107_grhub_code[] = {
+static uint32_t gm107_grhub_code[] = {
 	0x030e0ef5,
 /* 0x0004: queue_put */
 	0x9800d898,
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
index ce913300539f..da1ba74682b4 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/g84.c
@@ -25,6 +25,8 @@
 
 #include <subdev/timer.h>
 
+#include <nvif/class.h>
+
 static const struct nvkm_bitfield nv50_gr_status[] = {
 	{ 0x00000001, "BUSY" }, /* set when any bit is set */
 	{ 0x00000002, "DISPATCH" },
@@ -180,11 +182,11 @@ g84_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8297, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, G82_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
index 157919c788e6..f9acb8a944d2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.c
@@ -702,6 +702,22 @@ gf100_gr_pack_mmio[] = {
  * PGRAPH engine/subdev functions
  ******************************************************************************/
 
+static bool
+gf100_gr_chsw_load(struct nvkm_gr *base)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	if (!gr->firmware) {
+		u32 trace = nvkm_rd32(gr->base.engine.subdev.device, 0x40981c);
+		if (trace & 0x00000040)
+			return true;
+	} else {
+		u32 mthd = nvkm_rd32(gr->base.engine.subdev.device, 0x409808);
+		if (mthd & 0x00080000)
+			return true;
+	}
+	return false;
+}
+
 int
 gf100_gr_rops(struct gf100_gr *gr)
 {
@@ -1041,6 +1057,13 @@ gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
 		stat &= ~0x00000008;
 	}
 
+	if (stat & 0x00000010) {
+		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0430));
+		nvkm_error(subdev, "GPC%d/TPC%d/MPC: %08x\n", gpc, tpc, trap);
+		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0430), 0xc0000000);
+		stat &= ~0x00000010;
+	}
+
 	if (stat) {
 		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
 	}
@@ -1129,7 +1152,7 @@ gf100_gr_trap_intr(struct gf100_gr *gr)
 	if (trap & 0x00000008) {
 		u32 stat = nvkm_rd32(device, 0x408030);
 
-		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
+		nvkm_snprintbf(error, sizeof(error), gf100_ccache_error,
 			       stat & 0x3fffffff);
 		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
 		nvkm_wr32(device, 0x408030, 0xc0000000);
@@ -1258,7 +1281,7 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr)
 	struct nvkm_device *device = subdev->device;
 	u32 stat = nvkm_rd32(device, 0x409c18);
 
-	if (stat & 0x00000001) {
+	if (!gr->firmware && (stat & 0x00000001)) {
 		u32 code = nvkm_rd32(device, 0x409814);
 		if (code == E_BAD_FWMTHD) {
 			u32 class = nvkm_rd32(device, 0x409808);
@@ -1270,15 +1293,14 @@ gf100_gr_ctxctl_isr(struct gf100_gr *gr)
 			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
 					   "mthd %04x data %08x\n",
 				   subc, class, mthd, data);
-
-			nvkm_wr32(device, 0x409c20, 0x00000001);
-			stat &= ~0x00000001;
 		} else {
 			nvkm_error(subdev, "FECS ucode error %d\n", code);
 		}
+		nvkm_wr32(device, 0x409c20, 0x00000001);
+		stat &= ~0x00000001;
 	}
 
-	if (stat & 0x00080000) {
+	if (!gr->firmware && (stat & 0x00080000)) {
 		nvkm_error(subdev, "FECS watchdog timeout\n");
 		gf100_gr_ctxctl_debug(gr);
 		nvkm_wr32(device, 0x409c20, 0x00080000);
@@ -1384,27 +1406,12 @@ gf100_gr_intr(struct nvkm_gr *base)
 	nvkm_fifo_chan_put(device->fifo, flags, &chan);
 }
 
-void
-gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
+static void
+gf100_gr_init_fw(struct nvkm_falcon *falcon,
 		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
 {
-	struct nvkm_device *device = gr->base.engine.subdev.device;
-	int i;
-
-	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
-	for (i = 0; i < data->size / 4; i++)
-		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
-
-	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
-	for (i = 0; i < code->size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
-		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
-	}
-
-	/* code must be padded to 0x40 words */
-	for (; i & 0x3f; i++)
-		nvkm_wr32(device, fuc_base + 0x0184, 0);
+	nvkm_falcon_load_dmem(falcon, data->data, 0x0, data->size, 0);
+	nvkm_falcon_load_imem(falcon, code->data, 0x0, code->size, 0, 0, false);
 }
 
 static void
@@ -1449,162 +1456,149 @@ gf100_gr_init_csdata(struct gf100_gr *gr,
 	nvkm_wr32(device, falcon + 0x01c4, star + 4);
 }
 
-int
-gf100_gr_init_ctxctl(struct gf100_gr *gr)
+/* Initialize context from an external (secure or not) firmware */
+static int
+gf100_gr_init_ctxctl_ext(struct gf100_gr *gr)
 {
-	const struct gf100_grctx_func *grctx = gr->func->grctx;
 	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_secboot *sb = device->secboot;
-	int i;
 	int ret = 0;
 
-	if (gr->firmware) {
-		/* load fuc microcode */
-		nvkm_mc_unk260(device, 0);
-
-		/* securely-managed falcons must be reset using secure boot */
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
-					 &gr->fuc409d);
-		if (ret)
-			return ret;
+	/* load fuc microcode */
+	nvkm_mc_unk260(device, 0);
 
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
-					 &gr->fuc41ad);
-		if (ret)
-			return ret;
+	/* securely-managed falcons must be reset using secure boot */
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
+	else
+		gf100_gr_init_fw(gr->fecs, &gr->fuc409c, &gr->fuc409d);
+	if (ret)
+		return ret;
 
-		nvkm_mc_unk260(device, 1);
-
-		/* start both of them running */
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x41a10c, 0x00000000);
-		nvkm_wr32(device, 0x40910c, 0x00000000);
-
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
-		else
-			nvkm_wr32(device, 0x41a100, 0x00000002);
-		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
-			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
-		else
-			nvkm_wr32(device, 0x409100, 0x00000002);
-		if (nvkm_msec(device, 2000,
-			if (nvkm_rd32(device, 0x409800) & 0x00000001)
-				break;
-		) < 0)
-			return -EBUSY;
+	if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
+		ret = nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
+	else
+		gf100_gr_init_fw(gr->gpccs, &gr->fuc41ac, &gr->fuc41ad);
+	if (ret)
+		return ret;
+
+	nvkm_mc_unk260(device, 1);
+
+	/* start both of them running */
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x41a10c, 0x00000000);
+	nvkm_wr32(device, 0x40910c, 0x00000000);
+
+	nvkm_falcon_start(gr->gpccs);
+	nvkm_falcon_start(gr->fecs);
+
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800) & 0x00000001)
+			break;
+	) < 0)
+		return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x7fffffff);
-		nvkm_wr32(device, 0x409504, 0x00000021);
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x7fffffff);
+	nvkm_wr32(device, 0x409504, 0x00000021);
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000010);
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000010);
+	if (nvkm_msec(device, 2000,
+		if ((gr->size = nvkm_rd32(device, 0x409800)))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000016);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	nvkm_wr32(device, 0x409840, 0xffffffff);
+	nvkm_wr32(device, 0x409500, 0x00000000);
+	nvkm_wr32(device, 0x409504, 0x00000025);
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x409800))
+			break;
+	) < 0)
+		return -EBUSY;
+
+	if (device->chipset >= 0xe0) {
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000030);
 		if (nvkm_msec(device, 2000,
-			if ((gr->size = nvkm_rd32(device, 0x409800)))
+			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000016);
+		nvkm_wr32(device, 0x409810, 0xb00095c8);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000031);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		nvkm_wr32(device, 0x409840, 0xffffffff);
-		nvkm_wr32(device, 0x409500, 0x00000000);
-		nvkm_wr32(device, 0x409504, 0x00000025);
+		nvkm_wr32(device, 0x409810, 0x00080420);
+		nvkm_wr32(device, 0x409800, 0x00000000);
+		nvkm_wr32(device, 0x409500, 0x00000001);
+		nvkm_wr32(device, 0x409504, 0x00000032);
 		if (nvkm_msec(device, 2000,
 			if (nvkm_rd32(device, 0x409800))
 				break;
 		) < 0)
 			return -EBUSY;
 
-		if (device->chipset >= 0xe0) {
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000030);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0xb00095c8);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000031);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
-
-			nvkm_wr32(device, 0x409810, 0x00080420);
-			nvkm_wr32(device, 0x409800, 0x00000000);
-			nvkm_wr32(device, 0x409500, 0x00000001);
-			nvkm_wr32(device, 0x409504, 0x00000032);
-			if (nvkm_msec(device, 2000,
-				if (nvkm_rd32(device, 0x409800))
-					break;
-			) < 0)
-				return -EBUSY;
+		nvkm_wr32(device, 0x409614, 0x00000070);
+		nvkm_wr32(device, 0x409614, 0x00000770);
+		nvkm_wr32(device, 0x40802c, 0x00000001);
+	}
 
-			nvkm_wr32(device, 0x409614, 0x00000070);
-			nvkm_wr32(device, 0x409614, 0x00000770);
-			nvkm_wr32(device, 0x40802c, 0x00000001);
+	if (gr->data == NULL) {
+		int ret = gf100_grctx_generate(gr);
+		if (ret) {
+			nvkm_error(subdev, "failed to construct context\n");
+			return ret;
 		}
+	}
 
-		if (gr->data == NULL) {
-			int ret = gf100_grctx_generate(gr);
-			if (ret) {
-				nvkm_error(subdev, "failed to construct context\n");
-				return ret;
-			}
-		}
+	return 0;
+}
+
+static int
+gf100_gr_init_ctxctl_int(struct gf100_gr *gr)
+{
+	const struct gf100_grctx_func *grctx = gr->func->grctx;
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
 
-		return 0;
-	} else
 	if (!gr->func->fecs.ucode) {
 		return -ENOSYS;
 	}
 
 	/* load HUB microcode */
 	nvkm_mc_unk260(device, 0);
-	nvkm_wr32(device, 0x4091c0, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x409180, 0x01000000);
-	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x409188, i >> 6);
-		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->fecs, gr->func->fecs.ucode->data.data, 0x0,
+			      gr->func->fecs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->fecs, gr->func->fecs.ucode->code.data, 0x0,
+			      gr->func->fecs.ucode->code.size, 0, 0, false);
 
 	/* load GPC microcode */
-	nvkm_wr32(device, 0x41a1c0, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
-		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
-
-	nvkm_wr32(device, 0x41a180, 0x01000000);
-	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x41a188, i >> 6);
-		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
-	}
+	nvkm_falcon_load_dmem(gr->gpccs, gr->func->gpccs.ucode->data.data, 0x0,
+			      gr->func->gpccs.ucode->data.size, 0);
+	nvkm_falcon_load_imem(gr->gpccs, gr->func->gpccs.ucode->code.data, 0x0,
+			      gr->func->gpccs.ucode->code.size, 0, 0, false);
 	nvkm_mc_unk260(device, 1);
 
 	/* load register lists */
@@ -1636,6 +1630,19 @@ gf100_gr_init_ctxctl(struct gf100_gr *gr)
 	return 0;
 }
 
+int
+gf100_gr_init_ctxctl(struct gf100_gr *gr)
+{
+	int ret;
+
+	if (gr->firmware)
+		ret = gf100_gr_init_ctxctl_ext(gr);
+	else
+		ret = gf100_gr_init_ctxctl_int(gr);
+
+	return ret;
+}
+
 static int
 gf100_gr_oneinit(struct nvkm_gr *base)
 {
@@ -1701,14 +1708,36 @@ gf100_gr_oneinit(struct nvkm_gr *base)
 	return 0;
 }
 
-int
+static int
 gf100_gr_init_(struct nvkm_gr *base)
 {
 	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &base->engine.subdev;
+	u32 ret;
+
 	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
+
+	ret = nvkm_falcon_get(gr->fecs, subdev);
+	if (ret)
+		return ret;
+
+	ret = nvkm_falcon_get(gr->gpccs, subdev);
+	if (ret)
+		return ret;
+
 	return gr->func->init(gr);
 }
 
+static int
+gf100_gr_fini_(struct nvkm_gr *base, bool suspend)
+{
+	struct gf100_gr *gr = gf100_gr(base);
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	nvkm_falcon_put(gr->gpccs, subdev);
+	nvkm_falcon_put(gr->fecs, subdev);
+	return 0;
+}
+
 void
 gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
 {
@@ -1731,6 +1760,9 @@ gf100_gr_dtor(struct nvkm_gr *base)
 		gr->func->dtor(gr);
 	kfree(gr->data);
 
+	nvkm_falcon_del(&gr->gpccs);
+	nvkm_falcon_del(&gr->fecs);
+
 	gf100_gr_dtor_fw(&gr->fuc409c);
 	gf100_gr_dtor_fw(&gr->fuc409d);
 	gf100_gr_dtor_fw(&gr->fuc41ac);
@@ -1749,13 +1781,59 @@ gf100_gr_ = {
 	.dtor = gf100_gr_dtor,
 	.oneinit = gf100_gr_oneinit,
 	.init = gf100_gr_init_,
+	.fini = gf100_gr_fini_,
 	.intr = gf100_gr_intr,
 	.units = gf100_gr_units,
 	.chan_new = gf100_gr_chan_new,
 	.object_get = gf100_gr_object_get,
+	.chsw_load = gf100_gr_chsw_load,
 };
 
 int
+gf100_gr_ctor_fw_legacy(struct gf100_gr *gr, const char *fwname,
+			struct gf100_gr_fuc *fuc, int ret)
+{
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
+	struct nvkm_device *device = subdev->device;
+	const struct firmware *fw;
+	char f[32];
+
+	/* see if this firmware has a legacy path */
+	if (!strcmp(fwname, "fecs_inst"))
+		fwname = "fuc409c";
+	else if (!strcmp(fwname, "fecs_data"))
+		fwname = "fuc409d";
+	else if (!strcmp(fwname, "gpccs_inst"))
+		fwname = "fuc41ac";
+	else if (!strcmp(fwname, "gpccs_data"))
+		fwname = "fuc41ad";
+	else {
+		/* nope, let's just return the error we got */
+		nvkm_error(subdev, "failed to load %s\n", fwname);
+		return ret;
+	}
+
+	/* yes, try to load from the legacy path */
+	nvkm_debug(subdev, "%s: falling back to legacy path\n", fwname);
+
+	snprintf(f, sizeof(f), "nouveau/nv%02x_%s", device->chipset, fwname);
+	ret = request_firmware(&fw, f, device->dev);
+	if (ret) {
+		snprintf(f, sizeof(f), "nouveau/%s", fwname);
+		ret = request_firmware(&fw, f, device->dev);
+		if (ret) {
+			nvkm_error(subdev, "failed to load %s\n", fwname);
+			return ret;
+		}
+	}
+
+	fuc->size = fw->size;
+	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
+	release_firmware(fw);
+	return (fuc->data != NULL) ? 0 : -ENOMEM;
+}
+
+int
 gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 		 struct gf100_gr_fuc *fuc)
 {
@@ -1765,10 +1843,8 @@ gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
 	int ret;
 
 	ret = nvkm_firmware_get(device, fwname, &fw);
-	if (ret) {
-		nvkm_error(subdev, "failed to load %s\n", fwname);
-		return ret;
-	}
+	if (ret)
+		return gf100_gr_ctor_fw_legacy(gr, fwname, fuc, ret);
 
 	fuc->size = fw->size;
 	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
@@ -1780,6 +1856,7 @@ int
 gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	      int index, struct gf100_gr *gr)
 {
+	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
 	int ret;
 
 	gr->func = func;
@@ -1792,7 +1869,11 @@ gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
 	if (ret)
 		return ret;
 
-	return 0;
+	ret = nvkm_falcon_v1_new(subdev, "FECS", 0x409000, &gr->fecs);
+	if (ret)
+		return ret;
+
+	return nvkm_falcon_v1_new(subdev, "GPCCS", 0x41a000, &gr->gpccs);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
index 268b8d60ff73..db6ee3b06841 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf100.h
@@ -29,6 +29,7 @@
 #include <core/gpuobj.h>
 #include <subdev/ltc.h>
 #include <subdev/mmu.h>
+#include <engine/falcon.h>
 
 #define GPC_MAX 32
 #define TPC_MAX_PER_GPC 8
@@ -75,6 +76,8 @@ struct gf100_gr {
 	const struct gf100_gr_func *func;
 	struct nvkm_gr base;
 
+	struct nvkm_falcon *fecs;
+	struct nvkm_falcon *gpccs;
 	struct gf100_gr_fuc fuc409c;
 	struct gf100_gr_fuc fuc409d;
 	struct gf100_gr_fuc fuc41ac;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
index 70335f65c51e..0124e468086e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gf117.c
@@ -102,7 +102,7 @@ gf117_gr_pack_mmio[] = {
 
 #include "fuc/hubgf117.fuc3.h"
 
-struct gf100_gr_ucode
+static struct gf100_gr_ucode
 gf117_gr_fecs_ucode = {
 	.code.data = gf117_grhub_code,
 	.code.size = sizeof(gf117_grhub_code),
@@ -112,7 +112,7 @@ gf117_gr_fecs_ucode = {
 
 #include "fuc/gpcgf117.fuc3.h"
 
-struct gf100_gr_ucode
+static struct gf100_gr_ucode
 gf117_gr_gpccs_ucode = {
 	.code.data = gf117_grgpc_code,
 	.code.size = sizeof(gf117_grgpc_code),
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
index 45f965f608a7..2c67fac576d1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gm107.c
@@ -308,7 +308,7 @@ gm107_gr_init_bios(struct gf100_gr *gr)
 	}
 }
 
-int
+static int
 gm107_gr_init(struct gf100_gr *gr)
 {
 	struct nvkm_device *device = gr->base.engine.subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
index 2e68919f00b2..c711a55ce392 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt200.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt200_gr = {
 	.init = nv50_gr_init,
@@ -31,11 +33,11 @@ gt200_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
index 2bf7aac360cc..fa103df32ec7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/gt215.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 gt215_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ gt215_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8597, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_TESLA, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
index 95d5219faf93..eb1a90644752 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp79.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp79_gr = {
 	.init = nv50_gr_init,
@@ -30,11 +32,11 @@ mcp79_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x8397, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT200_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
index 027b58e5976b..c91eb56e9327 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/mcp89.c
@@ -23,6 +23,8 @@
  */
 #include "nv50.h"
 
+#include <nvif/class.h>
+
 static const struct nvkm_gr_func
 mcp89_gr = {
 	.init = nv50_gr_init,
@@ -31,12 +33,12 @@ mcp89_gr = {
 	.tlb_flush = g84_gr_tlb_flush,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
-		{ -1, -1, 0x85c0, &nv50_gr_object },
-		{ -1, -1, 0x8697, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT214_COMPUTE, &nv50_gr_object },
+		{ -1, -1, GT21A_TESLA, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
index f1e15a4d4f64..b4e3c50badc7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv30.c
@@ -187,6 +187,7 @@ nv30_gr = {
 		{ -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */
 		{ -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */
 		{ -1, -1, 0x0397, &nv04_gr_object }, /* rankine */
+		{ -1, -1, 0x0597, &nv04_gr_object }, /* kelvin */
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
index 300f5ed5de0b..e7ed04b935cd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv34.c
@@ -123,6 +123,7 @@ nv34_gr = {
 		{ -1, -1, 0x0389, &nv04_gr_object }, /* sifm (nv30) */
 		{ -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */
 		{ -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */
+		{ -1, -1, 0x0597, &nv04_gr_object }, /* kelvin */
 		{ -1, -1, 0x0697, &nv04_gr_object }, /* rankine */
 		{}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
index 740df0f52c38..5e8abacbacc6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv35.c
@@ -124,6 +124,7 @@ nv35_gr = {
 		{ -1, -1, 0x038a, &nv04_gr_object }, /* ifc (nv30) */
 		{ -1, -1, 0x039e, &nv04_gr_object }, /* swzsurf (nv30) */
 		{ -1, -1, 0x0497, &nv04_gr_object }, /* rankine */
+		{ -1, -1, 0x0597, &nv04_gr_object }, /* kelvin */
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
index fca67de43f2b..df16ffda1749 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/nv50.c
@@ -27,6 +27,8 @@
 #include <core/gpuobj.h>
 #include <engine/fifo.h>
 
+#include <nvif/class.h>
+
 u64
 nv50_gr_units(struct nvkm_gr *gr)
 {
@@ -778,11 +780,11 @@ nv50_gr = {
 	.chan_new = nv50_gr_chan_new,
 	.units = nv50_gr_units,
 	.sclass = {
-		{ -1, -1, 0x0030, &nv50_gr_object },
-		{ -1, -1, 0x502d, &nv50_gr_object },
-		{ -1, -1, 0x5039, &nv50_gr_object },
-		{ -1, -1, 0x5097, &nv50_gr_object },
-		{ -1, -1, 0x50c0, &nv50_gr_object },
+		{ -1, -1, NV_NULL_CLASS, &nv50_gr_object },
+		{ -1, -1, NV50_TWOD, &nv50_gr_object },
+		{ -1, -1, NV50_MEMORY_TO_MEMORY_FORMAT, &nv50_gr_object },
+		{ -1, -1, NV50_TESLA, &nv50_gr_object },
+		{ -1, -1, NV50_COMPUTE, &nv50_gr_object },
 		{}
 	}
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
index d8adcdf6985a..2a52d9f026ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/gr/priv.h
@@ -15,6 +15,7 @@ struct nvkm_gr_func {
 	void *(*dtor)(struct nvkm_gr *);
 	int (*oneinit)(struct nvkm_gr *);
 	int (*init)(struct nvkm_gr *);
+	int (*fini)(struct nvkm_gr *, bool);
 	void (*intr)(struct nvkm_gr *);
 	void (*tile)(struct nvkm_gr *, int region, struct nvkm_fb_tile *);
 	int (*tlb_flush)(struct nvkm_gr *);
@@ -24,6 +25,7 @@ struct nvkm_gr_func {
 	/* Returns chipset-specific counts of units packed into an u64.
 	 */
 	u64 (*units)(struct nvkm_gr *);
+	bool (*chsw_load)(struct nvkm_gr *);
 	struct nvkm_sclass sclass[];
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
index 003ac915eaad..8a8895246d26 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv31.c
@@ -198,7 +198,7 @@ nv31_mpeg_intr(struct nvkm_engine *engine)
 		}
 
 		if (type == 0x00000010) {
-			if (!nv31_mpeg_mthd(mpeg, mthd, data))
+			if (nv31_mpeg_mthd(mpeg, mthd, data))
 				show &= ~0x01000000;
 		}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
index e536f37e24b0..c3cf02ed468e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/mpeg/nv44.c
@@ -172,7 +172,7 @@ nv44_mpeg_intr(struct nvkm_engine *engine)
 		}
 
 		if (type == 0x00000010) {
-			if (!nv44_mpeg_mthd(subdev->device, mthd, data))
+			if (nv44_mpeg_mthd(subdev->device, mthd, data))
 				show &= ~0x01000000;
 		}
 	}
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
index 8616636ad7b4..dde89a4a0f5b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/base.c
@@ -71,7 +71,7 @@ nvkm_perfdom_find(struct nvkm_pm *pm, int di)
 	return NULL;
 }
 
-struct nvkm_perfsig *
+static struct nvkm_perfsig *
 nvkm_perfsig_find(struct nvkm_pm *pm, u8 di, u8 si, struct nvkm_perfdom **pdom)
 {
 	struct nvkm_perfdom *dom = *pdom;
@@ -699,7 +699,7 @@ nvkm_pm_oclass_get(struct nvkm_oclass *oclass, int index,
 	return 1;
 }
 
-int
+static int
 nvkm_perfsrc_new(struct nvkm_pm *pm, struct nvkm_perfsig *sig,
 		 const struct nvkm_specsrc *spec)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c b/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c
index d2901e9a7808..fe2532ee4145 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/pm/gf100.c
@@ -102,7 +102,7 @@ gf100_pm_gpc[] = {
 	{}
 };
 
-const struct nvkm_specdom
+static const struct nvkm_specdom
 gf100_pm_part[] = {
 	{ 0xe0, (const struct nvkm_specsig[]) {
 			{ 0x0f, "part00_pbfb_00", gf100_pbfb_sources },
diff --git a/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h b/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h
index eca62221f299..4b57f8814560 100644
--- a/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h
+++ b/drivers/gpu/drm/nouveau/nvkm/engine/sec/fuc/g98.fuc0s.h
@@ -1,4 +1,4 @@
-uint32_t g98_sec_data[] = {
+static uint32_t g98_sec_data[] = {
 /* 0x0000: ctx_dma */
 /* 0x0000: ctx_dma_query */
 	0x00000000,
@@ -150,7 +150,7 @@ uint32_t g98_sec_data[] = {
 	0x00000000,
 };
 
-uint32_t g98_sec_code[] = {
+static uint32_t g98_sec_code[] = {
 	0x17f004bd,
 	0x0010fe35,
 	0xf10004fe,
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
new file mode 100644
index 000000000000..584863db9bfc
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/Kbuild
@@ -0,0 +1,2 @@
+nvkm-y += nvkm/falcon/base.o
+nvkm-y += nvkm/falcon/v1.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/base.c b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
new file mode 100644
index 000000000000..4852f313762f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/base.c
@@ -0,0 +1,191 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <subdev/mc.h>
+
+void
+nvkm_falcon_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u16 tag, u8 port, bool secure)
+{
+	if (secure && !falcon->secret) {
+		nvkm_warn(falcon->user,
+			  "writing with secure tag on a non-secure falcon!\n");
+		return;
+	}
+
+	falcon->func->load_imem(falcon, data, start, size, tag, port,
+				secure);
+}
+
+void
+nvkm_falcon_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	falcon->func->load_dmem(falcon, data, start, size, port);
+}
+
+void
+nvkm_falcon_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size, u8 port,
+		      void *data)
+{
+	falcon->func->read_dmem(falcon, start, size, port, data);
+}
+
+void
+nvkm_falcon_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *inst)
+{
+	if (!falcon->func->bind_context) {
+		nvkm_error(falcon->user,
+			   "Context binding not supported on this falcon!\n");
+		return;
+	}
+
+	falcon->func->bind_context(falcon, inst);
+}
+
+void
+nvkm_falcon_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	falcon->func->set_start_addr(falcon, start_addr);
+}
+
+void
+nvkm_falcon_start(struct nvkm_falcon *falcon)
+{
+	falcon->func->start(falcon);
+}
+
+int
+nvkm_falcon_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	enum nvkm_devidx id = falcon->owner->index;
+	int ret;
+
+	nvkm_mc_enable(device, id);
+	ret = falcon->func->enable(falcon);
+	if (ret) {
+		nvkm_mc_disable(device, id);
+		return ret;
+	}
+
+	return 0;
+}
+
+void
+nvkm_falcon_disable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	enum nvkm_devidx id = falcon->owner->index;
+
+	/* already disabled, return or wait_idle will timeout */
+	if (!nvkm_mc_enabled(device, id))
+		return;
+
+	falcon->func->disable(falcon);
+
+	nvkm_mc_disable(device, id);
+}
+
+int
+nvkm_falcon_reset(struct nvkm_falcon *falcon)
+{
+	nvkm_falcon_disable(falcon);
+	return nvkm_falcon_enable(falcon);
+}
+
+int
+nvkm_falcon_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	return falcon->func->wait_for_halt(falcon, ms);
+}
+
+int
+nvkm_falcon_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	return falcon->func->clear_interrupt(falcon, mask);
+}
+
+void
+nvkm_falcon_put(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user == user) {
+		nvkm_debug(falcon->user, "released %s falcon\n", falcon->name);
+		falcon->user = NULL;
+	}
+	mutex_unlock(&falcon->mutex);
+}
+
+int
+nvkm_falcon_get(struct nvkm_falcon *falcon, const struct nvkm_subdev *user)
+{
+	mutex_lock(&falcon->mutex);
+	if (falcon->user) {
+		nvkm_error(user, "%s falcon already acquired by %s!\n",
+			   falcon->name, nvkm_subdev_name[falcon->user->index]);
+		mutex_unlock(&falcon->mutex);
+		return -EBUSY;
+	}
+
+	nvkm_debug(user, "acquired %s falcon\n", falcon->name);
+	falcon->user = user;
+	mutex_unlock(&falcon->mutex);
+	return 0;
+}
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *func,
+		 struct nvkm_subdev *subdev, const char *name, u32 addr,
+		 struct nvkm_falcon *falcon)
+{
+	u32 reg;
+
+	falcon->func = func;
+	falcon->owner = subdev;
+	falcon->name = name;
+	falcon->addr = addr;
+	mutex_init(&falcon->mutex);
+
+	reg = nvkm_falcon_rd32(falcon, 0x12c);
+	falcon->version = reg & 0xf;
+	falcon->secret = (reg >> 4) & 0x3;
+	falcon->code.ports = (reg >> 8) & 0xf;
+	falcon->data.ports = (reg >> 12) & 0xf;
+
+	reg = nvkm_falcon_rd32(falcon, 0x108);
+	falcon->code.limit = (reg & 0x1ff) << 8;
+	falcon->data.limit = (reg & 0x3fe00) >> 1;
+
+	reg = nvkm_falcon_rd32(falcon, 0xc08);
+	falcon->debug = (reg >> 20) & 0x1;
+}
+
+void
+nvkm_falcon_del(struct nvkm_falcon **pfalcon)
+{
+	if (*pfalcon) {
+		kfree(*pfalcon);
+		*pfalcon = NULL;
+	}
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
new file mode 100644
index 000000000000..97b56f759d0b
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/priv.h
@@ -0,0 +1,8 @@
+#ifndef __NVKM_FALCON_PRIV_H__
+#define __NVKM_FALCON_PRIV_H__
+#include <engine/falcon.h>
+
+void
+nvkm_falcon_ctor(const struct nvkm_falcon_func *, struct nvkm_subdev *,
+		 const char *, u32, struct nvkm_falcon *);
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
new file mode 100644
index 000000000000..b537f111f39c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/falcon/v1.c
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+
+#include <core/gpuobj.h>
+#include <core/memory.h>
+#include <subdev/timer.h>
+
+static void
+nvkm_falcon_v1_load_imem(struct nvkm_falcon *falcon, void *data, u32 start,
+			 u32 size, u16 tag, u8 port, bool secure)
+{
+	u8 rem = size % 4;
+	u32 reg;
+	int i;
+
+	size -= rem;
+
+	reg = start | BIT(24) | (secure ? BIT(28) : 0);
+	nvkm_falcon_wr32(falcon, 0x180 + (port * 16), reg);
+	for (i = 0; i < size / 4; i++) {
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188, tag++);
+		nvkm_falcon_wr32(falcon, 0x184, ((u32 *)data)[i]);
+	}
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get written
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i];
+
+		/* write new tag every 256B */
+		if ((i & 0x3f) == 0)
+			nvkm_falcon_wr32(falcon, 0x188, tag++);
+		nvkm_falcon_wr32(falcon, 0x184, extra & (BIT(rem * 8) - 1));
+		++i;
+	}
+
+	/* code must be padded to 0x40 words */
+	for (; i & 0x3f; i++)
+		nvkm_falcon_wr32(falcon, 0x184, 0);
+}
+
+static void
+nvkm_falcon_v1_load_dmem(struct nvkm_falcon *falcon, void *data, u32 start,
+		      u32 size, u8 port)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 24));
+	for (i = 0; i < size / 4; i++)
+		nvkm_falcon_wr32(falcon, 0x1c4, ((u32 *)data)[i]);
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get read
+	 */
+	if (rem) {
+		u32 extra = ((u32 *)data)[i];
+
+		nvkm_falcon_wr32(falcon, 0x1c4, extra & (BIT(rem * 8) - 1));
+	}
+}
+
+static void
+nvkm_falcon_v1_read_dmem(struct nvkm_falcon *falcon, u32 start, u32 size,
+			 u8 port, void *data)
+{
+	u8 rem = size % 4;
+	int i;
+
+	size -= rem;
+
+	nvkm_falcon_wr32(falcon, 0x1c0 + (port * 16), start | (0x1 << 25));
+	for (i = 0; i < size / 4; i++)
+		((u32 *)data)[i] = nvkm_falcon_rd32(falcon, 0x1c4);
+
+	/*
+	 * If size is not a multiple of 4, mask the last work to ensure garbage
+	 * does not get read
+	 */
+	if (rem) {
+		u32 extra = nvkm_falcon_rd32(falcon, 0x1c4);
+
+		for (i = size; i < size + rem; i++) {
+			((u8 *)data)[i] = (u8)(extra & 0xff);
+			extra >>= 8;
+		}
+	}
+}
+
+static void
+nvkm_falcon_v1_bind_context(struct nvkm_falcon *falcon, struct nvkm_gpuobj *ctx)
+{
+	u32 inst_loc;
+
+	/* disable instance block binding */
+	if (ctx == NULL) {
+		nvkm_falcon_wr32(falcon, 0x10c, 0x0);
+		return;
+	}
+
+	nvkm_falcon_wr32(falcon, 0x10c, 0x1);
+
+	/* setup apertures - virtual */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_UCODE, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_VIRT, 0x0);
+	/* setup apertures - physical */
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_VID, 0x4);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_COH, 0x5);
+	nvkm_falcon_wr32(falcon, 0xe00 + 4 * FALCON_DMAIDX_PHYS_SYS_NCOH, 0x6);
+
+	/* Set context */
+	switch (nvkm_memory_target(ctx->memory)) {
+	case NVKM_MEM_TARGET_VRAM: inst_loc = 0; break;
+	case NVKM_MEM_TARGET_NCOH: inst_loc = 3; break;
+	default:
+		WARN_ON(1);
+		return;
+	}
+
+	/* Enable context */
+	nvkm_falcon_mask(falcon, 0x048, 0x1, 0x1);
+	nvkm_falcon_wr32(falcon, 0x480,
+			 ((ctx->addr >> 12) & 0xfffffff) |
+			 (inst_loc << 28) | (1 << 30));
+}
+
+static void
+nvkm_falcon_v1_set_start_addr(struct nvkm_falcon *falcon, u32 start_addr)
+{
+	nvkm_falcon_wr32(falcon, 0x104, start_addr);
+}
+
+static void
+nvkm_falcon_v1_start(struct nvkm_falcon *falcon)
+{
+	u32 reg = nvkm_falcon_rd32(falcon, 0x100);
+
+	if (reg & BIT(6))
+		nvkm_falcon_wr32(falcon, 0x130, 0x2);
+	else
+		nvkm_falcon_wr32(falcon, 0x100, 0x2);
+}
+
+static int
+nvkm_falcon_v1_wait_for_halt(struct nvkm_falcon *falcon, u32 ms)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, ms, falcon->addr + 0x100, 0x10, 0x10);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_falcon_v1_clear_interrupt(struct nvkm_falcon *falcon, u32 mask)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	/* clear interrupt(s) */
+	nvkm_falcon_mask(falcon, 0x004, mask, mask);
+	/* wait until interrupts are cleared */
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x008, mask, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+falcon_v1_wait_idle(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x04c, 0xffff, 0x0);
+	if (ret < 0)
+		return ret;
+
+	return 0;
+}
+
+static int
+nvkm_falcon_v1_enable(struct nvkm_falcon *falcon)
+{
+	struct nvkm_device *device = falcon->owner->device;
+	int ret;
+
+	ret = nvkm_wait_msec(device, 10, falcon->addr + 0x10c, 0x6, 0x0);
+	if (ret < 0) {
+		nvkm_error(falcon->user, "Falcon mem scrubbing timeout\n");
+		return ret;
+	}
+
+	ret = falcon_v1_wait_idle(falcon);
+	if (ret)
+		return ret;
+
+	/* enable IRQs */
+	nvkm_falcon_wr32(falcon, 0x010, 0xff);
+
+	return 0;
+}
+
+static void
+nvkm_falcon_v1_disable(struct nvkm_falcon *falcon)
+{
+	/* disable IRQs and wait for any previous code to complete */
+	nvkm_falcon_wr32(falcon, 0x014, 0xff);
+	falcon_v1_wait_idle(falcon);
+}
+
+static const struct nvkm_falcon_func
+nvkm_falcon_v1 = {
+	.load_imem = nvkm_falcon_v1_load_imem,
+	.load_dmem = nvkm_falcon_v1_load_dmem,
+	.read_dmem = nvkm_falcon_v1_read_dmem,
+	.bind_context = nvkm_falcon_v1_bind_context,
+	.start = nvkm_falcon_v1_start,
+	.wait_for_halt = nvkm_falcon_v1_wait_for_halt,
+	.clear_interrupt = nvkm_falcon_v1_clear_interrupt,
+	.enable = nvkm_falcon_v1_enable,
+	.disable = nvkm_falcon_v1_disable,
+	.set_start_addr = nvkm_falcon_v1_set_start_addr,
+};
+
+int
+nvkm_falcon_v1_new(struct nvkm_subdev *owner, const char *name, u32 addr,
+		   struct nvkm_falcon **pfalcon)
+{
+	struct nvkm_falcon *falcon;
+	if (!(falcon = *pfalcon = kzalloc(sizeof(*falcon), GFP_KERNEL)))
+		return -ENOMEM;
+	nvkm_falcon_ctor(&nvkm_falcon_v1, owner, name, addr, falcon);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c
index 370dcd8ff7b5..6eff637ac301 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bar/nv50.c
@@ -84,7 +84,7 @@ nv50_bar_oneinit(struct nvkm_bar *base)
 	start = 0x0100000000ULL;
 	limit = start + device->func->resource_size(device, 3);
 
-	ret = nvkm_vm_new(device, start, limit, start, &bar3_lock, &vm);
+	ret = nvkm_vm_new(device, start, limit - start, start, &bar3_lock, &vm);
 	if (ret)
 		return ret;
 
@@ -117,7 +117,7 @@ nv50_bar_oneinit(struct nvkm_bar *base)
 	start = 0x0000000000ULL;
 	limit = start + device->func->resource_size(device, 1);
 
-	ret = nvkm_vm_new(device, start, limit--, start, &bar1_lock, &vm);
+	ret = nvkm_vm_new(device, start, limit-- - start, start, &bar1_lock, &vm);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
index dbcb0ef21587..6b4f1e06a38f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/Kbuild
@@ -19,6 +19,7 @@ nvkm-y += nvkm/subdev/bios/pcir.o
 nvkm-y += nvkm/subdev/bios/perf.o
 nvkm-y += nvkm/subdev/bios/pll.o
 nvkm-y += nvkm/subdev/bios/pmu.o
+nvkm-y += nvkm/subdev/bios/power_budget.o
 nvkm-y += nvkm/subdev/bios/ramcfg.o
 nvkm-y += nvkm/subdev/bios/rammap.o
 nvkm-y += nvkm/subdev/bios/shadow.o
@@ -31,6 +32,7 @@ nvkm-y += nvkm/subdev/bios/timing.o
 nvkm-y += nvkm/subdev/bios/therm.o
 nvkm-y += nvkm/subdev/bios/vmap.o
 nvkm-y += nvkm/subdev/bios/volt.o
+nvkm-y += nvkm/subdev/bios/vpstate.o
 nvkm-y += nvkm/subdev/bios/xpio.o
 nvkm-y += nvkm/subdev/bios/M0203.o
 nvkm-y += nvkm/subdev/bios/M0205.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/boost.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/boost.c
index 3756ec91a88d..eaf74eb72983 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/boost.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/boost.c
@@ -25,16 +25,16 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/boost.h>
 
-u16
+u32
 nvbios_boostTe(struct nvkm_bios *bios,
 	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 boost = 0x0000;
+	u32 boost = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			boost = nvbios_rd16(bios, bit_P.offset + 0x30);
+			boost = nvbios_rd32(bios, bit_P.offset + 0x30);
 
 		if (boost) {
 			*ver = nvbios_rd08(bios, boost + 0);
@@ -52,15 +52,15 @@ nvbios_boostTe(struct nvkm_bios *bios,
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_boostEe(struct nvkm_bios *bios, int idx,
 	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 data = nvbios_boostTe(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 data = nvbios_boostTe(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (data && idx < *cnt) {
 		data = data + *hdr + (idx * (*len + (snr * ssz)));
 		*hdr = *len;
@@ -68,14 +68,14 @@ nvbios_boostEe(struct nvkm_bios *bios, int idx,
 		*len = ssz;
 		return data;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_boostEp(struct nvkm_bios *bios, int idx,
 	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_boostE *info)
 {
-	u16 data = nvbios_boostEe(bios, idx, ver, hdr, cnt, len);
+	u32 data = nvbios_boostEe(bios, idx, ver, hdr, cnt, len);
 	memset(info, 0x00, sizeof(*info));
 	if (data) {
 		info->pstate = (nvbios_rd16(bios, data + 0x00) & 0x01e0) >> 5;
@@ -85,7 +85,7 @@ nvbios_boostEp(struct nvkm_bios *bios, int idx,
 	return data;
 }
 
-u16
+u32
 nvbios_boostEm(struct nvkm_bios *bios, u8 pstate,
 	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_boostE *info)
 {
@@ -97,21 +97,21 @@ nvbios_boostEm(struct nvkm_bios *bios, u8 pstate,
 	return data;
 }
 
-u16
+u32
 nvbios_boostSe(struct nvkm_bios *bios, int idx,
-	       u16 data, u8 *ver, u8 *hdr, u8 cnt, u8 len)
+	       u32 data, u8 *ver, u8 *hdr, u8 cnt, u8 len)
 {
 	if (data && idx < cnt) {
 		data = data + *hdr + (idx * len);
 		*hdr = len;
 		return data;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_boostSp(struct nvkm_bios *bios, int idx,
-	       u16 data, u8 *ver, u8 *hdr, u8 cnt, u8 len,
+	       u32 data, u8 *ver, u8 *hdr, u8 cnt, u8 len,
 	       struct nvbios_boostS *info)
 {
 	data = nvbios_boostSe(bios, idx, data, ver, hdr, cnt, len);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/cstep.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/cstep.c
index 32e01624a162..5063382d8a6c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/cstep.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/cstep.c
@@ -25,16 +25,16 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/cstep.h>
 
-u16
+u32
 nvbios_cstepTe(struct nvkm_bios *bios,
 	       u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *xnr, u8 *xsz)
 {
 	struct bit_entry bit_P;
-	u16 cstep = 0x0000;
+	u32 cstep = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			cstep = nvbios_rd16(bios, bit_P.offset + 0x34);
+			cstep = nvbios_rd32(bios, bit_P.offset + 0x34);
 
 		if (cstep) {
 			*ver = nvbios_rd08(bios, cstep + 0);
@@ -52,27 +52,27 @@ nvbios_cstepTe(struct nvkm_bios *bios,
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_cstepEe(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr)
 {
 	u8  cnt, len, xnr, xsz;
-	u16 data = nvbios_cstepTe(bios, ver, hdr, &cnt, &len, &xnr, &xsz);
+	u32 data = nvbios_cstepTe(bios, ver, hdr, &cnt, &len, &xnr, &xsz);
 	if (data && idx < cnt) {
 		data = data + *hdr + (idx * len);
 		*hdr = len;
 		return data;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_cstepEp(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr,
 	       struct nvbios_cstepE *info)
 {
-	u16 data = nvbios_cstepEe(bios, idx, ver, hdr);
+	u32 data = nvbios_cstepEe(bios, idx, ver, hdr);
 	memset(info, 0x00, sizeof(*info));
 	if (data) {
 		info->pstate = (nvbios_rd16(bios, data + 0x00) & 0x01e0) >> 5;
@@ -81,7 +81,7 @@ nvbios_cstepEp(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr,
 	return data;
 }
 
-u16
+u32
 nvbios_cstepEm(struct nvkm_bios *bios, u8 pstate, u8 *ver, u8 *hdr,
 	       struct nvbios_cstepE *info)
 {
@@ -93,24 +93,24 @@ nvbios_cstepEm(struct nvkm_bios *bios, u8 pstate, u8 *ver, u8 *hdr,
 	return data;
 }
 
-u16
+u32
 nvbios_cstepXe(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr)
 {
 	u8  cnt, len, xnr, xsz;
-	u16 data = nvbios_cstepTe(bios, ver, hdr, &cnt, &len, &xnr, &xsz);
+	u32 data = nvbios_cstepTe(bios, ver, hdr, &cnt, &len, &xnr, &xsz);
 	if (data && idx < xnr) {
 		data = data + *hdr + (cnt * len) + (idx * xsz);
 		*hdr = xsz;
 		return data;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_cstepXp(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr,
 	       struct nvbios_cstepX *info)
 {
-	u16 data = nvbios_cstepXe(bios, idx, ver, hdr);
+	u32 data = nvbios_cstepXe(bios, idx, ver, hdr);
 	memset(info, 0x00, sizeof(*info));
 	if (data) {
 		info->freq    = nvbios_rd16(bios, data + 0x00) * 1000;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
index d89e78c4e689..972370ed36f0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/dp.c
@@ -207,8 +207,11 @@ nvbios_dpcfg_match(struct nvkm_bios *bios, u16 outp, u8 pc, u8 vs, u8 pe,
 	if (*ver >= 0x30) {
 		const u8 vsoff[] = { 0, 4, 7, 9 };
 		idx = (pc * 10) + vsoff[vs] + pe;
-		if (*ver >= 0x40 && *hdr >= 0x12)
+		if (*ver >= 0x40 && *ver <= 0x41 && *hdr >= 0x12)
 			idx += nvbios_rd08(bios, outp + 0x11) * 40;
+		else
+		if (*ver >= 0x42)
+			idx += nvbios_rd08(bios, outp + 0x11) * 10;
 	} else {
 		while ((data = nvbios_dpcfg_entry(bios, outp, ++idx,
 						  ver, hdr, cnt, len))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
index 80fed7e78dcb..456f9ea920dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/fan.c
@@ -25,15 +25,15 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/fan.h>
 
-u16
+static u32
 nvbios_fan_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_P;
-	u16 fan = 0x0000;
+	u32 fan = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2 && bit_P.length >= 0x5a)
-			fan = nvbios_rd16(bios, bit_P.offset + 0x58);
+			fan = nvbios_rd32(bios, bit_P.offset + 0x58);
 
 		if (fan) {
 			*ver = nvbios_rd08(bios, fan + 0);
@@ -49,25 +49,25 @@ nvbios_fan_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+static u32
 nvbios_fan_entry(struct nvkm_bios *bios, int idx, u8 *ver, u8 *hdr,
 		 u8 *cnt, u8 *len)
 {
-	u16 data = nvbios_fan_table(bios, ver, hdr, cnt, len);
+	u32 data = nvbios_fan_table(bios, ver, hdr, cnt, len);
 	if (data && idx < *cnt)
 		return data + *hdr + (idx * (*len));
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_fan_parse(struct nvkm_bios *bios, struct nvbios_therm_fan *fan)
 {
 	u8 ver, hdr, cnt, len;
 
-	u16 data = nvbios_fan_entry(bios, 0, &ver, &hdr, &cnt, &len);
+	u32 data = nvbios_fan_entry(bios, 0, &ver, &hdr, &cnt, &len);
 	if (data) {
 		u8 type = nvbios_rd08(bios, data + 0x00);
 		switch (type) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c
index 084328028af1..3953d11844ea 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/iccsense.c
@@ -23,20 +23,21 @@
  */
 #include <subdev/bios.h>
 #include <subdev/bios/bit.h>
+#include <subdev/bios/extdev.h>
 #include <subdev/bios/iccsense.h>
 
-static u16
+static u32
 nvbios_iccsense_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt,
 		      u8 *len)
 {
 	struct bit_entry bit_P;
-	u16 iccsense;
+	u32 iccsense;
 
 	if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 ||
 	    bit_P.length < 0x2c)
 		return 0;
 
-	iccsense = nvbios_rd16(bios, bit_P.offset + 0x28);
+	iccsense = nvbios_rd32(bios, bit_P.offset + 0x28);
 	if (!iccsense)
 		return 0;
 
@@ -60,7 +61,7 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense)
 {
 	struct nvkm_subdev *subdev = &bios->subdev;
 	u8 ver, hdr, cnt, len, i;
-	u16 table, entry;
+	u32 table, entry;
 
 	table = nvbios_iccsense_table(bios, &ver, &hdr, &cnt, &len);
 	if (!table || !cnt)
@@ -77,23 +78,47 @@ nvbios_iccsense_parse(struct nvkm_bios *bios, struct nvbios_iccsense *iccsense)
 		return -ENOMEM;
 
 	for (i = 0; i < cnt; ++i) {
+		struct nvbios_extdev_func extdev;
 		struct pwr_rail_t *rail = &iccsense->rail[i];
+		u8 res_start = 0;
+		int r;
+
 		entry = table + hdr + i * len;
 
 		switch(ver) {
 		case 0x10:
 			rail->mode = nvbios_rd08(bios, entry + 0x1);
 			rail->extdev_id = nvbios_rd08(bios, entry + 0x2);
-			rail->resistor_mohm = nvbios_rd08(bios, entry + 0x3);
-			rail->rail = nvbios_rd08(bios, entry + 0x4);
+			res_start = 0x3;
 			break;
 		case 0x20:
 			rail->mode = nvbios_rd08(bios, entry);
 			rail->extdev_id = nvbios_rd08(bios, entry + 0x1);
-			rail->resistor_mohm = nvbios_rd08(bios, entry + 0x5);
-			rail->rail = nvbios_rd08(bios, entry + 0x6);
+			res_start = 0x5;
+			break;
+		};
+
+		if (nvbios_extdev_parse(bios, rail->extdev_id, &extdev))
+			continue;
+
+		switch (extdev.type) {
+		case NVBIOS_EXTDEV_INA209:
+		case NVBIOS_EXTDEV_INA219:
+			rail->resistor_count = 1;
+			break;
+		case NVBIOS_EXTDEV_INA3221:
+			rail->resistor_count = 3;
+			break;
+		default:
+			rail->resistor_count = 0;
 			break;
 		};
+
+		for (r = 0; r < rail->resistor_count; ++r) {
+			rail->resistors[r].mohm = nvbios_rd08(bios, entry + res_start + r * 2);
+			rail->resistors[r].enabled = !(nvbios_rd08(bios, entry + res_start + r * 2 + 1) & 0x40);
+		}
+		rail->config = nvbios_rd16(bios, entry + res_start + rail->resistor_count * 2);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/mxm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/mxm.c
index 3ddf0939ded3..994cc2d7759b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/mxm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/mxm.c
@@ -81,7 +81,7 @@ mxm_sor_map(struct nvkm_bios *bios, u8 conn)
 		u16 map = nvbios_rd16(bios, mxm + 4);
 		if (map) {
 			ver = nvbios_rd08(bios, map);
-			if (ver == 0x10) {
+			if (ver == 0x10 || ver == 0x11) {
 				if (conn < nvbios_rd08(bios, map + 3)) {
 					map += nvbios_rd08(bios, map + 1);
 					map += conn;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/perf.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/perf.c
index 636bfb665bb9..c3068358f695 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/perf.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/perf.c
@@ -26,16 +26,16 @@
 #include <subdev/bios/perf.h>
 #include <subdev/pci.h>
 
-u16
+u32
 nvbios_perf_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr,
 		  u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 perf = 0x0000;
+	u32 perf = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version <= 2) {
-			perf = nvbios_rd16(bios, bit_P.offset + 0);
+			perf = nvbios_rd32(bios, bit_P.offset + 0);
 			if (perf) {
 				*ver = nvbios_rd08(bios, perf + 0);
 				*hdr = nvbios_rd08(bios, perf + 1);
@@ -72,15 +72,15 @@ nvbios_perf_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr,
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_perf_entry(struct nvkm_bios *bios, int idx,
 		  u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 perf = nvbios_perf_table(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 perf = nvbios_perf_table(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (perf && idx < *cnt) {
 		perf = perf + *hdr + (idx * (*len + (snr * ssz)));
 		*hdr = *len;
@@ -88,14 +88,14 @@ nvbios_perf_entry(struct nvkm_bios *bios, int idx,
 		*len = ssz;
 		return perf;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_perfEp(struct nvkm_bios *bios, int idx,
 	      u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_perfE *info)
 {
-	u16 perf = nvbios_perf_entry(bios, idx, ver, hdr, cnt, len);
+	u32 perf = nvbios_perf_entry(bios, idx, ver, hdr, cnt, len);
 	memset(info, 0x00, sizeof(*info));
 	info->pstate = nvbios_rd08(bios, perf + 0x00);
 	switch (!!perf * *ver) {
@@ -163,7 +163,7 @@ nvbios_perfEp(struct nvkm_bios *bios, int idx,
 		info->pcie_width = 0xff;
 		break;
 	default:
-		return 0x0000;
+		return 0;
 	}
 	return perf;
 }
@@ -202,7 +202,7 @@ nvbios_perf_fan_parse(struct nvkm_bios *bios,
 		      struct nvbios_perf_fan *fan)
 {
 	u8  ver, hdr, cnt, len, snr, ssz;
-	u16 perf = nvbios_perf_table(bios, &ver, &hdr, &cnt, &len, &snr, &ssz);
+	u32 perf = nvbios_perf_table(bios, &ver, &hdr, &cnt, &len, &snr, &ssz);
 	if (!perf)
 		return -ENODEV;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
new file mode 100644
index 000000000000..617bfffce4ad
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/power_budget.c
@@ -0,0 +1,126 @@
+/*
+ * Copyright 2016 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/power_budget.h>
+
+static u32
+nvbios_power_budget_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt,
+			  u8 *len)
+{
+	struct bit_entry bit_P;
+	u32 power_budget;
+
+	if (bit_entry(bios, 'P', &bit_P) || bit_P.version != 2 ||
+	    bit_P.length < 0x2c)
+		return 0;
+
+	power_budget = nvbios_rd32(bios, bit_P.offset + 0x2c);
+	if (!power_budget)
+		return 0;
+
+	*ver = nvbios_rd08(bios, power_budget);
+	switch (*ver) {
+	case 0x20:
+	case 0x30:
+		*hdr = nvbios_rd08(bios, power_budget + 0x1);
+		*len = nvbios_rd08(bios, power_budget + 0x2);
+		*cnt = nvbios_rd08(bios, power_budget + 0x3);
+		return power_budget;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+int
+nvbios_power_budget_header(struct nvkm_bios *bios,
+                           struct nvbios_power_budget *budget)
+{
+	struct nvkm_subdev *subdev = &bios->subdev;
+	u8 ver, hdr, cnt, len, cap_entry;
+	u32 header;
+
+	if (!bios || !budget)
+		return -EINVAL;
+
+	header = nvbios_power_budget_table(bios, &ver, &hdr, &cnt, &len);
+	if (!header || !cnt)
+		return -ENODEV;
+
+	switch (ver) {
+	case 0x20:
+		cap_entry = nvbios_rd08(bios, header + 0x9);
+		break;
+	case 0x30:
+		cap_entry = nvbios_rd08(bios, header + 0xa);
+		break;
+	default:
+		cap_entry = 0xff;
+	}
+
+	if (cap_entry >= cnt && cap_entry != 0xff) {
+		nvkm_warn(subdev,
+		          "invalid cap_entry in power budget table found\n");
+		budget->cap_entry = 0xff;
+		return -EINVAL;
+	}
+
+	budget->offset = header;
+	budget->ver = ver;
+	budget->hlen = hdr;
+	budget->elen = len;
+	budget->ecount = cnt;
+
+	budget->cap_entry = cap_entry;
+
+	return 0;
+}
+
+int
+nvbios_power_budget_entry(struct nvkm_bios *bios,
+                          struct nvbios_power_budget *budget,
+                          u8 idx, struct nvbios_power_budget_entry *entry)
+{
+	u32 entry_offset;
+
+	if (!bios || !budget || !budget->offset || idx >= budget->ecount
+		|| !entry)
+		return -EINVAL;
+
+	entry_offset = budget->offset + budget->hlen + idx * budget->elen;
+
+	if (budget->ver >= 0x20) {
+		entry->min_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = nvbios_rd32(bios, entry_offset + 0x6);
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0xa);
+	} else {
+		entry->min_w = 0;
+		entry->max_w = nvbios_rd32(bios, entry_offset + 0x2);
+		entry->avg_w = entry->max_w;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h
index 212800ecdce9..7d1d3c6b4b72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/priv.h
@@ -12,6 +12,7 @@ struct nvbios_source {
 	bool rw;
 	bool ignore_checksum;
 	bool no_pcir;
+	bool require_checksum;
 };
 
 int nvbios_extend(struct nvkm_bios *, u32 length);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
index b2557e87afdd..7deb81b6dbac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadow.c
@@ -86,9 +86,12 @@ shadow_image(struct nvkm_bios *bios, int idx, u32 offset, struct shadow *mthd)
 		    nvbios_checksum(&bios->data[image.base], image.size)) {
 			nvkm_debug(subdev, "%08x: checksum failed\n",
 				   image.base);
-			if (mthd->func->rw)
+			if (!mthd->func->require_checksum) {
+				if (mthd->func->rw)
+					score += 1;
 				score += 1;
-			score += 1;
+			} else
+				return 0;
 		} else {
 			score += 3;
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
index 8fecb5ff22a0..06572f8ce914 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/shadowacpi.c
@@ -99,6 +99,7 @@ nvbios_acpi_fast = {
 	.init = acpi_init,
 	.read = acpi_read_fast,
 	.rw = false,
+	.require_checksum = true,
 };
 
 const struct nvbios_source
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/therm.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/therm.c
index a54cfec0550d..5babc5a7c7d5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/therm.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/therm.c
@@ -25,17 +25,17 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/therm.h>
 
-static u16
+static u32
 therm_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *len, u8 *cnt)
 {
 	struct bit_entry bit_P;
-	u16 therm = 0;
+	u32 therm = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 1)
-			therm = nvbios_rd16(bios, bit_P.offset + 12);
+			therm = nvbios_rd32(bios, bit_P.offset + 12);
 		else if (bit_P.version == 2)
-			therm = nvbios_rd16(bios, bit_P.offset + 16);
+			therm = nvbios_rd32(bios, bit_P.offset + 16);
 		else
 			nvkm_error(&bios->subdev,
 				   "unknown offset for thermal in BIT P %d\n",
@@ -44,7 +44,7 @@ therm_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *len, u8 *cnt)
 
 	/* exit now if we haven't found the thermal table */
 	if (!therm)
-		return 0x0000;
+		return 0;
 
 	*ver = nvbios_rd08(bios, therm + 0);
 	*hdr = nvbios_rd08(bios, therm + 1);
@@ -53,14 +53,14 @@ therm_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *len, u8 *cnt)
 	return therm + nvbios_rd08(bios, therm + 1);
 }
 
-static u16
+static u32
 nvbios_therm_entry(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len)
 {
 	u8 hdr, cnt;
-	u16 therm = therm_table(bios, ver, &hdr, len, &cnt);
+	u32 therm = therm_table(bios, ver, &hdr, len, &cnt);
 	if (therm && idx < cnt)
 		return therm + idx * *len;
-	return 0x0000;
+	return 0;
 }
 
 int
@@ -70,7 +70,7 @@ nvbios_therm_sensor_parse(struct nvkm_bios *bios,
 {
 	s8 thrs_section, sensor_section, offset;
 	u8 ver, len, i;
-	u16 entry;
+	u32 entry;
 
 	/* we only support the core domain for now */
 	if (domain != NVBIOS_THERM_DOMAIN_CORE)
@@ -154,7 +154,7 @@ nvbios_therm_fan_parse(struct nvkm_bios *bios, struct nvbios_therm_fan *fan)
 {
 	struct nvbios_therm_trip_point *cur_trip = NULL;
 	u8 ver, len, i;
-	u16 entry;
+	u32 entry;
 
 	uint8_t duty_lut[] = { 0, 0, 25, 0, 40, 0, 50, 0,
 				75, 0, 85, 0, 100, 0, 100, 0 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c
index 99f6432ac0af..7e83c3985020 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/timing.c
@@ -25,19 +25,19 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/timing.h>
 
-u16
+u32
 nvbios_timingTe(struct nvkm_bios *bios,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len, u8 *snr, u8 *ssz)
 {
 	struct bit_entry bit_P;
-	u16 timing = 0x0000;
+	u32 timing = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 1)
-			timing = nvbios_rd16(bios, bit_P.offset + 4);
+			timing = nvbios_rd32(bios, bit_P.offset + 4);
 		else
 		if (bit_P.version == 2)
-			timing = nvbios_rd16(bios, bit_P.offset + 8);
+			timing = nvbios_rd32(bios, bit_P.offset + 8);
 
 		if (timing) {
 			*ver = nvbios_rd08(bios, timing + 0);
@@ -62,15 +62,15 @@ nvbios_timingTe(struct nvkm_bios *bios,
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_timingEe(struct nvkm_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	u8  snr, ssz;
-	u16 timing = nvbios_timingTe(bios, ver, hdr, cnt, len, &snr, &ssz);
+	u32 timing = nvbios_timingTe(bios, ver, hdr, cnt, len, &snr, &ssz);
 	if (timing && idx < *cnt) {
 		timing += *hdr + idx * (*len + (snr * ssz));
 		*hdr = *len;
@@ -78,14 +78,14 @@ nvbios_timingEe(struct nvkm_bios *bios, int idx,
 		*len = ssz;
 		return timing;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_timingEp(struct nvkm_bios *bios, int idx,
 		u8 *ver, u8 *hdr, u8 *cnt, u8 *len, struct nvbios_ramcfg *p)
 {
-	u16 data = nvbios_timingEe(bios, idx, ver, hdr, cnt, len), temp;
+	u32 data = nvbios_timingEe(bios, idx, ver, hdr, cnt, len), temp;
 	p->timing_ver = *ver;
 	p->timing_hdr = *hdr;
 	switch (!!data * *ver) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vmap.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vmap.c
index 2f13db745948..c228ca15fa3b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vmap.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vmap.c
@@ -25,15 +25,15 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/vmap.h>
 
-u16
+u32
 nvbios_vmap_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_P;
-	u16 vmap = 0x0000;
+	u32 vmap = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2) {
-			vmap = nvbios_rd16(bios, bit_P.offset + 0x20);
+			vmap = nvbios_rd32(bios, bit_P.offset + 0x20);
 			if (vmap) {
 				*ver = nvbios_rd08(bios, vmap + 0);
 				switch (*ver) {
@@ -50,40 +50,50 @@ nvbios_vmap_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_vmap_parse(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		  struct nvbios_vmap *info)
 {
-	u16 vmap = nvbios_vmap_table(bios, ver, hdr, cnt, len);
+	u32 vmap = nvbios_vmap_table(bios, ver, hdr, cnt, len);
 	memset(info, 0x00, sizeof(*info));
 	switch (!!vmap * *ver) {
 	case 0x10:
+		info->max0 = 0xff;
+		info->max1 = 0xff;
+		info->max2 = 0xff;
+		break;
 	case 0x20:
+		info->max0 = nvbios_rd08(bios, vmap + 0x7);
+		info->max1 = nvbios_rd08(bios, vmap + 0x8);
+		if (*len >= 0xc)
+			info->max2 = nvbios_rd08(bios, vmap + 0xc);
+		else
+			info->max2 = 0xff;
 		break;
 	}
 	return vmap;
 }
 
-u16
+u32
 nvbios_vmap_entry(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len)
 {
 	u8  hdr, cnt;
-	u16 vmap = nvbios_vmap_table(bios, ver, &hdr, &cnt, len);
+	u32 vmap = nvbios_vmap_table(bios, ver, &hdr, &cnt, len);
 	if (vmap && idx < cnt) {
 		vmap = vmap + hdr + (idx * *len);
 		return vmap;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_vmap_entry_parse(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len,
 			struct nvbios_vmap_entry *info)
 {
-	u16 vmap = nvbios_vmap_entry(bios, idx, ver, len);
+	u32 vmap = nvbios_vmap_entry(bios, idx, ver, len);
 	memset(info, 0x00, sizeof(*info));
 	switch (!!vmap * *ver) {
 	case 0x10:
@@ -95,7 +105,7 @@ nvbios_vmap_entry_parse(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len,
 		info->arg[2] = nvbios_rd32(bios, vmap + 0x10);
 		break;
 	case 0x20:
-		info->unk0   = nvbios_rd08(bios, vmap + 0x00);
+		info->mode   = nvbios_rd08(bios, vmap + 0x00);
 		info->link   = nvbios_rd08(bios, vmap + 0x01);
 		info->min    = nvbios_rd32(bios, vmap + 0x02);
 		info->max    = nvbios_rd32(bios, vmap + 0x06);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
index 6e0a33648be9..a7797a9e9cbc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/volt.c
@@ -25,18 +25,18 @@
 #include <subdev/bios/bit.h>
 #include <subdev/bios/volt.h>
 
-u16
+u32
 nvbios_volt_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 {
 	struct bit_entry bit_P;
-	u16 volt = 0x0000;
+	u32 volt = 0;
 
 	if (!bit_entry(bios, 'P', &bit_P)) {
 		if (bit_P.version == 2)
-			volt = nvbios_rd16(bios, bit_P.offset + 0x0c);
+			volt = nvbios_rd32(bios, bit_P.offset + 0x0c);
 		else
 		if (bit_P.version == 1)
-			volt = nvbios_rd16(bios, bit_P.offset + 0x10);
+			volt = nvbios_rd32(bios, bit_P.offset + 0x10);
 
 		if (volt) {
 			*ver = nvbios_rd08(bios, volt + 0);
@@ -62,33 +62,37 @@ nvbios_volt_table(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len)
 		}
 	}
 
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_volt_parse(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 		  struct nvbios_volt *info)
 {
-	u16 volt = nvbios_volt_table(bios, ver, hdr, cnt, len);
+	u32 volt = nvbios_volt_table(bios, ver, hdr, cnt, len);
 	memset(info, 0x00, sizeof(*info));
 	switch (!!volt * *ver) {
 	case 0x12:
 		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x04);
+		info->ranged  = false;
 		break;
 	case 0x20:
 		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x05);
+		info->ranged  = false;
 		break;
 	case 0x30:
 		info->type    = NVBIOS_VOLT_GPIO;
 		info->vidmask = nvbios_rd08(bios, volt + 0x04);
+		info->ranged  = false;
 		break;
 	case 0x40:
 		info->type    = NVBIOS_VOLT_GPIO;
 		info->base    = nvbios_rd32(bios, volt + 0x04);
 		info->step    = nvbios_rd16(bios, volt + 0x08);
 		info->vidmask = nvbios_rd08(bios, volt + 0x0b);
+		info->ranged  = true; /* XXX: find the flag byte */
 		/*XXX*/
 		info->min     = 0;
 		info->max     = info->base;
@@ -104,32 +108,34 @@ nvbios_volt_parse(struct nvkm_bios *bios, u8 *ver, u8 *hdr, u8 *cnt, u8 *len,
 			info->pwm_freq  = nvbios_rd32(bios, volt + 0x5) / 1000;
 			info->pwm_range = nvbios_rd32(bios, volt + 0x16);
 		} else {
-			info->type      = NVBIOS_VOLT_GPIO;
-			info->vidmask   = nvbios_rd08(bios, volt + 0x06);
-			info->step      = nvbios_rd16(bios, volt + 0x16);
+			info->type    = NVBIOS_VOLT_GPIO;
+			info->vidmask = nvbios_rd08(bios, volt + 0x06);
+			info->step    = nvbios_rd16(bios, volt + 0x16);
+			info->ranged  =
+				!!(nvbios_rd08(bios, volt + 0x4) & 0x2);
 		}
 		break;
 	}
 	return volt;
 }
 
-u16
+u32
 nvbios_volt_entry(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len)
 {
 	u8  hdr, cnt;
-	u16 volt = nvbios_volt_table(bios, ver, &hdr, &cnt, len);
+	u32 volt = nvbios_volt_table(bios, ver, &hdr, &cnt, len);
 	if (volt && idx < cnt) {
 		volt = volt + hdr + (idx * *len);
 		return volt;
 	}
-	return 0x0000;
+	return 0;
 }
 
-u16
+u32
 nvbios_volt_entry_parse(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len,
 			struct nvbios_volt_entry *info)
 {
-	u16 volt = nvbios_volt_entry(bios, idx, ver, len);
+	u32 volt = nvbios_volt_entry(bios, idx, ver, len);
 	memset(info, 0x00, sizeof(*info));
 	switch (!!volt * *ver) {
 	case 0x12:
@@ -142,7 +148,10 @@ nvbios_volt_entry_parse(struct nvkm_bios *bios, int idx, u8 *ver, u8 *len,
 		info->vid     = nvbios_rd08(bios, volt + 0x01) >> 2;
 		break;
 	case 0x40:
+		break;
 	case 0x50:
+		info->voltage = nvbios_rd32(bios, volt) & 0x001fffff;
+		info->vid     = (nvbios_rd32(bios, volt) >> 23) & 0xff;
 		break;
 	}
 	return volt;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vpstate.c b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vpstate.c
new file mode 100644
index 000000000000..f199270163d2
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/bios/vpstate.c
@@ -0,0 +1,82 @@
+/*
+ * Copyright 2016 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include <subdev/bios.h>
+#include <subdev/bios/bit.h>
+#include <subdev/bios/vpstate.h>
+
+static u32
+nvbios_vpstate_offset(struct nvkm_bios *b)
+{
+	struct bit_entry bit_P;
+
+	if (!bit_entry(b, 'P', &bit_P)) {
+		if (bit_P.version == 2)
+			return nvbios_rd32(b, bit_P.offset + 0x38);
+	}
+
+	return 0x0000;
+}
+
+int
+nvbios_vpstate_parse(struct nvkm_bios *b, struct nvbios_vpstate_header *h)
+{
+	if (!h)
+		return -EINVAL;
+
+	h->offset = nvbios_vpstate_offset(b);
+	if (!h->offset)
+		return -ENODEV;
+
+	h->version = nvbios_rd08(b, h->offset);
+	switch (h->version) {
+	case 0x10:
+		h->hlen     = nvbios_rd08(b, h->offset + 0x1);
+		h->elen     = nvbios_rd08(b, h->offset + 0x2);
+		h->slen     = nvbios_rd08(b, h->offset + 0x3);
+		h->scount   = nvbios_rd08(b, h->offset + 0x4);
+		h->ecount   = nvbios_rd08(b, h->offset + 0x5);
+
+		h->base_id  = nvbios_rd08(b, h->offset + 0x0f);
+		h->boost_id = nvbios_rd08(b, h->offset + 0x10);
+		h->tdp_id   = nvbios_rd08(b, h->offset + 0x11);
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+int
+nvbios_vpstate_entry(struct nvkm_bios *b, struct nvbios_vpstate_header *h,
+		     u8 idx, struct nvbios_vpstate_entry *e)
+{
+	u32 offset;
+
+	if (!e || !h || idx > h->ecount)
+		return -EINVAL;
+
+	offset = h->offset + h->hlen + idx * (h->elen + (h->slen * h->scount));
+	e->pstate    = nvbios_rd08(b, offset);
+	e->clock_mhz = nvbios_rd16(b, offset + 0x5);
+	return 0;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
index 7102c25320fc..e4c8d310d870 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/base.c
@@ -27,6 +27,7 @@
 #include <subdev/bios/boost.h>
 #include <subdev/bios/cstep.h>
 #include <subdev/bios/perf.h>
+#include <subdev/bios/vpstate.h>
 #include <subdev/fb.h>
 #include <subdev/therm.h>
 #include <subdev/volt.h>
@@ -43,13 +44,13 @@ nvkm_clk_adjust(struct nvkm_clk *clk, bool adjust,
 	struct nvkm_bios *bios = clk->subdev.device->bios;
 	struct nvbios_boostE boostE;
 	u8  ver, hdr, cnt, len;
-	u16 data;
+	u32 data;
 
 	data = nvbios_boostEm(bios, pstate, &ver, &hdr, &cnt, &len, &boostE);
 	if (data) {
 		struct nvbios_boostS boostS;
 		u8  idx = 0, sver, shdr;
-		u16 subd;
+		u32 subd;
 
 		input = max(boostE.min, input);
 		input = min(boostE.max, input);
@@ -74,6 +75,88 @@ nvkm_clk_adjust(struct nvkm_clk *clk, bool adjust,
 /******************************************************************************
  * C-States
  *****************************************************************************/
+static bool
+nvkm_cstate_valid(struct nvkm_clk *clk, struct nvkm_cstate *cstate,
+		  u32 max_volt, int temp)
+{
+	const struct nvkm_domain *domain = clk->domains;
+	struct nvkm_volt *volt = clk->subdev.device->volt;
+	int voltage;
+
+	while (domain && domain->name != nv_clk_src_max) {
+		if (domain->flags & NVKM_CLK_DOM_FLAG_VPSTATE) {
+			u32 freq = cstate->domain[domain->name];
+			switch (clk->boost_mode) {
+			case NVKM_CLK_BOOST_NONE:
+				if (clk->base_khz && freq > clk->base_khz)
+					return false;
+			case NVKM_CLK_BOOST_BIOS:
+				if (clk->boost_khz && freq > clk->boost_khz)
+					return false;
+			}
+		}
+		domain++;
+	}
+
+	if (!volt)
+		return true;
+
+	voltage = nvkm_volt_map(volt, cstate->voltage, temp);
+	if (voltage < 0)
+		return false;
+	return voltage <= min(max_volt, volt->max_uv);
+}
+
+static struct nvkm_cstate *
+nvkm_cstate_find_best(struct nvkm_clk *clk, struct nvkm_pstate *pstate,
+		      struct nvkm_cstate *start)
+{
+	struct nvkm_device *device = clk->subdev.device;
+	struct nvkm_volt *volt = device->volt;
+	struct nvkm_cstate *cstate;
+	int max_volt;
+
+	if (!pstate || !start)
+		return NULL;
+
+	if (!volt)
+		return start;
+
+	max_volt = volt->max_uv;
+	if (volt->max0_id != 0xff)
+		max_volt = min(max_volt,
+			       nvkm_volt_map(volt, volt->max0_id, clk->temp));
+	if (volt->max1_id != 0xff)
+		max_volt = min(max_volt,
+			       nvkm_volt_map(volt, volt->max1_id, clk->temp));
+	if (volt->max2_id != 0xff)
+		max_volt = min(max_volt,
+			       nvkm_volt_map(volt, volt->max2_id, clk->temp));
+
+	for (cstate = start; &cstate->head != &pstate->list;
+	     cstate = list_entry(cstate->head.prev, typeof(*cstate), head)) {
+		if (nvkm_cstate_valid(clk, cstate, max_volt, clk->temp))
+			break;
+	}
+
+	return cstate;
+}
+
+static struct nvkm_cstate *
+nvkm_cstate_get(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
+{
+	struct nvkm_cstate *cstate;
+	if (cstatei == NVKM_CLK_CSTATE_HIGHEST)
+		return list_last_entry(&pstate->list, typeof(*cstate), head);
+	else {
+		list_for_each_entry(cstate, &pstate->list, head) {
+			if (cstate->id == cstatei)
+				return cstate;
+		}
+	}
+	return NULL;
+}
+
 static int
 nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
 {
@@ -85,7 +168,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
 	int ret;
 
 	if (!list_empty(&pstate->list)) {
-		cstate = list_entry(pstate->list.prev, typeof(*cstate), head);
+		cstate = nvkm_cstate_get(clk, pstate, cstatei);
+		cstate = nvkm_cstate_find_best(clk, pstate, cstate);
 	} else {
 		cstate = &pstate->base;
 	}
@@ -99,7 +183,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
 	}
 
 	if (volt) {
-		ret = nvkm_volt_set_id(volt, cstate->voltage, +1);
+		ret = nvkm_volt_set_id(volt, cstate->voltage,
+				       pstate->base.voltage, clk->temp, +1);
 		if (ret && ret != -ENODEV) {
 			nvkm_error(subdev, "failed to raise voltage: %d\n", ret);
 			return ret;
@@ -113,7 +198,8 @@ nvkm_cstate_prog(struct nvkm_clk *clk, struct nvkm_pstate *pstate, int cstatei)
 	}
 
 	if (volt) {
-		ret = nvkm_volt_set_id(volt, cstate->voltage, -1);
+		ret = nvkm_volt_set_id(volt, cstate->voltage,
+				       pstate->base.voltage, clk->temp, -1);
 		if (ret && ret != -ENODEV)
 			nvkm_error(subdev, "failed to lower voltage: %d\n", ret);
 	}
@@ -138,22 +224,27 @@ static int
 nvkm_cstate_new(struct nvkm_clk *clk, int idx, struct nvkm_pstate *pstate)
 {
 	struct nvkm_bios *bios = clk->subdev.device->bios;
+	struct nvkm_volt *volt = clk->subdev.device->volt;
 	const struct nvkm_domain *domain = clk->domains;
 	struct nvkm_cstate *cstate = NULL;
 	struct nvbios_cstepX cstepX;
 	u8  ver, hdr;
-	u16 data;
+	u32 data;
 
 	data = nvbios_cstepXp(bios, idx, &ver, &hdr, &cstepX);
 	if (!data)
 		return -ENOENT;
 
+	if (volt && nvkm_volt_map_min(volt, cstepX.voltage) > volt->max_uv)
+		return -EINVAL;
+
 	cstate = kzalloc(sizeof(*cstate), GFP_KERNEL);
 	if (!cstate)
 		return -ENOMEM;
 
 	*cstate = pstate->base;
 	cstate->voltage = cstepX.voltage;
+	cstate->id = idx;
 
 	while (domain && domain->name != nv_clk_src_max) {
 		if (domain->flags & NVKM_CLK_DOM_FLAG_CORE) {
@@ -175,7 +266,7 @@ static int
 nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
 {
 	struct nvkm_subdev *subdev = &clk->subdev;
-	struct nvkm_ram *ram = subdev->device->fb->ram;
+	struct nvkm_fb *fb = subdev->device->fb;
 	struct nvkm_pci *pci = subdev->device->pci;
 	struct nvkm_pstate *pstate;
 	int ret, idx = 0;
@@ -190,7 +281,8 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
 
 	nvkm_pcie_set_link(pci, pstate->pcie_speed, pstate->pcie_width);
 
-	if (ram && ram->func->calc) {
+	if (fb && fb->ram && fb->ram->func->calc) {
+		struct nvkm_ram *ram = fb->ram;
 		int khz = pstate->base.domain[nv_clk_src_mem];
 		do {
 			ret = ram->func->calc(ram, khz);
@@ -200,7 +292,7 @@ nvkm_pstate_prog(struct nvkm_clk *clk, int pstatei)
 		ram->func->tidy(ram);
 	}
 
-	return nvkm_cstate_prog(clk, pstate, 0);
+	return nvkm_cstate_prog(clk, pstate, NVKM_CLK_CSTATE_HIGHEST);
 }
 
 static void
@@ -214,14 +306,14 @@ nvkm_pstate_work(struct work_struct *work)
 		return;
 	clk->pwrsrc = power_supply_is_system_supplied();
 
-	nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d D %d\n",
+	nvkm_trace(subdev, "P %d PWR %d U(AC) %d U(DC) %d A %d T %d°C D %d\n",
 		   clk->pstate, clk->pwrsrc, clk->ustate_ac, clk->ustate_dc,
-		   clk->astate, clk->tstate, clk->dstate);
+		   clk->astate, clk->temp, clk->dstate);
 
 	pstate = clk->pwrsrc ? clk->ustate_ac : clk->ustate_dc;
 	if (clk->state_nr && pstate != -1) {
 		pstate = (pstate < 0) ? clk->astate : pstate;
-		pstate = min(pstate, clk->state_nr - 1 + clk->tstate);
+		pstate = min(pstate, clk->state_nr - 1);
 		pstate = max(pstate, clk->dstate);
 	} else {
 		pstate = clk->pstate = -1;
@@ -316,7 +408,7 @@ nvkm_pstate_new(struct nvkm_clk *clk, int idx)
 	struct nvbios_cstepE cstepE;
 	struct nvbios_perfE perfE;
 	u8  ver, hdr, cnt, len;
-	u16 data;
+	u32 data;
 
 	data = nvbios_perfEp(bios, idx, &ver, &hdr, &cnt, &len, &perfE);
 	if (!data)
@@ -448,13 +540,12 @@ nvkm_clk_astate(struct nvkm_clk *clk, int req, int rel, bool wait)
 }
 
 int
-nvkm_clk_tstate(struct nvkm_clk *clk, int req, int rel)
+nvkm_clk_tstate(struct nvkm_clk *clk, u8 temp)
 {
-	if (!rel) clk->tstate  = req;
-	if ( rel) clk->tstate += rel;
-	clk->tstate = min(clk->tstate, 0);
-	clk->tstate = max(clk->tstate, -(clk->state_nr - 1));
-	return nvkm_pstate_calc(clk, true);
+	if (clk->temp == temp)
+		return 0;
+	clk->temp = temp;
+	return nvkm_pstate_calc(clk, false);
 }
 
 int
@@ -524,9 +615,9 @@ nvkm_clk_init(struct nvkm_subdev *subdev)
 		return clk->func->init(clk);
 
 	clk->astate = clk->state_nr - 1;
-	clk->tstate = 0;
 	clk->dstate = 0;
 	clk->pstate = -1;
+	clk->temp = 90; /* reasonable default value */
 	nvkm_pstate_calc(clk, true);
 	return 0;
 }
@@ -561,10 +652,22 @@ int
 nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device,
 	      int index, bool allow_reclock, struct nvkm_clk *clk)
 {
+	struct nvkm_subdev *subdev = &clk->subdev;
+	struct nvkm_bios *bios = device->bios;
 	int ret, idx, arglen;
 	const char *mode;
+	struct nvbios_vpstate_header h;
+
+	nvkm_subdev_ctor(&nvkm_clk, device, index, subdev);
+
+	if (bios && !nvbios_vpstate_parse(bios, &h)) {
+		struct nvbios_vpstate_entry base, boost;
+		if (!nvbios_vpstate_entry(bios, &h, h.boost_id, &boost))
+			clk->boost_khz = boost.clock_mhz * 1000;
+		if (!nvbios_vpstate_entry(bios, &h, h.base_id, &base))
+			clk->base_khz = base.clock_mhz * 1000;
+	}
 
-	nvkm_subdev_ctor(&nvkm_clk, device, index, &clk->subdev);
 	clk->func = func;
 	INIT_LIST_HEAD(&clk->states);
 	clk->domains = func->domains;
@@ -607,6 +710,8 @@ nvkm_clk_ctor(const struct nvkm_clk_func *func, struct nvkm_device *device,
 	if (mode)
 		clk->ustate_dc = nvkm_clk_nstate(clk, mode, arglen);
 
+	clk->boost_mode = nvkm_longopt(device->cfgopt, "NvBoost",
+				       NVKM_CLK_BOOST_NONE);
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
index 89d5543118cf..7f67f9f5a550 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gf100.c
@@ -457,7 +457,7 @@ gf100_clk = {
 		{ nv_clk_src_hubk06 , 0x00 },
 		{ nv_clk_src_hubk01 , 0x01 },
 		{ nv_clk_src_copy   , 0x02 },
-		{ nv_clk_src_gpc    , 0x03, 0, "core", 2000 },
+		{ nv_clk_src_gpc    , 0x03, NVKM_CLK_DOM_FLAG_VPSTATE, "core", 2000 },
 		{ nv_clk_src_rop    , 0x04 },
 		{ nv_clk_src_mem    , 0x05, 0, "memory", 1000 },
 		{ nv_clk_src_vdec   , 0x06 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
index 06bc0d2d6ae1..0b37e3da7feb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gk104.c
@@ -491,7 +491,7 @@ gk104_clk = {
 	.domains = {
 		{ nv_clk_src_crystal, 0xff },
 		{ nv_clk_src_href   , 0xff },
-		{ nv_clk_src_gpc    , 0x00, NVKM_CLK_DOM_FLAG_CORE, "core", 2000 },
+		{ nv_clk_src_gpc    , 0x00, NVKM_CLK_DOM_FLAG_CORE | NVKM_CLK_DOM_FLAG_VPSTATE, "core", 2000 },
 		{ nv_clk_src_hubk07 , 0x01, NVKM_CLK_DOM_FLAG_CORE },
 		{ nv_clk_src_rop    , 0x02, NVKM_CLK_DOM_FLAG_CORE },
 		{ nv_clk_src_mem    , 0x03, 0, "memory", 500 },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
index 056702ef69aa..96e0941c8edd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/gt215.c
@@ -180,7 +180,7 @@ gt215_clk_read(struct nvkm_clk *base, enum nv_clk_src src)
 	return 0;
 }
 
-int
+static int
 gt215_clk_info(struct nvkm_clk *base, int idx, u32 khz,
 	       struct gt215_clk_info *info)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
index 5841f297973c..da1770e47490 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/clk/nv50.c
@@ -112,7 +112,7 @@ read_pll_src(struct nv50_clk *clk, u32 base)
 		M    = (coef & 0x000000ff) >> 0;
 		break;
 	default:
-		BUG_ON(1);
+		BUG();
 	}
 
 	if (M)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
index a410c0db8a08..1730371933df 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/gm200.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/bit.h>
 #include <subdev/bios/pmu.h>
+#include <subdev/timer.h>
 
 static void
 pmu_code(struct nv50_devinit *init, u32 pmu, u32 img, u32 len, bool sec)
@@ -123,21 +124,13 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post)
 		return -EINVAL;
 	}
 
-	/* reset PMU and load init table parser ucode */
-	if (post) {
-		nvkm_mask(device, 0x000200, 0x00002000, 0x00000000);
-		nvkm_mask(device, 0x000200, 0x00002000, 0x00002000);
-		nvkm_rd32(device, 0x000200);
-		while (nvkm_rd32(device, 0x10a10c) & 0x00000006) {
-		}
-	}
-
 	ret = pmu_load(init, 0x04, post, &exec, &args);
 	if (ret)
 		return ret;
 
 	/* upload first chunk of init data */
 	if (post) {
+		// devinit tables
 		u32 pmu = pmu_args(init, args + 0x08, 0x08);
 		u32 img = nvbios_rd16(bios, bit_I.offset + 0x14);
 		u32 len = nvbios_rd16(bios, bit_I.offset + 0x16);
@@ -146,6 +139,7 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post)
 
 	/* upload second chunk of init data */
 	if (post) {
+		// devinit boot scripts
 		u32 pmu = pmu_args(init, args + 0x08, 0x10);
 		u32 img = nvbios_rd16(bios, bit_I.offset + 0x18);
 		u32 len = nvbios_rd16(bios, bit_I.offset + 0x1a);
@@ -156,8 +150,11 @@ gm200_devinit_post(struct nvkm_devinit *base, bool post)
 	if (post) {
 		nvkm_wr32(device, 0x10a040, 0x00005000);
 		pmu_exec(init, exec);
-		while (!(nvkm_rd32(device, 0x10a040) & 0x00002000)) {
-		}
+		if (nvkm_msec(device, 2000,
+			if (nvkm_rd32(device, 0x10a040) & 0x00002000)
+				break;
+		) < 0)
+			return -ETIMEDOUT;
 	}
 
 	/* load and execute some other ucode image (bios therm?) */
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
index c714b097719c..59362f8dee22 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/devinit/nv50.c
@@ -50,7 +50,7 @@ nv50_devinit_pll_set(struct nvkm_devinit *init, u32 type, u32 freq)
 	ret = nv04_pll_calc(subdev, &info, freq, &N1, &M1, &N2, &M2, &P);
 	if (!ret) {
 		nvkm_error(subdev, "failed pll calculation\n");
-		return ret;
+		return -EINVAL;
 	}
 
 	switch (info.type) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
index edcc157e6ac8..63566ba12fbb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/Kbuild
@@ -24,8 +24,9 @@ nvkm-y += nvkm/subdev/fb/gk104.o
 nvkm-y += nvkm/subdev/fb/gk20a.o
 nvkm-y += nvkm/subdev/fb/gm107.o
 nvkm-y += nvkm/subdev/fb/gm200.o
+nvkm-y += nvkm/subdev/fb/gm20b.o
 nvkm-y += nvkm/subdev/fb/gp100.o
-nvkm-y += nvkm/subdev/fb/gp104.o
+nvkm-y += nvkm/subdev/fb/gp102.o
 
 nvkm-y += nvkm/subdev/fb/ram.o
 nvkm-y += nvkm/subdev/fb/ramnv04.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
index 76433cc66fff..3841ad6be99e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.c
@@ -50,24 +50,33 @@ gf100_fb_intr(struct nvkm_fb *base)
 }
 
 int
-gf100_fb_oneinit(struct nvkm_fb *fb)
+gf100_fb_oneinit(struct nvkm_fb *base)
 {
-	struct nvkm_device *device = fb->subdev.device;
+	struct gf100_fb *fb = gf100_fb(base);
+	struct nvkm_device *device = fb->base.subdev.device;
 	int ret, size = 0x1000;
 
 	size = nvkm_longopt(device->cfgopt, "MmuDebugBufferSize", size);
 	size = min(size, 0x1000);
 
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, size, 0x1000,
-			      false, &fb->mmu_rd);
+			      false, &fb->base.mmu_rd);
 	if (ret)
 		return ret;
 
 	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, size, 0x1000,
-			      false, &fb->mmu_wr);
+			      false, &fb->base.mmu_wr);
 	if (ret)
 		return ret;
 
+	fb->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (fb->r100c10_page) {
+		fb->r100c10 = dma_map_page(device->dev, fb->r100c10_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(device->dev, fb->r100c10))
+			return -EFAULT;
+	}
+
 	return 0;
 }
 
@@ -123,14 +132,6 @@ gf100_fb_new_(const struct nvkm_fb_func *func, struct nvkm_device *device,
 	nvkm_fb_ctor(func, device, index, &fb->base);
 	*pfb = &fb->base;
 
-	fb->r100c10_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (fb->r100c10_page) {
-		fb->r100c10 = dma_map_page(device->dev, fb->r100c10_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(device->dev, fb->r100c10))
-			return -EFAULT;
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
index 449f431644b3..412eb89834e8 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gf100.h
@@ -16,4 +16,8 @@ void gf100_fb_init(struct nvkm_fb *);
 void gf100_fb_intr(struct nvkm_fb *);
 
 void gp100_fb_init(struct nvkm_fb *);
+
+void gm200_fb_init_page(struct nvkm_fb *fb);
+void gm200_fb_init(struct nvkm_fb *base);
+
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
index f815fe2bbf08..5d34d6136616 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gk20a.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ * Copyright (c) 2014-2016, NVIDIA CORPORATION. All rights reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
  * copy of this software and associated documentation files (the "Software"),
@@ -20,27 +20,21 @@
  * DEALINGS IN THE SOFTWARE.
  */
 #include "priv.h"
+#include "gf100.h"
 
-#include <core/memory.h>
-
-static void
-gk20a_fb_init(struct nvkm_fb *fb)
-{
-	struct nvkm_device *device = fb->subdev.device;
-	nvkm_wr32(device, 0x100cc8, nvkm_memory_addr(fb->mmu_wr) >> 8);
-	nvkm_wr32(device, 0x100ccc, nvkm_memory_addr(fb->mmu_rd) >> 8);
-}
-
+/* GK20A's FB is similar to GF100's, but without the ability to allocate VRAM */
 static const struct nvkm_fb_func
 gk20a_fb = {
+	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
-	.init = gk20a_fb_init,
+	.init = gf100_fb_init,
 	.init_page = gf100_fb_init_page,
+	.intr = gf100_fb_intr,
 	.memtype_valid = gf100_fb_memtype_valid,
 };
 
 int
 gk20a_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
 {
-	return nvkm_fb_new_(&gk20a_fb, device, index, pfb);
+	return gf100_fb_new_(&gk20a_fb, device, index, pfb);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
index 62f653240be3..fe5886013ac0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm200.c
@@ -44,7 +44,7 @@ gm200_fb_init_page(struct nvkm_fb *fb)
 	}
 }
 
-static void
+void
 gm200_fb_init(struct nvkm_fb *base)
 {
 	struct gf100_fb *fb = gf100_fb(base);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c
new file mode 100644
index 000000000000..b87c233bcd6d
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gm20b.c
@@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#include "priv.h"
+#include "gf100.h"
+
+/* GM20B's FB is similar to GM200, but without the ability to allocate VRAM */
+static const struct nvkm_fb_func
+gm20b_fb = {
+	.dtor = gf100_fb_dtor,
+	.oneinit = gf100_fb_oneinit,
+	.init = gm200_fb_init,
+	.init_page = gm200_fb_init_page,
+	.intr = gf100_fb_intr,
+	.memtype_valid = gf100_fb_memtype_valid,
+};
+
+int
+gm20b_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+{
+	return gf100_fb_new_(&gm20b_fb, device, index, pfb);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
index 92cb71861bec..73b4ae1c73dc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/gp102.c
@@ -27,7 +27,7 @@
 #include <core/memory.h>
 
 static const struct nvkm_fb_func
-gp104_fb = {
+gp102_fb = {
 	.dtor = gf100_fb_dtor,
 	.oneinit = gf100_fb_oneinit,
 	.init = gp100_fb_init,
@@ -37,7 +37,7 @@ gp104_fb = {
 };
 
 int
-gp104_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
+gp102_fb_new(struct nvkm_device *device, int index, struct nvkm_fb **pfb)
 {
-	return gf100_fb_new_(&gp104_fb, device, index, pfb);
+	return gf100_fb_new_(&gp102_fb, device, index, pfb);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
index 1b5fb02eab2a..0595e0722bfc 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/nv50.c
@@ -210,6 +210,23 @@ nv50_fb_intr(struct nvkm_fb *base)
 	nvkm_fifo_chan_put(fifo, flags, &chan);
 }
 
+static int
+nv50_fb_oneinit(struct nvkm_fb *base)
+{
+	struct nv50_fb *fb = nv50_fb(base);
+	struct nvkm_device *device = fb->base.subdev.device;
+
+	fb->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+	if (fb->r100c08_page) {
+		fb->r100c08 = dma_map_page(device->dev, fb->r100c08_page, 0,
+					   PAGE_SIZE, DMA_BIDIRECTIONAL);
+		if (dma_mapping_error(device->dev, fb->r100c08))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
 static void
 nv50_fb_init(struct nvkm_fb *base)
 {
@@ -245,6 +262,7 @@ nv50_fb_dtor(struct nvkm_fb *base)
 static const struct nvkm_fb_func
 nv50_fb_ = {
 	.dtor = nv50_fb_dtor,
+	.oneinit = nv50_fb_oneinit,
 	.init = nv50_fb_init,
 	.intr = nv50_fb_intr,
 	.ram_new = nv50_fb_ram_new,
@@ -263,16 +281,6 @@ nv50_fb_new_(const struct nv50_fb_func *func, struct nvkm_device *device,
 	fb->func = func;
 	*pfb = &fb->base;
 
-	fb->r100c08_page = alloc_page(GFP_KERNEL | __GFP_ZERO);
-	if (fb->r100c08_page) {
-		fb->r100c08 = dma_map_page(device->dev, fb->r100c08_page, 0,
-					   PAGE_SIZE, DMA_BIDIRECTIONAL);
-		if (dma_mapping_error(device->dev, fb->r100c08))
-			return -EFAULT;
-	} else {
-		nvkm_warn(&fb->base.subdev, "failed 100c08 page alloc\n");
-	}
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
index b9ec0ae6723a..b60068b7d8f9 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ram.h
@@ -24,6 +24,7 @@ int  gf100_ram_ctor(const struct nvkm_ram_func *, struct nvkm_fb *,
 int  gf100_ram_get(struct nvkm_ram *, u64, u32, u32, u32, struct nvkm_mem **);
 void gf100_ram_put(struct nvkm_ram *, struct nvkm_mem **);
 
+int  gk104_ram_ctor(struct nvkm_fb *, struct nvkm_ram **, u32);
 int  gk104_ram_init(struct nvkm_ram *ram);
 
 /* RAM type-specific MR calculation routines */
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
index 772425ca5a9e..6758da93a3a1 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgf100.c
@@ -420,8 +420,6 @@ gf100_ram_tidy(struct nvkm_ram *base)
 	ram_exec(&ram->fuc, false);
 }
 
-extern const u8 gf100_pte_storage_type_map[256];
-
 void
 gf100_ram_put(struct nvkm_ram *ram, struct nvkm_mem **pmem)
 {
@@ -447,7 +445,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_ltc *ltc = ram->fb->subdev.device->ltc;
 	struct nvkm_mm *mm = &ram->vram;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int type = (memtype & 0x0ff);
 	int back = (memtype & 0x800);
@@ -464,7 +462,6 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	if (!mem)
 		return -ENOMEM;
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->size = size;
 
 	mutex_lock(&ram->fb->subdev.mutex);
@@ -480,6 +477,7 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 	}
 	mem->memtype = type;
 
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(mm, 0, 1, size, ncmin, align, &r);
@@ -491,13 +489,13 @@ gf100_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		size -= r->length;
 	} while (size);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
index 1fa3ade468ae..fb8a1239743d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgk104.c
@@ -259,7 +259,9 @@ gk104_ram_calc_gddr5(struct gk104_ram *ram, u32 freq)
 
 	ram_mask(fuc, 0x10f808, 0x40000000, 0x40000000);
 	ram_block(fuc);
-	ram_wr32(fuc, 0x62c000, 0x0f0f0000);
+
+	if (nvkm_device_engine(ram->base.fb->subdev.device, NVKM_ENGINE_DISP))
+		ram_wr32(fuc, 0x62c000, 0x0f0f0000);
 
 	/* MR1: turn termination on early, for some reason.. */
 	if ((ram->base.mr[1] & 0x03c) != 0x030) {
@@ -658,7 +660,9 @@ gk104_ram_calc_gddr5(struct gk104_ram *ram, u32 freq)
 		gk104_ram_train(fuc, 0x80020000, 0x01000000);
 
 	ram_unblock(fuc);
-	ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
+
+	if (nvkm_device_engine(ram->base.fb->subdev.device, NVKM_ENGINE_DISP))
+		ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
 
 	if (next->bios.rammap_11_08_01)
 		data = 0x00000800;
@@ -706,7 +710,9 @@ gk104_ram_calc_sddr3(struct gk104_ram *ram, u32 freq)
 
 	ram_mask(fuc, 0x10f808, 0x40000000, 0x40000000);
 	ram_block(fuc);
-	ram_wr32(fuc, 0x62c000, 0x0f0f0000);
+
+	if (nvkm_device_engine(ram->base.fb->subdev.device, NVKM_ENGINE_DISP))
+		ram_wr32(fuc, 0x62c000, 0x0f0f0000);
 
 	if (vc == 1 && ram_have(fuc, gpio2E)) {
 		u32 temp  = ram_mask(fuc, gpio2E, 0x3000, fuc->r_func2E[1]);
@@ -936,7 +942,9 @@ gk104_ram_calc_sddr3(struct gk104_ram *ram, u32 freq)
 	ram_nsec(fuc, 1000);
 
 	ram_unblock(fuc);
-	ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
+
+	if (nvkm_device_engine(ram->base.fb->subdev.device, NVKM_ENGINE_DISP))
+		ram_wr32(fuc, 0x62c000, 0x0f0f0f00);
 
 	if (next->bios.rammap_11_08_01)
 		data = 0x00000800;
@@ -981,7 +989,7 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 		int *N1, int *fN1, int *M1, int *P1,
 		int *N2, int *M2, int *P2)
 {
-	int best_clk = 0, best_err = target_khz, p_ref, n_ref;
+	int best_err = target_khz, p_ref, n_ref;
 	bool upper = false;
 
 	*M1 = 1;
@@ -1002,7 +1010,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 			/* we found a better combination */
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
@@ -1014,7 +1021,6 @@ gk104_pll_calc_hiclk(int target_khz, int crystal,
 				- target_khz;
 			if (cur_err < best_err) {
 				best_err = cur_err;
-				best_clk = cur_clk;
 				*N2 = cur_N;
 				*N1 = n_ref;
 				*P1 = p_ref;
@@ -1530,6 +1536,12 @@ gk104_ram_func = {
 int
 gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
+	return gk104_ram_ctor(fb, pram, 0x022554);
+}
+
+int
+gk104_ram_ctor(struct nvkm_fb *fb, struct nvkm_ram **pram, u32 maskaddr)
+{
 	struct nvkm_subdev *subdev = &fb->subdev;
 	struct nvkm_device *device = subdev->device;
 	struct nvkm_bios *bios = device->bios;
@@ -1544,7 +1556,7 @@ gk104_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 		return -ENOMEM;
 	*pram = &ram->base;
 
-	ret = gf100_ram_ctor(&gk104_ram_func, fb, 0x022554, &ram->base);
+	ret = gf100_ram_ctor(&gk104_ram_func, fb, maskaddr, &ram->base);
 	if (ret)
 		return ret;
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c
index 43d807f6ca71..ac862d1d77bd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgm107.c
@@ -23,18 +23,8 @@
  */
 #include "ram.h"
 
-static const struct nvkm_ram_func
-gm107_ram_func = {
-	.init = gk104_ram_init,
-	.get = gf100_ram_get,
-	.put = gf100_ram_put,
-};
-
 int
 gm107_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 {
-	if (!(*pram = kzalloc(sizeof(**pram), GFP_KERNEL)))
-		return -ENOMEM;
-
-	return gf100_ram_ctor(&gm107_ram_func, fb, 0x021c14, *pram);
+	return gk104_ram_ctor(fb, pram, 0x021c14);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
index f3be408b5e5e..405faabe8dcd 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgp100.c
@@ -92,13 +92,13 @@ gp100_ram_new(struct nvkm_fb *fb, struct nvkm_ram **pram)
 	enum nvkm_ram_type type = nvkm_fb_bios_memtype(device->bios);
 	const u32 rsvd_head = ( 256 * 1024); /* vga memory */
 	const u32 rsvd_tail = (1024 * 1024); /* vbios etc */
-	u32 fbpa_num = nvkm_rd32(device, 0x022438), fbpa;
+	u32 fbpa_num = nvkm_rd32(device, 0x02243c), fbpa;
 	u32 fbio_opt = nvkm_rd32(device, 0x021c14);
 	u64 part, size = 0, comm = ~0ULL;
 	bool mixed = false;
 	int ret;
 
-	nvkm_debug(subdev, "022438: %08x\n", fbpa_num);
+	nvkm_debug(subdev, "02243c: %08x\n", fbpa_num);
 	nvkm_debug(subdev, "021c14: %08x\n", fbio_opt);
 	for (fbpa = 0; fbpa < fbpa_num; fbpa++) {
 		if (!(fbio_opt & (1 << fbpa))) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
index d15ea886df27..f10664372161 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramgt215.c
@@ -95,7 +95,7 @@ struct gt215_ram {
 	struct gt215_ltrain ltrain;
 };
 
-void
+static void
 gt215_link_train_calc(u32 *vals, struct gt215_ltrain *train)
 {
 	int i, lo, hi;
@@ -149,7 +149,7 @@ gt215_link_train_calc(u32 *vals, struct gt215_ltrain *train)
 /*
  * Link training for (at least) DDR3
  */
-int
+static int
 gt215_link_train(struct gt215_ram *ram)
 {
 	struct gt215_ltrain *train = &ram->ltrain;
@@ -267,7 +267,7 @@ out:
 	return ret;
 }
 
-int
+static int
 gt215_link_train_init(struct gt215_ram *ram)
 {
 	static const u32 pattern[16] = {
@@ -333,7 +333,7 @@ gt215_link_train_init(struct gt215_ram *ram)
 	return 0;
 }
 
-void
+static void
 gt215_link_train_fini(struct gt215_ram *ram)
 {
 	if (ram->ltrain.mem)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
index 0a0e44b75577..017a91de74a0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/rammcp77.c
@@ -39,7 +39,7 @@ mcp77_ram_init(struct nvkm_ram *base)
 	u32 flush  = ((ram->base.size - (ram->poller_base + 0x40)) >> 5) - 1;
 
 	/* Enable NISO poller for various clients and set their associated
-	 * read address, only for MCP77/78 and MCP79/7A. (fd#25701)
+	 * read address, only for MCP77/78 and MCP79/7A. (fd#27501)
 	 */
 	nvkm_wr32(device, 0x100c18, dniso);
 	nvkm_mask(device, 0x100c14, 0x00000000, 0x00000001);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
index 87bde8ff2d6b..6549b0588309 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/ramnv50.c
@@ -496,15 +496,12 @@ nv50_ram_tidy(struct nvkm_ram *base)
 void
 __nv50_ram_put(struct nvkm_ram *ram, struct nvkm_mem *mem)
 {
-	struct nvkm_mm_node *this;
-
-	while (!list_empty(&mem->regions)) {
-		this = list_first_entry(&mem->regions, typeof(*this), rl_entry);
-
-		list_del(&this->rl_entry);
-		nvkm_mm_free(&ram->vram, &this);
+	struct nvkm_mm_node *next = mem->mem;
+	struct nvkm_mm_node *node;
+	while ((node = next)) {
+		next = node->next;
+		nvkm_mm_free(&ram->vram, &node);
 	}
-
 	nvkm_mm_free(&ram->tags, &mem->tag);
 }
 
@@ -530,7 +527,7 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 {
 	struct nvkm_mm *heap = &ram->vram;
 	struct nvkm_mm *tags = &ram->tags;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node **node, *r;
 	struct nvkm_mem *mem;
 	int comp = (memtype & 0x300) >> 8;
 	int type = (memtype & 0x07f);
@@ -559,11 +556,11 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			comp = 0;
 	}
 
-	INIT_LIST_HEAD(&mem->regions);
 	mem->memtype = (comp << 7) | type;
 	mem->size = max;
 
 	type = nv50_fb_memtype[type];
+	node = &mem->mem;
 	do {
 		if (back)
 			ret = nvkm_mm_tail(heap, 0, type, max, min, align, &r);
@@ -575,13 +572,13 @@ nv50_ram_get(struct nvkm_ram *ram, u64 size, u32 align, u32 ncmin,
 			return ret;
 		}
 
-		list_add_tail(&r->rl_entry, &mem->regions);
+		*node = r;
+		node = &r->next;
 		max -= r->length;
 	} while (max);
 	mutex_unlock(&ram->fb->subdev.mutex);
 
-	r = list_first_entry(&mem->regions, struct nvkm_mm_node, rl_entry);
-	mem->offset = (u64)r->offset << NVKM_RAM_MM_SHIFT;
+	mem->offset = (u64)mem->mem->offset << NVKM_RAM_MM_SHIFT;
 	*pmem = mem;
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
index b9f1ffdfc602..4dcd8742f2da 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr2.c
@@ -23,6 +23,7 @@
  *          Ben Skeggs
  */
 #include "priv.h"
+#include "ram.h"
 
 struct ramxlat {
 	int id;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
index 26900333b1d6..eca8a445eab3 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/fb/sddr3.c
@@ -23,6 +23,7 @@
  * 	    Roy Spliet <rspliet@eclipso.eu>
  */
 #include "priv.h"
+#include "ram.h"
 
 struct ramxlat {
 	int id;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c
index 3f45afd17d5a..2ead515b8530 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/gpio/gk104.c
@@ -37,7 +37,7 @@ gk104_gpio_intr_stat(struct nvkm_gpio *gpio, u32 *hi, u32 *lo)
 	nvkm_wr32(device, 0x00dc80, intr1);
 }
 
-void
+static void
 gk104_gpio_intr_mask(struct nvkm_gpio *gpio, u32 type, u32 mask, u32 data)
 {
 	struct nvkm_device *device = gpio->subdev.device;
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c
index f0851d57df2f..01d5c5a56e2e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/aux.c
@@ -74,7 +74,7 @@ nvkm_i2c_aux_i2c_func(struct i2c_adapter *adap)
 	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL;
 }
 
-const struct i2c_algorithm
+static const struct i2c_algorithm
 nvkm_i2c_aux_i2c_algo = {
 	.master_xfer = nvkm_i2c_aux_i2c_xfer,
 	.functionality = nvkm_i2c_aux_i2c_func
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
index 954f5b76bfcf..b80236a4eeac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxg94.c
@@ -79,7 +79,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 	struct g94_i2c_aux *aux = g94_i2c_aux(obj);
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
 	const u32 base = aux->ch * 0x50;
-	u32 ctrl, stat, timeout, retries;
+	u32 ctrl, stat, timeout, retries = 0;
 	u32 xbuf[4] = {};
 	int ret, i;
 
@@ -111,7 +111,7 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 	nvkm_wr32(device, 0x00e4e0 + base, addr);
 
 	/* (maybe) retry transaction a number of times on failure... */
-	for (retries = 0; !ret && retries < 32; retries++) {
+	do {
 		/* reset, and delay a while if this is a retry */
 		nvkm_wr32(device, 0x00e4e4 + base, 0x80000000 | ctrl);
 		nvkm_wr32(device, 0x00e4e4 + base, 0x00000000 | ctrl);
@@ -131,20 +131,20 @@ g94_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 				goto out;
 			}
 		} while (ctrl & 0x00010000);
-		ret = 1;
+		ret = 0;
 
 		/* read status, and check if transaction completed ok */
 		stat = nvkm_mask(device, 0x00e4e8 + base, 0, 0);
 		if ((stat & 0x000f0000) == 0x00080000 ||
 		    (stat & 0x000f0000) == 0x00020000)
-			ret = retry ? 0 : 1;
+			ret = 1;
 		if ((stat & 0x00000100))
 			ret = -ETIMEDOUT;
 		if ((stat & 0x00000e00))
 			ret = -EIO;
 
 		AUX_TRACE(&aux->base, "%02d %08x %08x", retries, ctrl, stat);
-	}
+	} while (ret && retry && retries++ < 32);
 
 	if (type & 1) {
 		for (i = 0; i < 16; i += 4) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
index 61d729b82c69..ed458c7f056b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/i2c/auxgm200.c
@@ -79,7 +79,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 	struct gm200_i2c_aux *aux = gm200_i2c_aux(obj);
 	struct nvkm_device *device = aux->base.pad->i2c->subdev.device;
 	const u32 base = aux->ch * 0x50;
-	u32 ctrl, stat, timeout, retries;
+	u32 ctrl, stat, timeout, retries = 0;
 	u32 xbuf[4] = {};
 	int ret, i;
 
@@ -111,7 +111,7 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 	nvkm_wr32(device, 0x00d950 + base, addr);
 
 	/* (maybe) retry transaction a number of times on failure... */
-	for (retries = 0; !ret && retries < 32; retries++) {
+	do {
 		/* reset, and delay a while if this is a retry */
 		nvkm_wr32(device, 0x00d954 + base, 0x80000000 | ctrl);
 		nvkm_wr32(device, 0x00d954 + base, 0x00000000 | ctrl);
@@ -131,20 +131,20 @@ gm200_i2c_aux_xfer(struct nvkm_i2c_aux *obj, bool retry,
 				goto out;
 			}
 		} while (ctrl & 0x00010000);
-		ret = 1;
+		ret = 0;
 
 		/* read status, and check if transaction completed ok */
 		stat = nvkm_mask(device, 0x00d958 + base, 0, 0);
 		if ((stat & 0x000f0000) == 0x00080000 ||
 		    (stat & 0x000f0000) == 0x00020000)
-			ret = retry ? 0 : 1;
+			ret = 1;
 		if ((stat & 0x00000100))
 			ret = -ETIMEDOUT;
 		if ((stat & 0x00000e00))
 			ret = -EIO;
 
 		AUX_TRACE(&aux->base, "%02d %08x %08x", retries, ctrl, stat);
-	}
+	} while (ret && retry && retries++ < 32);
 
 	if (type & 1) {
 		for (i = 0; i < 16; i += 4) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
index b7159b338fac..1a4ab825852c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ibus/gk20a.c
@@ -29,7 +29,7 @@ gk20a_ibus_init_ibus_ring(struct nvkm_subdev *ibus)
 	nvkm_mask(device, 0x137250, 0x3f, 0);
 
 	nvkm_mask(device, 0x000200, 0x20, 0);
-	usleep_range(20, 30);
+	udelay(20);
 	nvkm_mask(device, 0x000200, 0x20, 0x20);
 
 	nvkm_wr32(device, 0x12004c, 0x4);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
index 41bd5d0f7692..fecfa6afcf54 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/base.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/extdev.h>
 #include <subdev/bios/iccsense.h>
+#include <subdev/bios/power_budget.h>
 #include <subdev/i2c.h>
 
 static bool
@@ -96,60 +97,12 @@ nvkm_iccsense_ina3221_read(struct nvkm_iccsense *iccsense,
 }
 
 static void
-nvkm_iccsense_ina209_config(struct nvkm_iccsense *iccsense,
-			    struct nvkm_iccsense_sensor *sensor)
-{
-	struct nvkm_subdev *subdev = &iccsense->subdev;
-	/* configuration:
-	 * 0x0007: 0x0007 shunt and bus continous
-	 * 0x0078: 0x0078 128 samples shunt
-	 * 0x0780: 0x0780 128 samples bus
-	 * 0x1800: 0x0000 +-40 mV shunt range
-	 * 0x2000: 0x0000 16V FSR
-         */
-	u16 value = 0x07ff;
-	nvkm_debug(subdev, "config for sensor id %i: 0x%x\n", sensor->id, value);
-	nv_wr16i2cr(sensor->i2c, sensor->addr, 0x00, value);
-}
-
-static void
-nvkm_iccsense_ina3221_config(struct nvkm_iccsense *iccsense,
-			     struct nvkm_iccsense_sensor *sensor)
-{
-	struct nvkm_subdev *subdev = &iccsense->subdev;
-	/* configuration:
-	 * 0x0007: 0x0007 shunt and bus continous
-	 * 0x0031: 0x0000 140 us conversion time shunt
-	 * 0x01c0: 0x0000 140 us conversion time bus
-	 * 0x0f00: 0x0f00 1024 samples
-	 * 0x7000: 0x?000 channels
-         */
-	u16 value = 0x0e07;
-	if (sensor->rail_mask & 0x1)
-		value |= 0x1 << 14;
-	if (sensor->rail_mask & 0x2)
-		value |= 0x1 << 13;
-	if (sensor->rail_mask & 0x4)
-		value |= 0x1 << 12;
-	nvkm_debug(subdev, "config for sensor id %i: 0x%x\n", sensor->id, value);
-	nv_wr16i2cr(sensor->i2c, sensor->addr, 0x00, value);
-}
-
-static void
 nvkm_iccsense_sensor_config(struct nvkm_iccsense *iccsense,
 		            struct nvkm_iccsense_sensor *sensor)
 {
-	switch (sensor->type) {
-	case NVBIOS_EXTDEV_INA209:
-	case NVBIOS_EXTDEV_INA219:
-		nvkm_iccsense_ina209_config(iccsense, sensor);
-		break;
-	case NVBIOS_EXTDEV_INA3221:
-		nvkm_iccsense_ina3221_config(iccsense, sensor);
-		break;
-	default:
-		break;
-	}
+	struct nvkm_subdev *subdev = &iccsense->subdev;
+	nvkm_trace(subdev, "write config of extdev %i: 0x%04x\n", sensor->id, sensor->config);
+	nv_wr16i2cr(sensor->i2c, sensor->addr, 0x00, sensor->config);
 }
 
 int
@@ -196,7 +149,6 @@ nvkm_iccsense_dtor(struct nvkm_subdev *subdev)
 static struct nvkm_iccsense_sensor*
 nvkm_iccsense_create_sensor(struct nvkm_iccsense *iccsense, u8 id)
 {
-
 	struct nvkm_subdev *subdev = &iccsense->subdev;
 	struct nvkm_bios *bios = subdev->device->bios;
 	struct nvkm_i2c *i2c = subdev->device->i2c;
@@ -245,7 +197,7 @@ nvkm_iccsense_create_sensor(struct nvkm_iccsense *iccsense, u8 id)
 	sensor->type = extdev.type;
 	sensor->i2c = &i2c_bus->i2c;
 	sensor->addr = addr;
-	sensor->rail_mask = 0x0;
+	sensor->config = 0x0;
 	return sensor;
 }
 
@@ -265,56 +217,79 @@ nvkm_iccsense_oneinit(struct nvkm_subdev *subdev)
 {
 	struct nvkm_iccsense *iccsense = nvkm_iccsense(subdev);
 	struct nvkm_bios *bios = subdev->device->bios;
+	struct nvbios_power_budget budget;
 	struct nvbios_iccsense stbl;
-	int i;
+	int i, ret;
+
+	if (!bios)
+		return 0;
+
+	ret = nvbios_power_budget_header(bios, &budget);
+	if (!ret && budget.cap_entry != 0xff) {
+		struct nvbios_power_budget_entry entry;
+		ret = nvbios_power_budget_entry(bios, &budget,
+		                                budget.cap_entry, &entry);
+		if (!ret) {
+			iccsense->power_w_max  = entry.avg_w;
+			iccsense->power_w_crit = entry.max_w;
+		}
+	}
 
-	if (!bios || nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
+	if (nvbios_iccsense_parse(bios, &stbl) || !stbl.nr_entry)
 		return 0;
 
 	iccsense->data_valid = true;
 	for (i = 0; i < stbl.nr_entry; ++i) {
-		struct pwr_rail_t *r = &stbl.rail[i];
-		struct nvkm_iccsense_rail *rail;
+		struct pwr_rail_t *pwr_rail = &stbl.rail[i];
 		struct nvkm_iccsense_sensor *sensor;
-		int (*read)(struct nvkm_iccsense *,
-			    struct nvkm_iccsense_rail *);
+		int r;
 
-		if (!r->mode || r->resistor_mohm == 0)
+		if (pwr_rail->mode != 1 || !pwr_rail->resistor_count)
 			continue;
 
-		sensor = nvkm_iccsense_get_sensor(iccsense, r->extdev_id);
+		sensor = nvkm_iccsense_get_sensor(iccsense, pwr_rail->extdev_id);
 		if (!sensor)
 			continue;
 
-		switch (sensor->type) {
-		case NVBIOS_EXTDEV_INA209:
-			if (r->rail != 0)
-				continue;
-			read = nvkm_iccsense_ina209_read;
-			break;
-		case NVBIOS_EXTDEV_INA219:
-			if (r->rail != 0)
+		if (!sensor->config)
+			sensor->config = pwr_rail->config;
+		else if (sensor->config != pwr_rail->config)
+			nvkm_error(subdev, "config mismatch found for extdev %i\n", pwr_rail->extdev_id);
+
+		for (r = 0; r < pwr_rail->resistor_count; ++r) {
+			struct nvkm_iccsense_rail *rail;
+			struct pwr_rail_resistor_t *res = &pwr_rail->resistors[r];
+			int (*read)(struct nvkm_iccsense *,
+				    struct nvkm_iccsense_rail *);
+
+			if (!res->mohm || !res->enabled)
 				continue;
-			read = nvkm_iccsense_ina219_read;
-			break;
-		case NVBIOS_EXTDEV_INA3221:
-			if (r->rail >= 3)
+
+			switch (sensor->type) {
+			case NVBIOS_EXTDEV_INA209:
+				read = nvkm_iccsense_ina209_read;
+				break;
+			case NVBIOS_EXTDEV_INA219:
+				read = nvkm_iccsense_ina219_read;
+				break;
+			case NVBIOS_EXTDEV_INA3221:
+				read = nvkm_iccsense_ina3221_read;
+				break;
+			default:
 				continue;
-			read = nvkm_iccsense_ina3221_read;
-			break;
-		default:
-			continue;
+			}
+
+			rail = kmalloc(sizeof(*rail), GFP_KERNEL);
+			if (!rail)
+				return -ENOMEM;
+
+			rail->read = read;
+			rail->sensor = sensor;
+			rail->idx = r;
+			rail->mohm = res->mohm;
+			nvkm_debug(subdev, "create rail for extdev %i: { idx: %i, mohm: %i }\n", pwr_rail->extdev_id, r, rail->mohm);
+			list_add_tail(&rail->head, &iccsense->rails);
 		}
-
-		rail = kmalloc(sizeof(*rail), GFP_KERNEL);
-		if (!rail)
-			return -ENOMEM;
-		sensor->rail_mask |= 1 << r->rail;
-		rail->read = read;
-		rail->sensor = sensor;
-		rail->idx = r->rail;
-		rail->mohm = r->resistor_mohm;
-		list_add_tail(&rail->head, &iccsense->rails);
 	}
 	return 0;
 }
@@ -329,7 +304,8 @@ nvkm_iccsense_init(struct nvkm_subdev *subdev)
 	return 0;
 }
 
-struct nvkm_subdev_func iccsense_func = {
+static const struct nvkm_subdev_func
+iccsense_func = {
 	.oneinit = nvkm_iccsense_oneinit,
 	.init = nvkm_iccsense_init,
 	.dtor = nvkm_iccsense_dtor,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h
index b72c31d2f908..e90e0f6ed008 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/iccsense/priv.h
@@ -10,7 +10,7 @@ struct nvkm_iccsense_sensor {
 	enum nvbios_extdev_type type;
 	struct i2c_adapter *i2c;
 	u8 addr;
-	u8 rail_mask;
+	u16 config;
 };
 
 struct nvkm_iccsense_rail {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
index 8ed8f65ff664..10c987a654ec 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/base.c
@@ -104,7 +104,7 @@ nvkm_instobj_dtor(struct nvkm_memory *memory)
 	return iobj;
 }
 
-const struct nvkm_memory_func
+static const struct nvkm_memory_func
 nvkm_instobj_func = {
 	.dtor = nvkm_instobj_dtor,
 	.target = nvkm_instobj_target,
@@ -156,7 +156,7 @@ nvkm_instobj_wr32_slow(struct nvkm_memory *memory, u64 offset, u32 data)
 	return nvkm_wo32(iobj->parent, offset, data);
 }
 
-const struct nvkm_memory_func
+static const struct nvkm_memory_func
 nvkm_instobj_func_slow = {
 	.dtor = nvkm_instobj_dtor,
 	.target = nvkm_instobj_target,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
index a6a7fa0d7679..9dec58ec3d9f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/instmem/gk20a.c
@@ -116,7 +116,7 @@ struct gk20a_instmem {
 static enum nvkm_memory_target
 gk20a_instobj_target(struct nvkm_memory *memory)
 {
-	return NVKM_MEM_TARGET_HOST;
+	return NVKM_MEM_TARGET_NCOH;
 }
 
 static u64
@@ -305,11 +305,11 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 	struct gk20a_instobj_iommu *node = gk20a_instobj_iommu(memory);
 	struct gk20a_instmem *imem = node->base.imem;
 	struct device *dev = imem->base.subdev.device->dev;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->base.mem.mem;
 	unsigned long flags;
 	int i;
 
-	if (unlikely(list_empty(&node->base.mem.regions)))
+	if (unlikely(!r))
 		goto out;
 
 	spin_lock_irqsave(&imem->lock, flags);
@@ -320,9 +320,6 @@ gk20a_instobj_dtor_iommu(struct nvkm_memory *memory)
 
 	spin_unlock_irqrestore(&imem->lock, flags);
 
-	r = list_first_entry(&node->base.mem.regions, struct nvkm_mm_node,
-			     rl_entry);
-
 	/* clear IOMMU bit to unmap pages */
 	r->offset &= ~BIT(imem->iommu_bit - imem->iommu_pgshift);
 
@@ -404,10 +401,7 @@ gk20a_instobj_ctor_dma(struct gk20a_instmem *imem, u32 npages, u32 align,
 	node->r.length = (npages << PAGE_SHIFT) >> 12;
 
 	node->base.mem.offset = node->handle;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&node->r.rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = &node->r;
 	return 0;
 }
 
@@ -484,10 +478,7 @@ gk20a_instobj_ctor_iommu(struct gk20a_instmem *imem, u32 npages, u32 align,
 	r->offset |= BIT(imem->iommu_bit - imem->iommu_pgshift);
 
 	node->base.mem.offset = ((u64)r->offset) << imem->iommu_pgshift;
-
-	INIT_LIST_HEAD(&node->base.mem.regions);
-	list_add_tail(&r->rl_entry, &node->base.mem.regions);
-
+	node->base.mem.mem = r;
 	return 0;
 
 release_area:
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
index 39c2a38e54f7..0c7ef250dcaf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/ltc/base.c
@@ -47,8 +47,10 @@ nvkm_ltc_tags_clear(struct nvkm_ltc *ltc, u32 first, u32 count)
 
 	BUG_ON((first > limit) || (limit >= ltc->num_tags));
 
+	mutex_lock(&ltc->subdev.mutex);
 	ltc->func->cbc_clear(ltc, first, limit);
 	ltc->func->cbc_wait(ltc);
+	mutex_unlock(&ltc->subdev.mutex);
 }
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
index 6b25e25f9eba..09f669ac6630 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/base.c
@@ -161,6 +161,16 @@ nvkm_mc_enable(struct nvkm_device *device, enum nvkm_devidx devidx)
 	}
 }
 
+bool
+nvkm_mc_enabled(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	u64 pmc_enable = nvkm_mc_reset_mask(device, false, devidx);
+
+	return (pmc_enable != 0) &&
+	       ((nvkm_rd32(device, 0x000200) & pmc_enable) == pmc_enable);
+}
+
+
 static int
 nvkm_mc_fini(struct nvkm_subdev *subdev, bool suspend)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
index c3d66ef5dc12..430a61c3df44 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mc/g84.c
@@ -34,7 +34,7 @@ g84_mc_reset[] = {
 	{}
 };
 
-const struct nvkm_mc_map
+static const struct nvkm_mc_map
 g84_mc_intr[] = {
 	{ 0x04000000, NVKM_ENGINE_DISP },
 	{ 0x00020000, NVKM_ENGINE_VP },
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
index 5df9669ea39c..d06ad2c372bf 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mmu/base.c
@@ -31,7 +31,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 {
 	struct nvkm_vm *vm = vma->vm;
 	struct nvkm_mmu *mmu = vm->mmu;
-	struct nvkm_mm_node *r;
+	struct nvkm_mm_node *r = node->mem;
 	int big = vma->node->type != mmu->func->spg_shift;
 	u32 offset = vma->node->offset + (delta >> 12);
 	u32 bits = vma->node->type - 12;
@@ -41,7 +41,7 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 	u32 end, len;
 
 	delta = 0;
-	list_for_each_entry(r, &node->regions, rl_entry) {
+	while (r) {
 		u64 phys = (u64)r->offset << 12;
 		u32 num  = r->length >> bits;
 
@@ -65,7 +65,8 @@ nvkm_vm_map_at(struct nvkm_vma *vma, u64 delta, struct nvkm_mem *node)
 
 			delta += (u64)len << vma->node->type;
 		}
-	}
+		r = r->next;
+	};
 
 	mmu->func->flush(vm);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
index 21b65ee254e4..e3e2f5e83815 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/base.c
@@ -250,6 +250,10 @@ nvkm_mxm_new_(struct nvkm_device *device, int index, struct nvkm_mxm **pmxm)
 	}
 
 	nvkm_info(&mxm->subdev, "BIOS version %d.%d\n", ver >> 4, ver & 0x0f);
+	nvkm_debug(&mxm->subdev, "module flags: %02x\n",
+		   nvbios_rd08(bios, data + 0x01));
+	nvkm_debug(&mxm->subdev, "config flags: %02x\n",
+		   nvbios_rd08(bios, data + 0x02));
 
 	if (mxm_shadow(mxm, ver)) {
 		nvkm_warn(&mxm->subdev, "failed to locate valid SIS\n");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.c
index 45a2f8e784f9..9abfa5e2fe9f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/mxms.c
@@ -23,8 +23,8 @@
  */
 #include "mxms.h"
 
-#define ROM16(x) le16_to_cpu(*(u16 *)&(x))
-#define ROM32(x) le32_to_cpu(*(u32 *)&(x))
+#define ROM16(x) get_unaligned_le16(&(x))
+#define ROM32(x) get_unaligned_le32(&(x))
 
 static u8 *
 mxms_data(struct nvkm_mxm *mxm)
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c
index db14fad2ddfc..844971e5e874 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/mxm/nv50.c
@@ -190,8 +190,8 @@ mxm_dcb_sanitise(struct nvkm_mxm *mxm)
 	struct nvkm_bios *bios = subdev->device->bios;
 	u8  ver, hdr, cnt, len;
 	u16 dcb = dcb_table(bios, &ver, &hdr, &cnt, &len);
-	if (dcb == 0x0000 || ver != 0x40) {
-		nvkm_debug(subdev, "unsupported DCB version\n");
+	if (dcb == 0x0000 || (ver != 0x40 && ver != 0x41)) {
+		nvkm_warn(subdev, "unsupported DCB version\n");
 		return;
 	}
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
index 2a31b7d66a6d..87bf41cef0c6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/Kbuild
@@ -6,6 +6,7 @@ nvkm-y += nvkm/subdev/pci/nv40.o
 nvkm-y += nvkm/subdev/pci/nv46.o
 nvkm-y += nvkm/subdev/pci/nv4c.o
 nvkm-y += nvkm/subdev/pci/g84.o
+nvkm-y += nvkm/subdev/pci/g92.o
 nvkm-y += nvkm/subdev/pci/g94.o
 nvkm-y += nvkm/subdev/pci/gf100.o
 nvkm-y += nvkm/subdev/pci/gf106.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
new file mode 100644
index 000000000000..48874359d5f6
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g92.c
@@ -0,0 +1,57 @@
+/*
+ * Copyright 2015 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+int
+g92_pcie_version_supported(struct nvkm_pci *pci)
+{
+	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
+		return 2;
+	return 1;
+}
+
+static const struct nvkm_pci_func
+g92_pci_func = {
+	.init = g84_pci_init,
+	.rd32 = nv40_pci_rd32,
+	.wr08 = nv40_pci_wr08,
+	.wr32 = nv40_pci_wr32,
+	.msi_rearm = nv46_pci_msi_rearm,
+
+	.pcie.init = g84_pcie_init,
+	.pcie.set_link = g84_pcie_set_link,
+
+	.pcie.max_speed = g84_pcie_max_speed,
+	.pcie.cur_speed = g84_pcie_cur_speed,
+
+	.pcie.set_version = g84_pcie_set_version,
+	.pcie.version = g84_pcie_version,
+	.pcie.version_supported = g92_pcie_version_supported,
+};
+
+int
+g92_pci_new(struct nvkm_device *device, int index, struct nvkm_pci **ppci)
+{
+	return nvkm_pci_new_(&g92_pci_func, device, index, ppci);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
index 43444123bc04..09adb37a5664 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/g94.c
@@ -23,14 +23,6 @@
  */
 #include "priv.h"
 
-int
-g94_pcie_version_supported(struct nvkm_pci *pci)
-{
-	if ((nvkm_pci_rd32(pci, 0x460) & 0x200) == 0x200)
-		return 2;
-	return 1;
-}
-
 static const struct nvkm_pci_func
 g94_pci_func = {
 	.init = g84_pci_init,
@@ -47,7 +39,7 @@ g94_pci_func = {
 
 	.pcie.set_version = g84_pcie_set_version,
 	.pcie.version = g84_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
index e30ea676baf6..00a5e7d3ee9d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf100.c
@@ -92,7 +92,7 @@ gf100_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
index c3b798c5c6dd..11bf419afe3f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/gf106.c
@@ -39,7 +39,7 @@ gf106_pci_func = {
 
 	.pcie.set_version = gf100_pcie_set_version,
 	.pcie.version = gf100_pcie_version,
-	.pcie.version_supported = g94_pcie_version_supported,
+	.pcie.version_supported = g92_pcie_version_supported,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
index 23de3180aae5..86921ec962d6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pci/priv.h
@@ -44,7 +44,7 @@ enum nvkm_pcie_speed g84_pcie_max_speed(struct nvkm_pci *);
 int g84_pcie_init(struct nvkm_pci *);
 int g84_pcie_set_link(struct nvkm_pci *, enum nvkm_pcie_speed, u8);
 
-int g94_pcie_version_supported(struct nvkm_pci *);
+int g92_pcie_version_supported(struct nvkm_pci *);
 
 void gf100_pcie_set_version(struct nvkm_pci *, u8);
 int gf100_pcie_version(struct nvkm_pci *);
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
index 88b643b8664e..ca57c1e491b0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/Kbuild
@@ -8,3 +8,6 @@ nvkm-y += nvkm/subdev/pmu/gk110.o
 nvkm-y += nvkm/subdev/pmu/gk208.o
 nvkm-y += nvkm/subdev/pmu/gk20a.o
 nvkm-y += nvkm/subdev/pmu/gm107.o
+nvkm-y += nvkm/subdev/pmu/gm20b.o
+nvkm-y += nvkm/subdev/pmu/gp100.o
+nvkm-y += nvkm/subdev/pmu/gp102.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
index 8dd164d13043..a73f690eb4b5 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/base.c
@@ -32,251 +32,121 @@ nvkm_pmu_pgob(struct nvkm_pmu *pmu, bool enable)
 		pmu->func->pgob(pmu, enable);
 }
 
-int
-nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2],
-	      u32 process, u32 message, u32 data0, u32 data1)
-{
-	struct nvkm_subdev *subdev = &pmu->subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 addr;
-
-	mutex_lock(&subdev->mutex);
-	/* wait for a free slot in the fifo */
-	addr  = nvkm_rd32(device, 0x10a4a0);
-	if (nvkm_msec(device, 2000,
-		u32 tmp = nvkm_rd32(device, 0x10a4b0);
-		if (tmp != (addr ^ 8))
-			break;
-	) < 0) {
-		mutex_unlock(&subdev->mutex);
-		return -EBUSY;
-	}
-
-	/* we currently only support a single process at a time waiting
-	 * on a synchronous reply, take the PMU mutex and tell the
-	 * receive handler what we're waiting for
-	 */
-	if (reply) {
-		pmu->recv.message = message;
-		pmu->recv.process = process;
-	}
-
-	/* acquire data segment access */
-	do {
-		nvkm_wr32(device, 0x10a580, 0x00000001);
-	} while (nvkm_rd32(device, 0x10a580) != 0x00000001);
-
-	/* write the packet */
-	nvkm_wr32(device, 0x10a1c0, 0x01000000 | (((addr & 0x07) << 4) +
-				pmu->send.base));
-	nvkm_wr32(device, 0x10a1c4, process);
-	nvkm_wr32(device, 0x10a1c4, message);
-	nvkm_wr32(device, 0x10a1c4, data0);
-	nvkm_wr32(device, 0x10a1c4, data1);
-	nvkm_wr32(device, 0x10a4a0, (addr + 1) & 0x0f);
-
-	/* release data segment access */
-	nvkm_wr32(device, 0x10a580, 0x00000000);
-
-	/* wait for reply, if requested */
-	if (reply) {
-		wait_event(pmu->recv.wait, (pmu->recv.process == 0));
-		reply[0] = pmu->recv.data[0];
-		reply[1] = pmu->recv.data[1];
-	}
-
-	mutex_unlock(&subdev->mutex);
-	return 0;
-}
-
 static void
 nvkm_pmu_recv(struct work_struct *work)
 {
-	struct nvkm_pmu *pmu = container_of(work, struct nvkm_pmu, recv.work);
-	struct nvkm_subdev *subdev = &pmu->subdev;
-	struct nvkm_device *device = subdev->device;
-	u32 process, message, data0, data1;
-
-	/* nothing to do if GET == PUT */
-	u32 addr =  nvkm_rd32(device, 0x10a4cc);
-	if (addr == nvkm_rd32(device, 0x10a4c8))
-		return;
-
-	/* acquire data segment access */
-	do {
-		nvkm_wr32(device, 0x10a580, 0x00000002);
-	} while (nvkm_rd32(device, 0x10a580) != 0x00000002);
-
-	/* read the packet */
-	nvkm_wr32(device, 0x10a1c0, 0x02000000 | (((addr & 0x07) << 4) +
-				pmu->recv.base));
-	process = nvkm_rd32(device, 0x10a1c4);
-	message = nvkm_rd32(device, 0x10a1c4);
-	data0   = nvkm_rd32(device, 0x10a1c4);
-	data1   = nvkm_rd32(device, 0x10a1c4);
-	nvkm_wr32(device, 0x10a4cc, (addr + 1) & 0x0f);
-
-	/* release data segment access */
-	nvkm_wr32(device, 0x10a580, 0x00000000);
-
-	/* wake process if it's waiting on a synchronous reply */
-	if (pmu->recv.process) {
-		if (process == pmu->recv.process &&
-		    message == pmu->recv.message) {
-			pmu->recv.data[0] = data0;
-			pmu->recv.data[1] = data1;
-			pmu->recv.process = 0;
-			wake_up(&pmu->recv.wait);
-			return;
-		}
-	}
+	struct nvkm_pmu *pmu = container_of(work, typeof(*pmu), recv.work);
+	return pmu->func->recv(pmu);
+}
 
-	/* right now there's no other expected responses from the engine,
-	 * so assume that any unexpected message is an error.
-	 */
-	nvkm_warn(subdev, "%c%c%c%c %08x %08x %08x %08x\n",
-		  (char)((process & 0x000000ff) >>  0),
-		  (char)((process & 0x0000ff00) >>  8),
-		  (char)((process & 0x00ff0000) >> 16),
-		  (char)((process & 0xff000000) >> 24),
-		  process, message, data0, data1);
+int
+nvkm_pmu_send(struct nvkm_pmu *pmu, u32 reply[2],
+	      u32 process, u32 message, u32 data0, u32 data1)
+{
+	if (!pmu || !pmu->func->send)
+		return -ENODEV;
+	return pmu->func->send(pmu, reply, process, message, data0, data1);
 }
 
 static void
 nvkm_pmu_intr(struct nvkm_subdev *subdev)
 {
 	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
-	struct nvkm_device *device = pmu->subdev.device;
-	u32 disp = nvkm_rd32(device, 0x10a01c);
-	u32 intr = nvkm_rd32(device, 0x10a008) & disp & ~(disp >> 16);
-
-	if (intr & 0x00000020) {
-		u32 stat = nvkm_rd32(device, 0x10a16c);
-		if (stat & 0x80000000) {
-			nvkm_error(subdev, "UAS fault at %06x addr %08x\n",
-				   stat & 0x00ffffff,
-				   nvkm_rd32(device, 0x10a168));
-			nvkm_wr32(device, 0x10a16c, 0x00000000);
-			intr &= ~0x00000020;
-		}
-	}
-
-	if (intr & 0x00000040) {
-		schedule_work(&pmu->recv.work);
-		nvkm_wr32(device, 0x10a004, 0x00000040);
-		intr &= ~0x00000040;
-	}
-
-	if (intr & 0x00000080) {
-		nvkm_info(subdev, "wr32 %06x %08x\n",
-			  nvkm_rd32(device, 0x10a7a0),
-			  nvkm_rd32(device, 0x10a7a4));
-		nvkm_wr32(device, 0x10a004, 0x00000080);
-		intr &= ~0x00000080;
-	}
-
-	if (intr) {
-		nvkm_error(subdev, "intr %08x\n", intr);
-		nvkm_wr32(device, 0x10a004, intr);
-	}
+	if (!pmu->func->intr)
+		return;
+	pmu->func->intr(pmu);
 }
 
 static int
 nvkm_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
 {
 	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
-	struct nvkm_device *device = pmu->subdev.device;
 
-	nvkm_wr32(device, 0x10a014, 0x00000060);
+	if (pmu->func->fini)
+		pmu->func->fini(pmu);
+
 	flush_work(&pmu->recv.work);
 	return 0;
 }
 
 static int
-nvkm_pmu_init(struct nvkm_subdev *subdev)
+nvkm_pmu_reset(struct nvkm_pmu *pmu)
 {
-	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
 	struct nvkm_device *device = pmu->subdev.device;
-	int i;
 
-	/* prevent previous ucode from running, wait for idle, reset */
-	nvkm_wr32(device, 0x10a014, 0x0000ffff); /* INTR_EN_CLR = ALL */
+	if (!(nvkm_rd32(device, 0x000200) & 0x00002000))
+		return 0;
+
+	/* Inhibit interrupts, and wait for idle. */
+	nvkm_wr32(device, 0x10a014, 0x0000ffff);
 	nvkm_msec(device, 2000,
 		if (!nvkm_rd32(device, 0x10a04c))
 			break;
 	);
-	nvkm_mask(device, 0x000200, 0x00002000, 0x00000000);
-	nvkm_mask(device, 0x000200, 0x00002000, 0x00002000);
-	nvkm_rd32(device, 0x000200);
+
+	/* Reset. */
+	pmu->func->reset(pmu);
+
+	/* Wait for IMEM/DMEM scrubbing to be complete. */
 	nvkm_msec(device, 2000,
 		if (!(nvkm_rd32(device, 0x10a10c) & 0x00000006))
 			break;
 	);
 
-	/* upload data segment */
-	nvkm_wr32(device, 0x10a1c0, 0x01000000);
-	for (i = 0; i < pmu->func->data.size / 4; i++)
-		nvkm_wr32(device, 0x10a1c4, pmu->func->data.data[i]);
-
-	/* upload code segment */
-	nvkm_wr32(device, 0x10a180, 0x01000000);
-	for (i = 0; i < pmu->func->code.size / 4; i++) {
-		if ((i & 0x3f) == 0)
-			nvkm_wr32(device, 0x10a188, i >> 6);
-		nvkm_wr32(device, 0x10a184, pmu->func->code.data[i]);
-	}
-
-	/* start it running */
-	nvkm_wr32(device, 0x10a10c, 0x00000000);
-	nvkm_wr32(device, 0x10a104, 0x00000000);
-	nvkm_wr32(device, 0x10a100, 0x00000002);
-
-	/* wait for valid host->pmu ring configuration */
-	if (nvkm_msec(device, 2000,
-		if (nvkm_rd32(device, 0x10a4d0))
-			break;
-	) < 0)
-		return -EBUSY;
-	pmu->send.base = nvkm_rd32(device, 0x10a4d0) & 0x0000ffff;
-	pmu->send.size = nvkm_rd32(device, 0x10a4d0) >> 16;
+	return 0;
+}
 
-	/* wait for valid pmu->host ring configuration */
-	if (nvkm_msec(device, 2000,
-		if (nvkm_rd32(device, 0x10a4dc))
-			break;
-	) < 0)
-		return -EBUSY;
-	pmu->recv.base = nvkm_rd32(device, 0x10a4dc) & 0x0000ffff;
-	pmu->recv.size = nvkm_rd32(device, 0x10a4dc) >> 16;
+static int
+nvkm_pmu_preinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+	return nvkm_pmu_reset(pmu);
+}
 
-	nvkm_wr32(device, 0x10a010, 0x000000e0);
-	return 0;
+static int
+nvkm_pmu_init(struct nvkm_subdev *subdev)
+{
+	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+	int ret = nvkm_pmu_reset(pmu);
+	if (ret == 0 && pmu->func->init)
+		ret = pmu->func->init(pmu);
+	return ret;
 }
 
 static void *
 nvkm_pmu_dtor(struct nvkm_subdev *subdev)
 {
+	struct nvkm_pmu *pmu = nvkm_pmu(subdev);
+	nvkm_falcon_del(&pmu->falcon);
 	return nvkm_pmu(subdev);
 }
 
 static const struct nvkm_subdev_func
 nvkm_pmu = {
 	.dtor = nvkm_pmu_dtor,
+	.preinit = nvkm_pmu_preinit,
 	.init = nvkm_pmu_init,
 	.fini = nvkm_pmu_fini,
 	.intr = nvkm_pmu_intr,
 };
 
 int
+nvkm_pmu_ctor(const struct nvkm_pmu_func *func, struct nvkm_device *device,
+	      int index, struct nvkm_pmu *pmu)
+{
+	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
+	pmu->func = func;
+	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
+	init_waitqueue_head(&pmu->recv.wait);
+	return nvkm_falcon_v1_new(&pmu->subdev, "PMU", 0x10a000, &pmu->falcon);
+}
+
+int
 nvkm_pmu_new_(const struct nvkm_pmu_func *func, struct nvkm_device *device,
 	      int index, struct nvkm_pmu **ppmu)
 {
 	struct nvkm_pmu *pmu;
 	if (!(pmu = *ppmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	nvkm_subdev_ctor(&nvkm_pmu, device, index, &pmu->subdev);
-	pmu->func = func;
-	INIT_WORK(&pmu->recv.work, nvkm_pmu_recv);
-	init_waitqueue_head(&pmu->recv.wait);
-	return 0;
+	return nvkm_pmu_ctor(func, device, index, *ppmu);
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
index e2faccffee6f..0bcf0b307a61 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf100.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gf100_pmu_data[] = {
+static uint32_t gf100_pmu_data[] = {
 /* 0x0000: proc_kern */
 	0x52544e49,
 	0x00000000,
@@ -916,7 +916,7 @@ uint32_t gf100_pmu_data[] = {
 	0x00000000,
 };
 
-uint32_t gf100_pmu_code[] = {
+static uint32_t gf100_pmu_code[] = {
 	0x03920ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
index 2d5bdc539697..fe8905666c67 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gf119.fuc4.h
@@ -1,4 +1,4 @@
-uint32_t gf119_pmu_data[] = {
+static uint32_t gf119_pmu_data[] = {
 /* 0x0000: proc_kern */
 	0x52544e49,
 	0x00000000,
@@ -915,7 +915,7 @@ uint32_t gf119_pmu_data[] = {
 	0x00000000,
 };
 
-uint32_t gf119_pmu_code[] = {
+static uint32_t gf119_pmu_code[] = {
 	0x03410ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
index 3c731ff12871..9cf4e6fc724e 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gk208.fuc5.h
@@ -1,4 +1,4 @@
-uint32_t gk208_pmu_data[] = {
+static uint32_t gk208_pmu_data[] = {
 /* 0x0000: proc_kern */
 	0x52544e49,
 	0x00000000,
@@ -915,7 +915,7 @@ uint32_t gk208_pmu_data[] = {
 	0x00000000,
 };
 
-uint32_t gk208_pmu_code[] = {
+static uint32_t gk208_pmu_code[] = {
 	0x02f90ef5,
 /* 0x0004: rd32 */
 	0xf607a040,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
index e83341815ec6..5d692425b190 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/fuc/gt215.fuc3.h
@@ -1,4 +1,4 @@
-uint32_t gt215_pmu_data[] = {
+static uint32_t gt215_pmu_data[] = {
 /* 0x0000: proc_kern */
 	0x52544e49,
 	0x00000000,
@@ -916,7 +916,7 @@ uint32_t gt215_pmu_data[] = {
 	0x00000000,
 };
 
-uint32_t gt215_pmu_code[] = {
+static uint32_t gt215_pmu_code[] = {
 	0x03920ef5,
 /* 0x0004: rd32 */
 	0x07a007f1,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf100.c
index aeb8ccd891fc..0e36d4cb7201 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf100.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf100.c
@@ -30,6 +30,12 @@ gf100_pmu = {
 	.code.size = sizeof(gf100_pmu_code),
 	.data.data = gf100_pmu_data,
 	.data.size = sizeof(gf100_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf119.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf119.c
index fbc88d8ecd4d..0e4ba4248b15 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf119.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gf119.c
@@ -30,6 +30,12 @@ gf119_pmu = {
 	.code.size = sizeof(gf119_pmu_code),
 	.data.data = gf119_pmu_data,
 	.data.size = sizeof(gf119_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
index 86f9f3b13f71..2ad858d825ac 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk104.c
@@ -109,6 +109,12 @@ gk104_pmu = {
 	.code.size = sizeof(gk104_pmu_code),
 	.data.data = gk104_pmu_data,
 	.data.size = sizeof(gk104_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 	.pgob = gk104_pmu_pgob,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c
index ae255247c9d1..fc4b8ecfdaeb 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk110.c
@@ -88,6 +88,12 @@ gk110_pmu = {
 	.code.size = sizeof(gk110_pmu_code),
 	.data.data = gk110_pmu_data,
 	.data.size = sizeof(gk110_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 	.pgob = gk110_pmu_pgob,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c
index 3b4917637902..e9a91277683a 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk208.c
@@ -30,6 +30,12 @@ gk208_pmu = {
 	.code.size = sizeof(gk208_pmu_code),
 	.data.data = gk208_pmu_data,
 	.data.size = sizeof(gk208_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 	.pgob = gk110_pmu_pgob,
 };
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
index f996d90c9f0d..9ca0db796cbe 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gk20a.c
@@ -19,7 +19,7 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
-#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base.subdev)
+#define gk20a_pmu(p) container_of((p), struct gk20a_pmu, base)
 #include "priv.h"
 
 #include <subdev/clk.h>
@@ -43,9 +43,8 @@ struct gk20a_pmu {
 };
 
 struct gk20a_pmu_dvfs_dev_status {
-	unsigned long total;
-	unsigned long busy;
-	int cur_state;
+	u32 total;
+	u32 busy;
 };
 
 static int
@@ -56,13 +55,12 @@ gk20a_pmu_dvfs_target(struct gk20a_pmu *pmu, int *state)
 	return nvkm_clk_astate(clk, *state, 0, false);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_cur_state(struct gk20a_pmu *pmu, int *state)
 {
 	struct nvkm_clk *clk = pmu->base.subdev.device->clk;
 
 	*state = clk->pstate;
-	return 0;
 }
 
 static int
@@ -90,28 +88,26 @@ gk20a_pmu_dvfs_get_target_state(struct gk20a_pmu *pmu,
 
 	*state = level;
 
-	if (level == cur_level)
-		return 0;
-	else
-		return 1;
+	return (level != cur_level);
 }
 
-static int
+static void
 gk20a_pmu_dvfs_get_dev_status(struct gk20a_pmu *pmu,
 			      struct gk20a_pmu_dvfs_dev_status *status)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	status->busy = nvkm_rd32(device, 0x10a508 + (BUSY_SLOT * 0x10));
-	status->total= nvkm_rd32(device, 0x10a508 + (CLK_SLOT * 0x10));
-	return 0;
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	status->busy = nvkm_falcon_rd32(falcon, 0x508 + (BUSY_SLOT * 0x10));
+	status->total= nvkm_falcon_rd32(falcon, 0x508 + (CLK_SLOT * 0x10));
 }
 
 static void
 gk20a_pmu_dvfs_reset_dev_status(struct gk20a_pmu *pmu)
 {
-	struct nvkm_device *device = pmu->base.subdev.device;
-	nvkm_wr32(device, 0x10a508 + (BUSY_SLOT * 0x10), 0x80000000);
-	nvkm_wr32(device, 0x10a508 + (CLK_SLOT * 0x10), 0x80000000);
+	struct nvkm_falcon *falcon = pmu->base.falcon;
+
+	nvkm_falcon_wr32(falcon, 0x508 + (BUSY_SLOT * 0x10), 0x80000000);
+	nvkm_falcon_wr32(falcon, 0x508 + (CLK_SLOT * 0x10), 0x80000000);
 }
 
 static void
@@ -127,7 +123,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	struct nvkm_timer *tmr = device->timer;
 	struct nvkm_volt *volt = device->volt;
 	u32 utilization = 0;
-	int state, ret;
+	int state;
 
 	/*
 	 * The PMU is initialized before CLK and VOLT, so we have to make sure the
@@ -136,11 +132,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	if (!clk || !volt)
 		goto resched;
 
-	ret = gk20a_pmu_dvfs_get_dev_status(pmu, &status);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get device status\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_dev_status(pmu, &status);
 
 	if (status.total)
 		utilization = div_u64((u64)status.busy * 100, status.total);
@@ -150,11 +142,7 @@ gk20a_pmu_dvfs_work(struct nvkm_alarm *alarm)
 	nvkm_trace(subdev, "utilization = %d %%, avg_load = %d %%\n",
 		   utilization, data->avg_load);
 
-	ret = gk20a_pmu_dvfs_get_cur_state(pmu, &state);
-	if (ret) {
-		nvkm_warn(subdev, "failed to get current state\n");
-		goto resched;
-	}
+	gk20a_pmu_dvfs_get_cur_state(pmu, &state);
 
 	if (gk20a_pmu_dvfs_get_target_state(pmu, &state, data->avg_load)) {
 		nvkm_trace(subdev, "set new state to %d\n", state);
@@ -166,32 +154,36 @@ resched:
 	nvkm_timer_alarm(tmr, 100000000, alarm);
 }
 
-static int
-gk20a_pmu_fini(struct nvkm_subdev *subdev, bool suspend)
+static void
+gk20a_pmu_fini(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	nvkm_timer_alarm_cancel(subdev->device->timer, &pmu->alarm);
-	return 0;
-}
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	nvkm_timer_alarm_cancel(pmu->subdev.device->timer, &gpmu->alarm);
 
-static void *
-gk20a_pmu_dtor(struct nvkm_subdev *subdev)
-{
-	return gk20a_pmu(subdev);
+	nvkm_falcon_put(pmu->falcon, &pmu->subdev);
 }
 
 static int
-gk20a_pmu_init(struct nvkm_subdev *subdev)
+gk20a_pmu_init(struct nvkm_pmu *pmu)
 {
-	struct gk20a_pmu *pmu = gk20a_pmu(subdev);
-	struct nvkm_device *device = pmu->base.subdev.device;
+	struct gk20a_pmu *gpmu = gk20a_pmu(pmu);
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = pmu->subdev.device;
+	struct nvkm_falcon *falcon = pmu->falcon;
+	int ret;
+
+	ret = nvkm_falcon_get(falcon, subdev);
+	if (ret) {
+		nvkm_error(subdev, "cannot acquire %s falcon!\n", falcon->name);
+		return ret;
+	}
 
 	/* init pwr perf counter */
-	nvkm_wr32(device, 0x10a504 + (BUSY_SLOT * 0x10), 0x00200001);
-	nvkm_wr32(device, 0x10a50c + (BUSY_SLOT * 0x10), 0x00000002);
-	nvkm_wr32(device, 0x10a50c + (CLK_SLOT * 0x10), 0x00000003);
+	nvkm_falcon_wr32(falcon, 0x504 + (BUSY_SLOT * 0x10), 0x00200001);
+	nvkm_falcon_wr32(falcon, 0x50c + (BUSY_SLOT * 0x10), 0x00000002);
+	nvkm_falcon_wr32(falcon, 0x50c + (CLK_SLOT * 0x10), 0x00000003);
 
-	nvkm_timer_alarm(device->timer, 2000000000, &pmu->alarm);
+	nvkm_timer_alarm(device->timer, 2000000000, &gpmu->alarm);
 	return 0;
 }
 
@@ -202,26 +194,26 @@ gk20a_dvfs_data= {
 	.p_smooth = 1,
 };
 
-static const struct nvkm_subdev_func
+static const struct nvkm_pmu_func
 gk20a_pmu = {
 	.init = gk20a_pmu_init,
 	.fini = gk20a_pmu_fini,
-	.dtor = gk20a_pmu_dtor,
+	.reset = gt215_pmu_reset,
 };
 
 int
 gk20a_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
 {
-	static const struct nvkm_pmu_func func = {};
 	struct gk20a_pmu *pmu;
 
 	if (!(pmu = kzalloc(sizeof(*pmu), GFP_KERNEL)))
 		return -ENOMEM;
-	pmu->base.func = &func;
 	*ppmu = &pmu->base;
 
-	nvkm_subdev_ctor(&gk20a_pmu, device, index, &pmu->base.subdev);
+	nvkm_pmu_ctor(&gk20a_pmu, device, index, &pmu->base);
+
 	pmu->data = &gk20a_dvfs_data;
 	nvkm_alarm_init(&pmu->alarm, gk20a_pmu_dvfs_work);
+
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm107.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm107.c
index 31b8692b4641..9a248ed75f09 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm107.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm107.c
@@ -32,6 +32,12 @@ gm107_pmu = {
 	.code.size = sizeof(gm107_pmu_code),
 	.data.data = gm107_pmu_data,
 	.data.size = sizeof(gm107_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
new file mode 100644
index 000000000000..0b8a1cc4a0ee
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gm20b.c
@@ -0,0 +1,34 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "priv.h"
+
+static const struct nvkm_pmu_func
+gm20b_pmu = {
+	.reset = gt215_pmu_reset,
+};
+
+int
+gm20b_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
+{
+	return nvkm_pmu_new_(&gm20b_pmu, device, index, ppmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp100.c
new file mode 100644
index 000000000000..6c41c20c85a7
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp100.c
@@ -0,0 +1,35 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static const struct nvkm_pmu_func
+gp100_pmu = {
+	.reset = gt215_pmu_reset,
+};
+
+int
+gp100_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
+{
+	return nvkm_pmu_new_(&gp100_pmu, device, index, ppmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
new file mode 100644
index 000000000000..f017352206c9
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gp102.c
@@ -0,0 +1,43 @@
+/*
+ * Copyright 2016 Red Hat Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Ben Skeggs <bskeggs@redhat.com>
+ */
+#include "priv.h"
+
+static void
+gp102_pmu_reset(struct nvkm_pmu *pmu)
+{
+	struct nvkm_device *device = pmu->subdev.device;
+	nvkm_mask(device, 0x10a3c0, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x10a3c0, 0x00000001, 0x00000000);
+}
+
+static const struct nvkm_pmu_func
+gp102_pmu = {
+	.reset = gp102_pmu_reset,
+};
+
+int
+gp102_pmu_new(struct nvkm_device *device, int index, struct nvkm_pmu **ppmu)
+{
+	return nvkm_pmu_new_(&gp102_pmu, device, index, ppmu);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
index 8ba7fa4ca75b..90d428b3be97 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/gt215.c
@@ -24,21 +24,229 @@
 #include "priv.h"
 #include "fuc/gt215.fuc3.h"
 
-static void
+#include <subdev/timer.h>
+
+int
+gt215_pmu_send(struct nvkm_pmu *pmu, u32 reply[2],
+	       u32 process, u32 message, u32 data0, u32 data1)
+{
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 addr;
+
+	mutex_lock(&subdev->mutex);
+	/* wait for a free slot in the fifo */
+	addr  = nvkm_rd32(device, 0x10a4a0);
+	if (nvkm_msec(device, 2000,
+		u32 tmp = nvkm_rd32(device, 0x10a4b0);
+		if (tmp != (addr ^ 8))
+			break;
+	) < 0) {
+		mutex_unlock(&subdev->mutex);
+		return -EBUSY;
+	}
+
+	/* we currently only support a single process at a time waiting
+	 * on a synchronous reply, take the PMU mutex and tell the
+	 * receive handler what we're waiting for
+	 */
+	if (reply) {
+		pmu->recv.message = message;
+		pmu->recv.process = process;
+	}
+
+	/* acquire data segment access */
+	do {
+		nvkm_wr32(device, 0x10a580, 0x00000001);
+	} while (nvkm_rd32(device, 0x10a580) != 0x00000001);
+
+	/* write the packet */
+	nvkm_wr32(device, 0x10a1c0, 0x01000000 | (((addr & 0x07) << 4) +
+				pmu->send.base));
+	nvkm_wr32(device, 0x10a1c4, process);
+	nvkm_wr32(device, 0x10a1c4, message);
+	nvkm_wr32(device, 0x10a1c4, data0);
+	nvkm_wr32(device, 0x10a1c4, data1);
+	nvkm_wr32(device, 0x10a4a0, (addr + 1) & 0x0f);
+
+	/* release data segment access */
+	nvkm_wr32(device, 0x10a580, 0x00000000);
+
+	/* wait for reply, if requested */
+	if (reply) {
+		wait_event(pmu->recv.wait, (pmu->recv.process == 0));
+		reply[0] = pmu->recv.data[0];
+		reply[1] = pmu->recv.data[1];
+	}
+
+	mutex_unlock(&subdev->mutex);
+	return 0;
+}
+
+void
+gt215_pmu_recv(struct nvkm_pmu *pmu)
+{
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 process, message, data0, data1;
+
+	/* nothing to do if GET == PUT */
+	u32 addr =  nvkm_rd32(device, 0x10a4cc);
+	if (addr == nvkm_rd32(device, 0x10a4c8))
+		return;
+
+	/* acquire data segment access */
+	do {
+		nvkm_wr32(device, 0x10a580, 0x00000002);
+	} while (nvkm_rd32(device, 0x10a580) != 0x00000002);
+
+	/* read the packet */
+	nvkm_wr32(device, 0x10a1c0, 0x02000000 | (((addr & 0x07) << 4) +
+				pmu->recv.base));
+	process = nvkm_rd32(device, 0x10a1c4);
+	message = nvkm_rd32(device, 0x10a1c4);
+	data0   = nvkm_rd32(device, 0x10a1c4);
+	data1   = nvkm_rd32(device, 0x10a1c4);
+	nvkm_wr32(device, 0x10a4cc, (addr + 1) & 0x0f);
+
+	/* release data segment access */
+	nvkm_wr32(device, 0x10a580, 0x00000000);
+
+	/* wake process if it's waiting on a synchronous reply */
+	if (pmu->recv.process) {
+		if (process == pmu->recv.process &&
+		    message == pmu->recv.message) {
+			pmu->recv.data[0] = data0;
+			pmu->recv.data[1] = data1;
+			pmu->recv.process = 0;
+			wake_up(&pmu->recv.wait);
+			return;
+		}
+	}
+
+	/* right now there's no other expected responses from the engine,
+	 * so assume that any unexpected message is an error.
+	 */
+	nvkm_warn(subdev, "%c%c%c%c %08x %08x %08x %08x\n",
+		  (char)((process & 0x000000ff) >>  0),
+		  (char)((process & 0x0000ff00) >>  8),
+		  (char)((process & 0x00ff0000) >> 16),
+		  (char)((process & 0xff000000) >> 24),
+		  process, message, data0, data1);
+}
+
+void
+gt215_pmu_intr(struct nvkm_pmu *pmu)
+{
+	struct nvkm_subdev *subdev = &pmu->subdev;
+	struct nvkm_device *device = subdev->device;
+	u32 disp = nvkm_rd32(device, 0x10a01c);
+	u32 intr = nvkm_rd32(device, 0x10a008) & disp & ~(disp >> 16);
+
+	if (intr & 0x00000020) {
+		u32 stat = nvkm_rd32(device, 0x10a16c);
+		if (stat & 0x80000000) {
+			nvkm_error(subdev, "UAS fault at %06x addr %08x\n",
+				   stat & 0x00ffffff,
+				   nvkm_rd32(device, 0x10a168));
+			nvkm_wr32(device, 0x10a16c, 0x00000000);
+			intr &= ~0x00000020;
+		}
+	}
+
+	if (intr & 0x00000040) {
+		schedule_work(&pmu->recv.work);
+		nvkm_wr32(device, 0x10a004, 0x00000040);
+		intr &= ~0x00000040;
+	}
+
+	if (intr & 0x00000080) {
+		nvkm_info(subdev, "wr32 %06x %08x\n",
+			  nvkm_rd32(device, 0x10a7a0),
+			  nvkm_rd32(device, 0x10a7a4));
+		nvkm_wr32(device, 0x10a004, 0x00000080);
+		intr &= ~0x00000080;
+	}
+
+	if (intr) {
+		nvkm_error(subdev, "intr %08x\n", intr);
+		nvkm_wr32(device, 0x10a004, intr);
+	}
+}
+
+void
+gt215_pmu_fini(struct nvkm_pmu *pmu)
+{
+	nvkm_wr32(pmu->subdev.device, 0x10a014, 0x00000060);
+}
+
+void
 gt215_pmu_reset(struct nvkm_pmu *pmu)
 {
 	struct nvkm_device *device = pmu->subdev.device;
-	nvkm_mask(device, 0x022210, 0x00000001, 0x00000000);
-	nvkm_mask(device, 0x022210, 0x00000001, 0x00000001);
+	nvkm_mask(device, 0x000200, 0x00002000, 0x00000000);
+	nvkm_mask(device, 0x000200, 0x00002000, 0x00002000);
+	nvkm_rd32(device, 0x000200);
+}
+
+int
+gt215_pmu_init(struct nvkm_pmu *pmu)
+{
+	struct nvkm_device *device = pmu->subdev.device;
+	int i;
+
+	/* upload data segment */
+	nvkm_wr32(device, 0x10a1c0, 0x01000000);
+	for (i = 0; i < pmu->func->data.size / 4; i++)
+		nvkm_wr32(device, 0x10a1c4, pmu->func->data.data[i]);
+
+	/* upload code segment */
+	nvkm_wr32(device, 0x10a180, 0x01000000);
+	for (i = 0; i < pmu->func->code.size / 4; i++) {
+		if ((i & 0x3f) == 0)
+			nvkm_wr32(device, 0x10a188, i >> 6);
+		nvkm_wr32(device, 0x10a184, pmu->func->code.data[i]);
+	}
+
+	/* start it running */
+	nvkm_wr32(device, 0x10a10c, 0x00000000);
+	nvkm_wr32(device, 0x10a104, 0x00000000);
+	nvkm_wr32(device, 0x10a100, 0x00000002);
+
+	/* wait for valid host->pmu ring configuration */
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x10a4d0))
+			break;
+	) < 0)
+		return -EBUSY;
+	pmu->send.base = nvkm_rd32(device, 0x10a4d0) & 0x0000ffff;
+	pmu->send.size = nvkm_rd32(device, 0x10a4d0) >> 16;
+
+	/* wait for valid pmu->host ring configuration */
+	if (nvkm_msec(device, 2000,
+		if (nvkm_rd32(device, 0x10a4dc))
+			break;
+	) < 0)
+		return -EBUSY;
+	pmu->recv.base = nvkm_rd32(device, 0x10a4dc) & 0x0000ffff;
+	pmu->recv.size = nvkm_rd32(device, 0x10a4dc) >> 16;
+
+	nvkm_wr32(device, 0x10a010, 0x000000e0);
+	return 0;
 }
 
 static const struct nvkm_pmu_func
 gt215_pmu = {
-	.reset = gt215_pmu_reset,
 	.code.data = gt215_pmu_code,
 	.code.size = sizeof(gt215_pmu_code),
 	.data.data = gt215_pmu_data,
 	.data.size = sizeof(gt215_pmu_data),
+	.reset = gt215_pmu_reset,
+	.init = gt215_pmu_init,
+	.fini = gt215_pmu_fini,
+	.intr = gt215_pmu_intr,
+	.send = gt215_pmu_send,
+	.recv = gt215_pmu_recv,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
index f38c88fae3d6..096cba069f72 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/pmu/priv.h
@@ -4,12 +4,12 @@
 #include <subdev/pmu.h>
 #include <subdev/pmu/fuc/os.h>
 
+int nvkm_pmu_ctor(const struct nvkm_pmu_func *, struct nvkm_device *,
+		  int index, struct nvkm_pmu *);
 int nvkm_pmu_new_(const struct nvkm_pmu_func *, struct nvkm_device *,
 		  int index, struct nvkm_pmu **);
 
 struct nvkm_pmu_func {
-	void (*reset)(struct nvkm_pmu *);
-
 	struct {
 		u32 *data;
 		u32  size;
@@ -20,8 +20,22 @@ struct nvkm_pmu_func {
 		u32  size;
 	} data;
 
+	void (*reset)(struct nvkm_pmu *);
+	int (*init)(struct nvkm_pmu *);
+	void (*fini)(struct nvkm_pmu *);
+	void (*intr)(struct nvkm_pmu *);
+	int (*send)(struct nvkm_pmu *, u32 reply[2], u32 process,
+		    u32 message, u32 data0, u32 data1);
+	void (*recv)(struct nvkm_pmu *);
 	void (*pgob)(struct nvkm_pmu *, bool);
 };
 
+void gt215_pmu_reset(struct nvkm_pmu *);
+int gt215_pmu_init(struct nvkm_pmu *);
+void gt215_pmu_fini(struct nvkm_pmu *);
+void gt215_pmu_intr(struct nvkm_pmu *);
+void gt215_pmu_recv(struct nvkm_pmu *);
+int gt215_pmu_send(struct nvkm_pmu *, u32[2], u32, u32, u32, u32);
+
 void gk110_pmu_pgob(struct nvkm_pmu *, bool);
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
index b02b868a6589..5076d1500f47 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/Kbuild
@@ -1,3 +1,7 @@
 nvkm-y += nvkm/subdev/secboot/base.o
+nvkm-y += nvkm/subdev/secboot/ls_ucode_gr.o
+nvkm-y += nvkm/subdev/secboot/acr.o
+nvkm-y += nvkm/subdev/secboot/acr_r352.o
+nvkm-y += nvkm/subdev/secboot/acr_r361.o
 nvkm-y += nvkm/subdev/secboot/gm200.o
 nvkm-y += nvkm/subdev/secboot/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
new file mode 100644
index 000000000000..75dc06557877
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.c
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr.h"
+
+#include <core/firmware.h>
+
+/**
+ * Convenience function to duplicate a firmware file in memory and check that
+ * it has the required minimum size.
+ */
+void *
+nvkm_acr_load_firmware(const struct nvkm_subdev *subdev, const char *name,
+		       size_t min_size)
+{
+	const struct firmware *fw;
+	void *blob;
+	int ret;
+
+	ret = nvkm_firmware_get(subdev->device, name, &fw);
+	if (ret)
+		return ERR_PTR(ret);
+	if (fw->size < min_size) {
+		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
+			   name, min_size);
+		nvkm_firmware_put(fw);
+		return ERR_PTR(-EINVAL);
+	}
+	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
+	nvkm_firmware_put(fw);
+	if (!blob)
+		return ERR_PTR(-ENOMEM);
+
+	return blob;
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
new file mode 100644
index 000000000000..97795b342b6f
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_H__
+#define __NVKM_SECBOOT_ACR_H__
+
+#include "priv.h"
+
+struct nvkm_acr;
+
+/**
+ * struct nvkm_acr_func - properties and functions specific to an ACR
+ *
+ * @load: make the ACR ready to run on the given secboot device
+ * @reset: reset the specified falcon
+ * @start: start the specified falcon (assumed to have been reset)
+ */
+struct nvkm_acr_func {
+	void (*dtor)(struct nvkm_acr *);
+	int (*oneinit)(struct nvkm_acr *, struct nvkm_secboot *);
+	int (*fini)(struct nvkm_acr *, struct nvkm_secboot *, bool);
+	int (*load)(struct nvkm_acr *, struct nvkm_secboot *,
+		    struct nvkm_gpuobj *, u64);
+	int (*reset)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+	int (*start)(struct nvkm_acr *, struct nvkm_secboot *,
+		     enum nvkm_secboot_falcon);
+};
+
+/**
+ * struct nvkm_acr - instance of an ACR
+ *
+ * @boot_falcon: ID of the falcon that will perform secure boot
+ * @managed_falcons: bitfield of falcons managed by this ACR
+ * @start_address: virtual start address of the HS bootloader
+ */
+struct nvkm_acr {
+	const struct nvkm_acr_func *func;
+	const struct nvkm_subdev *subdev;
+
+	enum nvkm_secboot_falcon boot_falcon;
+	unsigned long managed_falcons;
+	u32 start_address;
+};
+
+void *nvkm_acr_load_firmware(const struct nvkm_subdev *, const char *, size_t);
+
+struct nvkm_acr *acr_r352_new(unsigned long);
+struct nvkm_acr *acr_r361_new(unsigned long);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
new file mode 100644
index 000000000000..1aa37ea18580
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.c
@@ -0,0 +1,936 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <core/gpuobj.h>
+#include <core/firmware.h>
+#include <engine/falcon.h>
+
+/**
+ * struct hsf_fw_header - HS firmware descriptor
+ * @sig_dbg_offset:	offset of the debug signature
+ * @sig_dbg_size:	size of the debug signature
+ * @sig_prod_offset:	offset of the production signature
+ * @sig_prod_size:	size of the production signature
+ * @patch_loc:		offset of the offset (sic) of where the signature is
+ * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
+ * @hdr_offset:		offset of the load header (see struct hs_load_header)
+ * @hdr_size:		size of above header
+ *
+ * This structure is embedded in the HS firmware image at
+ * hs_bin_hdr.header_offset.
+ */
+struct hsf_fw_header {
+	u32 sig_dbg_offset;
+	u32 sig_dbg_size;
+	u32 sig_prod_offset;
+	u32 sig_prod_size;
+	u32 patch_loc;
+	u32 patch_sig;
+	u32 hdr_offset;
+	u32 hdr_size;
+};
+
+/**
+ * struct acr_r352_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct acr_r352_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	u32 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 code_dma_base1;
+	u32 data_dma_base1;
+};
+
+/**
+ * acr_r352_generate_flcn_bl_desc - generate generic BL descriptor for LS image
+ */
+static void
+acr_r352_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r352_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &_img->ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = (base + pdesc->app_resident_code_offset) >> 8;
+	addr_data = (base + pdesc->app_resident_data_offset) >> 8;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = lower_32_bits(addr_code);
+	desc->code_dma_base1 = upper_32_bits(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = lower_32_bits(addr_data);
+	desc->data_dma_base1 = upper_32_bits(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+
+/**
+ * struct hsflcn_acr_desc - data section of the HS firmware
+ *
+ * This header is to be copied at the beginning of DMEM by the HS bootloader.
+ *
+ * @signature:		signature of ACR ucode
+ * @wpr_region_id:	region ID holding the WPR header and its details
+ * @wpr_offset:		offset from the WPR region holding the wpr header
+ * @regions:		region descriptors
+ * @nonwpr_ucode_blob_size:	size of LS blob
+ * @nonwpr_ucode_blob_start:	FB location of LS blob is
+ */
+struct hsflcn_acr_desc {
+	union {
+		u8 reserved_dmem[0x200];
+		u32 signatures[4];
+	} ucode_reserved_space;
+	u32 wpr_region_id;
+	u32 wpr_offset;
+	u32 mmu_mem_range;
+#define FLCN_ACR_MAX_REGIONS 2
+	struct {
+		u32 no_regions;
+		struct {
+			u32 start_addr;
+			u32 end_addr;
+			u32 region_id;
+			u32 read_mask;
+			u32 write_mask;
+			u32 client_mask;
+		} region_props[FLCN_ACR_MAX_REGIONS];
+	} regions;
+	u32 ucode_blob_size;
+	u64 ucode_blob_base __aligned(8);
+	struct {
+		u32 vpr_enabled;
+		u32 vpr_start;
+		u32 vpr_end;
+		u32 hdcp_policies;
+	} vpr_desc;
+};
+
+
+/*
+ * Low-secure blob creation
+ */
+
+/**
+ * ls_ucode_img_load() - create a lsf_ucode_img and load it
+ */
+struct ls_ucode_img *
+acr_r352_ls_ucode_img_load(const struct acr_r352 *acr,
+			   enum nvkm_secboot_falcon falcon_id)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct ls_ucode_img_r352 *img;
+	int ret;
+
+	img = kzalloc(sizeof(*img), GFP_KERNEL);
+	if (!img)
+		return ERR_PTR(-ENOMEM);
+
+	img->base.falcon_id = falcon_id;
+
+	ret = acr->func->ls_func[falcon_id]->load(subdev, &img->base);
+
+	if (ret) {
+		kfree(img->base.ucode_data);
+		kfree(img->base.sig);
+		kfree(img);
+		return ERR_PTR(ret);
+	}
+
+	/* Check that the signature size matches our expectations... */
+	if (img->base.sig_size != sizeof(img->lsb_header.signature)) {
+		nvkm_error(subdev, "invalid signature size for %s falcon!\n",
+			   nvkm_secboot_falcon_name[falcon_id]);
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Copy signature to the right place */
+	memcpy(&img->lsb_header.signature, img->base.sig, img->base.sig_size);
+
+	/* not needed? the signature should already have the right value */
+	img->lsb_header.signature.falcon_id = falcon_id;
+
+	return &img->base;
+}
+
+#define LSF_LSB_HEADER_ALIGN 256
+#define LSF_BL_DATA_ALIGN 256
+#define LSF_BL_DATA_SIZE_ALIGN 256
+#define LSF_BL_CODE_SIZE_ALIGN 256
+#define LSF_UCODE_DATA_ALIGN 4096
+
+/**
+ * acr_r352_ls_img_fill_headers - fill the WPR and LSB headers of an image
+ * @acr:	ACR to use
+ * @img:	image to generate for
+ * @offset:	offset in the WPR region where this image starts
+ *
+ * Allocate space in the WPR area from offset and write the WPR and LSB headers
+ * accordingly.
+ *
+ * Return: offset at the end of this image.
+ */
+static u32
+acr_r352_ls_img_fill_headers(struct acr_r352 *acr,
+			     struct ls_ucode_img_r352 *img, u32 offset)
+{
+	struct ls_ucode_img *_img = &img->base;
+	struct acr_r352_lsf_wpr_header *whdr = &img->wpr_header;
+	struct acr_r352_lsf_lsb_header *lhdr = &img->lsb_header;
+	struct ls_ucode_img_desc *desc = &_img->ucode_desc;
+	const struct acr_r352_ls_func *func =
+					    acr->func->ls_func[_img->falcon_id];
+
+	/* Fill WPR header */
+	whdr->falcon_id = _img->falcon_id;
+	whdr->bootstrap_owner = acr->base.boot_falcon;
+	whdr->status = LSF_IMAGE_STATUS_COPY;
+
+	/* Skip bootstrapping falcons started by someone else than ACR */
+	if (acr->lazy_bootstrap & BIT(_img->falcon_id))
+		whdr->lazy_bootstrap = 1;
+
+	/* Align, save off, and include an LSB header size */
+	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
+	whdr->lsb_offset = offset;
+	offset += sizeof(*lhdr);
+
+	/*
+	 * Align, save off, and include the original (static) ucode
+	 * image size
+	 */
+	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
+	lhdr->ucode_off = offset;
+	offset += _img->ucode_size;
+
+	/*
+	 * For falcons that use a boot loader (BL), we append a loader
+	 * desc structure on the end of the ucode image and consider
+	 * this the boot loader data. The host will then copy the loader
+	 * desc args to this space within the WPR region (before locking
+	 * down) and the HS bin will then copy them to DMEM 0 for the
+	 * loader.
+	 */
+	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
+				   LSF_BL_CODE_SIZE_ALIGN);
+	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
+				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
+	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
+				lhdr->bl_code_size - lhdr->ucode_size;
+	/*
+	 * Though the BL is located at 0th offset of the image, the VA
+	 * is different to make sure that it doesn't collide the actual
+	 * OS VA range
+	 */
+	lhdr->bl_imem_off = desc->bootloader_imem_offset;
+	lhdr->app_code_off = desc->app_start_offset +
+			     desc->app_resident_code_offset;
+	lhdr->app_code_size = desc->app_resident_code_size;
+	lhdr->app_data_off = desc->app_start_offset +
+			     desc->app_resident_data_offset;
+	lhdr->app_data_size = desc->app_resident_data_size;
+
+	lhdr->flags = func->lhdr_flags;
+	if (_img->falcon_id == acr->base.boot_falcon)
+		lhdr->flags |= LSF_FLAG_DMACTL_REQ_CTX;
+
+	/* Align and save off BL descriptor size */
+	lhdr->bl_data_size = ALIGN(func->bl_desc_size, LSF_BL_DATA_SIZE_ALIGN);
+
+	/*
+	 * Align, save off, and include the additional BL data
+	 */
+	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
+	lhdr->bl_data_off = offset;
+	offset += lhdr->bl_data_size;
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_fill_headers - fill WPR and LSB headers of all managed images
+ */
+int
+acr_r352_ls_fill_headers(struct acr_r352 *acr, struct list_head *imgs)
+{
+	struct ls_ucode_img_r352 *img;
+	struct list_head *l;
+	u32 count = 0;
+	u32 offset;
+
+	/* Count the number of images to manage */
+	list_for_each(l, imgs)
+		count++;
+
+	/*
+	 * Start with an array of WPR headers at the base of the WPR.
+	 * The expectation here is that the secure falcon will do a single DMA
+	 * read of this array and cache it internally so it's ok to pack these.
+	 * Also, we add 1 to the falcon count to indicate the end of the array.
+	 */
+	offset = sizeof(img->wpr_header) * (count + 1);
+
+	/*
+	 * Walk the managed falcons, accounting for the LSB structs
+	 * as well as the ucode images.
+	 */
+	list_for_each_entry(img, imgs, base.node) {
+		offset = acr_r352_ls_img_fill_headers(acr, img, offset);
+	}
+
+	return offset;
+}
+
+/**
+ * acr_r352_ls_write_wpr - write the WPR blob contents
+ */
+int
+acr_r352_ls_write_wpr(struct acr_r352 *acr, struct list_head *imgs,
+		      struct nvkm_gpuobj *wpr_blob, u32 wpr_addr)
+{
+	struct ls_ucode_img *_img;
+	u32 pos = 0;
+
+	nvkm_kmap(wpr_blob);
+
+	list_for_each_entry(_img, imgs, node) {
+		struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+		const struct acr_r352_ls_func *ls_func =
+					    acr->func->ls_func[_img->falcon_id];
+		u8 gdesc[ls_func->bl_desc_size];
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
+				      sizeof(img->wpr_header));
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
+				     &img->lsb_header, sizeof(img->lsb_header));
+
+		/* Generate and write BL descriptor */
+		memset(gdesc, 0, ls_func->bl_desc_size);
+		ls_func->generate_bl_desc(&acr->base, _img, wpr_addr, gdesc);
+
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.bl_data_off,
+				      gdesc, ls_func->bl_desc_size);
+
+		/* Copy ucode */
+		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
+				      _img->ucode_data, _img->ucode_size);
+
+		pos += sizeof(img->wpr_header);
+	}
+
+	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
+
+	nvkm_done(wpr_blob);
+
+	return 0;
+}
+
+/* Both size and address of WPR need to be 128K-aligned */
+#define WPR_ALIGNMENT	0x20000
+/**
+ * acr_r352_prepare_ls_blob() - prepare the LS blob
+ *
+ * For each securely managed falcon, load the FW, signatures and bootloaders and
+ * prepare a ucode blob. Then, compute the offsets in the WPR region for each
+ * blob, and finally write the headers and ucode blobs into a GPU object that
+ * will be copied into the WPR region by the HS firmware.
+ */
+static int
+acr_r352_prepare_ls_blob(struct acr_r352 *acr, u64 wpr_addr, u32 wpr_size)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct list_head imgs;
+	struct ls_ucode_img *img, *t;
+	unsigned long managed_falcons = acr->base.managed_falcons;
+	int managed_count = 0;
+	u32 image_wpr_size;
+	int falcon_id;
+	int ret;
+
+	INIT_LIST_HEAD(&imgs);
+
+	/* Load all LS blobs */
+	for_each_set_bit(falcon_id, &managed_falcons, NVKM_SECBOOT_FALCON_END) {
+		struct ls_ucode_img *img;
+
+		img = acr->func->ls_ucode_img_load(acr, falcon_id);
+		if (IS_ERR(img)) {
+			ret = PTR_ERR(img);
+			goto cleanup;
+		}
+
+		list_add_tail(&img->node, &imgs);
+		managed_count++;
+	}
+
+	/*
+	 * Fill the WPR and LSF headers with the right offsets and compute
+	 * required WPR size
+	 */
+	image_wpr_size = acr->func->ls_fill_headers(acr, &imgs);
+	image_wpr_size = ALIGN(image_wpr_size, WPR_ALIGNMENT);
+
+	/* Allocate GPU object that will contain the WPR region */
+	ret = nvkm_gpuobj_new(subdev->device, image_wpr_size, WPR_ALIGNMENT,
+			      false, NULL, &acr->ls_blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_debug(subdev, "%d managed LS falcons, WPR size is %d bytes\n",
+		    managed_count, image_wpr_size);
+
+	/* If WPR address and size are not fixed, set them to fit the LS blob */
+	if (wpr_size == 0) {
+		wpr_addr = acr->ls_blob->addr;
+		wpr_size = image_wpr_size;
+	/*
+	 * But if the WPR region is set by the bootloader, it is illegal for
+	 * the HS blob to be larger than this region.
+	 */
+	} else if (image_wpr_size > wpr_size) {
+		nvkm_error(subdev, "WPR region too small for FW blob!\n");
+		nvkm_error(subdev, "required: %dB\n", image_wpr_size);
+		nvkm_error(subdev, "available: %dB\n", wpr_size);
+		ret = -ENOSPC;
+		goto cleanup;
+	}
+
+	/* Write LS blob */
+	ret = acr->func->ls_write_wpr(acr, &imgs, acr->ls_blob, wpr_addr);
+	if (ret)
+		nvkm_gpuobj_del(&acr->ls_blob);
+
+cleanup:
+	list_for_each_entry_safe(img, t, &imgs, node) {
+		kfree(img->ucode_data);
+		kfree(img->sig);
+		kfree(img);
+	}
+
+	return ret;
+}
+
+
+
+
+/**
+ * acr_r352_hsf_patch_signature() - patch HS blob with correct signature
+ */
+static void
+acr_r352_hsf_patch_signature(struct nvkm_secboot *sb, void *acr_image)
+{
+	struct fw_bin_header *hsbin_hdr = acr_image;
+	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
+	void *hs_data = acr_image + hsbin_hdr->data_offset;
+	void *sig;
+	u32 sig_size;
+
+	/* Falcon in debug or production mode? */
+	if (sb->boot_falcon->debug) {
+		sig = acr_image + fw_hdr->sig_dbg_offset;
+		sig_size = fw_hdr->sig_dbg_size;
+	} else {
+		sig = acr_image + fw_hdr->sig_prod_offset;
+		sig_size = fw_hdr->sig_prod_size;
+	}
+
+	/* Patch signature */
+	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
+}
+
+static void
+acr_r352_fixup_hs_desc(struct acr_r352 *acr, struct nvkm_secboot *sb,
+		       struct hsflcn_acr_desc *desc)
+{
+	struct nvkm_gpuobj *ls_blob = acr->ls_blob;
+
+	/* WPR region information if WPR is not fixed */
+	if (sb->wpr_size == 0) {
+		u32 wpr_start = ls_blob->addr;
+		u32 wpr_end = wpr_start + ls_blob->size;
+
+		desc->wpr_region_id = 1;
+		desc->regions.no_regions = 2;
+		desc->regions.region_props[0].start_addr = wpr_start >> 8;
+		desc->regions.region_props[0].end_addr = wpr_end >> 8;
+		desc->regions.region_props[0].region_id = 1;
+		desc->regions.region_props[0].read_mask = 0xf;
+		desc->regions.region_props[0].write_mask = 0xc;
+		desc->regions.region_props[0].client_mask = 0x2;
+	} else {
+		desc->ucode_blob_base = ls_blob->addr;
+		desc->ucode_blob_size = ls_blob->size;
+	}
+}
+
+static void
+acr_r352_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			     u64 offset)
+{
+	struct acr_r352_flcn_bl_desc *bl_desc = _bl_desc;
+	u64 addr_code, addr_data;
+
+	addr_code = offset >> 8;
+	addr_data = (offset + hdr->data_dma_base) >> 8;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = lower_32_bits(addr_code);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = lower_32_bits(addr_data);
+	bl_desc->data_size = hdr->data_size;
+}
+
+/**
+ * acr_r352_prepare_hs_blob - load and prepare a HS blob and BL descriptor
+ *
+ * @sb secure boot instance to prepare for
+ * @fw name of the HS firmware to load
+ * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
+ * @bl_desc pointer to the BL descriptor to write for this firmware
+ * @patch whether we should patch the HS descriptor (only for HS loaders)
+ */
+static int
+acr_r352_prepare_hs_blob(struct acr_r352 *acr, struct nvkm_secboot *sb,
+			 const char *fw, struct nvkm_gpuobj **blob,
+			 struct hsf_load_header *load_header, bool patch)
+{
+	struct nvkm_subdev *subdev = &sb->subdev;
+	void *acr_image;
+	struct fw_bin_header *hsbin_hdr;
+	struct hsf_fw_header *fw_hdr;
+	struct hsf_load_header *load_hdr;
+	void *acr_data;
+	int ret;
+
+	acr_image = nvkm_acr_load_firmware(subdev, fw, 0);
+	if (IS_ERR(acr_image))
+		return PTR_ERR(acr_image);
+
+	hsbin_hdr = acr_image;
+	fw_hdr = acr_image + hsbin_hdr->header_offset;
+	load_hdr = acr_image + fw_hdr->hdr_offset;
+	acr_data = acr_image + hsbin_hdr->data_offset;
+
+	/* Patch signature */
+	acr_r352_hsf_patch_signature(sb, acr_image);
+
+	/* Patch descriptor with WPR information? */
+	if (patch) {
+		struct hsflcn_acr_desc *desc;
+
+		desc = acr_data + load_hdr->data_dma_base;
+		acr_r352_fixup_hs_desc(acr, sb, desc);
+	}
+
+	if (load_hdr->num_apps > ACR_R352_MAX_APPS) {
+		nvkm_error(subdev, "more apps (%d) than supported (%d)!",
+			   load_hdr->num_apps, ACR_R352_MAX_APPS);
+		ret = -EINVAL;
+		goto cleanup;
+	}
+	memcpy(load_header, load_hdr, sizeof(*load_header) +
+			       (sizeof(load_hdr->app[0]) * load_hdr->num_apps));
+
+	/* Create ACR blob and copy HS data to it */
+	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
+			      0x1000, false, NULL, blob);
+	if (ret)
+		goto cleanup;
+
+	nvkm_kmap(*blob);
+	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
+	nvkm_done(*blob);
+
+cleanup:
+	kfree(acr_image);
+
+	return ret;
+}
+
+static int
+acr_r352_prepare_hsbl_blob(struct acr_r352 *acr)
+{
+	const struct nvkm_subdev *subdev = acr->base.subdev;
+	struct fw_bin_header *hdr;
+	struct fw_bl_desc *hsbl_desc;
+
+	acr->hsbl_blob = nvkm_acr_load_firmware(subdev, "acr/bl", 0);
+	if (IS_ERR(acr->hsbl_blob)) {
+		int ret = PTR_ERR(acr->hsbl_blob);
+
+		acr->hsbl_blob = NULL;
+		return ret;
+	}
+
+	hdr = acr->hsbl_blob;
+	hsbl_desc = acr->hsbl_blob + hdr->header_offset;
+
+	/* virtual start address for boot vector */
+	acr->base.start_address = hsbl_desc->start_tag << 8;
+
+	return 0;
+}
+
+/**
+ * acr_r352_load_blobs - load blobs common to all ACR V1 versions.
+ *
+ * This includes the LS blob, HS ucode loading blob, and HS bootloader.
+ *
+ * The HS ucode unload blob is only used on dGPU if the WPR region is variable.
+ */
+int
+acr_r352_load_blobs(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int ret;
+
+	/* Firmware already loaded? */
+	if (acr->firmware_ok)
+		return 0;
+
+	/* Load and prepare the managed falcon's firmwares */
+	ret = acr_r352_prepare_ls_blob(acr, sb->wpr_addr, sb->wpr_size);
+	if (ret)
+		return ret;
+
+	/* Load the HS firmware that will load the LS firmwares */
+	if (!acr->load_blob) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_load",
+					       &acr->load_blob,
+					       &acr->load_bl_header, true);
+		if (ret)
+			return ret;
+	}
+
+	/* If the ACR region is dynamically programmed, we need an unload FW */
+	if (sb->wpr_size == 0) {
+		ret = acr_r352_prepare_hs_blob(acr, sb, "acr/ucode_unload",
+					       &acr->unload_blob,
+					       &acr->unload_bl_header, false);
+		if (ret)
+			return ret;
+	}
+
+	/* Load the HS firmware bootloader */
+	if (!acr->hsbl_blob) {
+		ret = acr_r352_prepare_hsbl_blob(acr);
+		if (ret)
+			return ret;
+	}
+
+	acr->firmware_ok = true;
+	nvkm_debug(&sb->subdev, "LS blob successfully created\n");
+
+	return 0;
+}
+
+/**
+ * acr_r352_load() - prepare HS falcon to run the specified blob, mapped
+ * at GPU address offset.
+ */
+static int
+acr_r352_load(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	      struct nvkm_gpuobj *blob, u64 offset)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	struct nvkm_falcon *falcon = sb->boot_falcon;
+	struct fw_bin_header *hdr = acr->hsbl_blob;
+	struct fw_bl_desc *hsbl_desc = acr->hsbl_blob + hdr->header_offset;
+	void *blob_data = acr->hsbl_blob + hdr->data_offset;
+	void *hsbl_code = blob_data + hsbl_desc->code_off;
+	void *hsbl_data = blob_data + hsbl_desc->data_off;
+	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
+	const struct hsf_load_header *load_hdr;
+	const u32 bl_desc_size = acr->func->hs_bl_desc_size;
+	u8 bl_desc[bl_desc_size];
+
+	/* Find the bootloader descriptor for our blob and copy it */
+	if (blob == acr->load_blob) {
+		load_hdr = &acr->load_bl_header;
+	} else if (blob == acr->unload_blob) {
+		load_hdr = &acr->unload_bl_header;
+	} else {
+		nvkm_error(_acr->subdev, "invalid secure boot blob!\n");
+		return -EINVAL;
+	}
+
+	/*
+	 * Copy HS bootloader data
+	 */
+	nvkm_falcon_load_dmem(falcon, hsbl_data, 0x0, hsbl_desc->data_size, 0);
+
+	/* Copy HS bootloader code to end of IMEM */
+	nvkm_falcon_load_imem(falcon, hsbl_code, falcon->code.limit - code_size,
+			      code_size, hsbl_desc->start_tag, 0, false);
+
+	/* Generate the BL header */
+	memset(bl_desc, 0, bl_desc_size);
+	acr->func->generate_hs_bl_desc(load_hdr, bl_desc, offset);
+
+	/*
+	 * Copy HS BL header where the HS descriptor expects it to be
+	 */
+	nvkm_falcon_load_dmem(falcon, bl_desc, hsbl_desc->dmem_load_off,
+			      bl_desc_size, 0);
+
+	return 0;
+}
+
+static int
+acr_r352_shutdown(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int i;
+
+	/* Run the unload blob to unprotect the WPR region */
+	if (acr->unload_blob && sb->wpr_set) {
+		int ret;
+
+		nvkm_debug(&sb->subdev, "running HS unload blob\n");
+		ret = sb->func->run_blob(sb, acr->unload_blob);
+		if (ret)
+			return ret;
+		nvkm_debug(&sb->subdev, "HS unload blob completed\n");
+	}
+
+	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
+		acr->falcon_state[i] = NON_SECURE;
+
+	sb->wpr_set = false;
+
+	return 0;
+}
+
+static int
+acr_r352_bootstrap(struct acr_r352 *acr, struct nvkm_secboot *sb)
+{
+	int ret;
+
+	if (sb->wpr_set)
+		return 0;
+
+	/* Make sure all blobs are ready */
+	ret = acr_r352_load_blobs(acr, sb);
+	if (ret)
+		return ret;
+
+	nvkm_debug(&sb->subdev, "running HS load blob\n");
+	ret = sb->func->run_blob(sb, acr->load_blob);
+	/* clear halt interrupt */
+	nvkm_falcon_clear_interrupt(sb->boot_falcon, 0x10);
+	if (ret)
+		return ret;
+	nvkm_debug(&sb->subdev, "HS load blob completed\n");
+
+	sb->wpr_set = true;
+
+	return 0;
+}
+
+/*
+ * acr_r352_reset() - execute secure boot from the prepared state
+ *
+ * Load the HS bootloader and ask the falcon to run it. This will in turn
+ * load the HS firmware and run it, so once the falcon stops all the managed
+ * falcons should have their LS firmware loaded and be ready to run.
+ */
+static int
+acr_r352_reset(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+	       enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	int ret;
+
+	/*
+	 * Dummy GM200 implementation: perform secure boot each time we are
+	 * called on FECS. Since only FECS and GPCCS are managed and started
+	 * together, this ought to be safe.
+	 *
+	 * Once we have proper PMU firmware and support, this will be changed
+	 * to a proper call to the PMU method.
+	 */
+	if (falcon != NVKM_SECBOOT_FALCON_FECS)
+		goto end;
+
+	ret = acr_r352_shutdown(acr, sb);
+	if (ret)
+		return ret;
+
+	acr_r352_bootstrap(acr, sb);
+	if (ret)
+		return ret;
+
+end:
+	acr->falcon_state[falcon] = RESET;
+	return 0;
+}
+
+static int
+acr_r352_start(struct nvkm_acr *_acr, struct nvkm_secboot *sb,
+		    enum nvkm_secboot_falcon falcon)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+	const struct nvkm_subdev *subdev = &sb->subdev;
+	int base;
+
+	switch (falcon) {
+	case NVKM_SECBOOT_FALCON_FECS:
+		base = 0x409000;
+		break;
+	case NVKM_SECBOOT_FALCON_GPCCS:
+		base = 0x41a000;
+		break;
+	default:
+		nvkm_error(subdev, "cannot start unhandled falcon!\n");
+		return -EINVAL;
+	}
+
+	nvkm_wr32(subdev->device, base + 0x130, 0x00000002);
+	acr->falcon_state[falcon] = RUNNING;
+
+	return 0;
+}
+
+static int
+acr_r352_fini(struct nvkm_acr *_acr, struct nvkm_secboot *sb, bool suspend)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	return acr_r352_shutdown(acr, sb);
+}
+
+static void
+acr_r352_dtor(struct nvkm_acr *_acr)
+{
+	struct acr_r352 *acr = acr_r352(_acr);
+
+	nvkm_gpuobj_del(&acr->unload_blob);
+
+	kfree(acr->hsbl_blob);
+	nvkm_gpuobj_del(&acr->load_blob);
+	nvkm_gpuobj_del(&acr->ls_blob);
+
+	kfree(acr);
+}
+
+const struct acr_r352_ls_func
+acr_r352_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r352_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r352_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r352_func = {
+	.generate_hs_bl_desc = acr_r352_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r352_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r352_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r352_ls_gpccs_func,
+	},
+};
+
+static const struct nvkm_acr_func
+acr_r352_base_func = {
+	.dtor = acr_r352_dtor,
+	.fini = acr_r352_fini,
+	.load = acr_r352_load,
+	.reset = acr_r352_reset,
+	.start = acr_r352_start,
+};
+
+struct nvkm_acr *
+acr_r352_new_(const struct acr_r352_func *func,
+	      enum nvkm_secboot_falcon boot_falcon,
+	      unsigned long managed_falcons)
+{
+	struct acr_r352 *acr;
+
+	acr = kzalloc(sizeof(*acr), GFP_KERNEL);
+	if (!acr)
+		return ERR_PTR(-ENOMEM);
+
+	acr->base.boot_falcon = boot_falcon;
+	acr->base.managed_falcons = managed_falcons;
+	acr->base.func = &acr_r352_base_func;
+	acr->func = func;
+
+	return &acr->base;
+}
+
+struct nvkm_acr *
+acr_r352_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r352_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
new file mode 100644
index 000000000000..ad5923b0fd3c
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r352.h
@@ -0,0 +1,250 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+#ifndef __NVKM_SECBOOT_ACR_R352_H__
+#define __NVKM_SECBOOT_ACR_R352_H__
+
+#include "acr.h"
+#include "ls_ucode.h"
+
+struct ls_ucode_img;
+
+#define ACR_R352_MAX_APPS 8
+
+/*
+ *
+ * LS blob structures
+ *
+ */
+
+/**
+ * struct acr_r352_lsf_lsb_header - LS firmware header
+ * @signature:		signature to verify the firmware against
+ * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
+ *                      blob contains the bootloader, code and data of the
+ *                      LS falcon
+ * @ucode_size:		size of the ucode blob, including bootloader
+ * @data_size:		size of the ucode blob data
+ * @bl_code_size:	size of the bootloader code
+ * @bl_imem_off:	offset in imem of the bootloader
+ * @bl_data_off:	offset of the bootloader data in WPR region
+ * @bl_data_size:	size of the bootloader data
+ * @app_code_off:	offset of the app code relative to ucode_off
+ * @app_code_size:	size of the app code
+ * @app_data_off:	offset of the app data relative to ucode_off
+ * @app_data_size:	size of the app data
+ * @flags:		flags for the secure bootloader
+ *
+ * This structure is written into the WPR region for each managed falcon. Each
+ * instance is referenced by the lsb_offset member of the corresponding
+ * lsf_wpr_header.
+ */
+struct acr_r352_lsf_lsb_header {
+	/**
+	 * LS falcon signatures
+	 * @prd_keys:		signature to use in production mode
+	 * @dgb_keys:		signature to use in debug mode
+	 * @b_prd_present:	whether the production key is present
+	 * @b_dgb_present:	whether the debug key is present
+	 * @falcon_id:		ID of the falcon the ucode applies to
+	 */
+	struct {
+		u8 prd_keys[2][16];
+		u8 dbg_keys[2][16];
+		u32 b_prd_present;
+		u32 b_dbg_present;
+		u32 falcon_id;
+	} signature;
+	u32 ucode_off;
+	u32 ucode_size;
+	u32 data_size;
+	u32 bl_code_size;
+	u32 bl_imem_off;
+	u32 bl_data_off;
+	u32 bl_data_size;
+	u32 app_code_off;
+	u32 app_code_size;
+	u32 app_data_off;
+	u32 app_data_size;
+	u32 flags;
+#define LSF_FLAG_LOAD_CODE_AT_0		1
+#define LSF_FLAG_DMACTL_REQ_CTX		4
+#define LSF_FLAG_FORCE_PRIV_LOAD	8
+};
+
+/**
+ * struct acr_r352_lsf_wpr_header - LS blob WPR Header
+ * @falcon_id:		LS falcon ID
+ * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
+ * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
+ * @lazy_bootstrap:	skip bootstrapping by ACR
+ * @status:		bootstrapping status
+ *
+ * An array of these is written at the beginning of the WPR region, one for
+ * each managed falcon. The array is terminated by an instance which falcon_id
+ * is LSF_FALCON_ID_INVALID.
+ */
+struct acr_r352_lsf_wpr_header {
+	u32 falcon_id;
+	u32 lsb_offset;
+	u32 bootstrap_owner;
+	u32 lazy_bootstrap;
+	u32 status;
+#define LSF_IMAGE_STATUS_NONE				0
+#define LSF_IMAGE_STATUS_COPY				1
+#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
+#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
+#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
+#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
+#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
+};
+
+/**
+ * struct ls_ucode_img_r352 - ucode image augmented with r352 headers
+ */
+struct ls_ucode_img_r352 {
+	struct ls_ucode_img base;
+
+	struct acr_r352_lsf_wpr_header wpr_header;
+	struct acr_r352_lsf_lsb_header lsb_header;
+};
+#define ls_ucode_img_r352(i) container_of(i, struct ls_ucode_img_r352, base)
+
+
+/*
+ * HS blob structures
+ */
+
+struct hsf_load_header_app {
+	u32 sec_code_off;
+	u32 sec_code_size;
+};
+
+/**
+ * struct hsf_load_header - HS firmware load header
+ */
+struct hsf_load_header {
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 data_dma_base;
+	u32 data_size;
+	u32 num_apps;
+	struct hsf_load_header_app app[0];
+};
+
+/**
+ * struct acr_r352_ls_func - manages a single LS firmware
+ *
+ * @load: load the external firmware into a ls_ucode_img
+ * @generate_bl_desc: function called on a block of bl_desc_size to generate the
+ *		      proper bootloader descriptor for this LS firmware
+ * @bl_desc_size: size of the bootloader descriptor
+ * @lhdr_flags: LS flags
+ */
+struct acr_r352_ls_func {
+	int (*load)(const struct nvkm_subdev *, struct ls_ucode_img *);
+	void (*generate_bl_desc)(const struct nvkm_acr *,
+				 const struct ls_ucode_img *, u64, void *);
+	u32 bl_desc_size;
+	u32 lhdr_flags;
+};
+
+struct acr_r352;
+
+/**
+ * struct acr_r352_func - manages nuances between ACR versions
+ *
+ * @generate_hs_bl_desc: function called on a block of bl_desc_size to generate
+ *			 the proper HS bootloader descriptor
+ * @hs_bl_desc_size: size of the HS bootloader descriptor
+ */
+struct acr_r352_func {
+	void (*generate_hs_bl_desc)(const struct hsf_load_header *, void *,
+				    u64);
+	u32 hs_bl_desc_size;
+
+	struct ls_ucode_img *(*ls_ucode_img_load)(const struct acr_r352 *,
+						  enum nvkm_secboot_falcon);
+	int (*ls_fill_headers)(struct acr_r352 *, struct list_head *);
+	int (*ls_write_wpr)(struct acr_r352 *, struct list_head *,
+			    struct nvkm_gpuobj *, u32);
+
+	const struct acr_r352_ls_func *ls_func[NVKM_SECBOOT_FALCON_END];
+};
+
+/**
+ * struct acr_r352 - ACR data for driver release 352 (and beyond)
+ */
+struct acr_r352 {
+	struct nvkm_acr base;
+	const struct acr_r352_func *func;
+
+	/*
+	 * HS FW - lock WPR region (dGPU only) and load LS FWs
+	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
+	 */
+	struct nvkm_gpuobj *load_blob;
+	struct {
+		struct hsf_load_header load_bl_header;
+		struct hsf_load_header_app __load_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS FW - unlock WPR region (dGPU only) */
+	struct nvkm_gpuobj *unload_blob;
+	struct {
+		struct hsf_load_header unload_bl_header;
+		struct hsf_load_header_app __unload_apps[ACR_R352_MAX_APPS];
+	};
+
+	/* HS bootloader */
+	void *hsbl_blob;
+
+	/* LS FWs, to be loaded by the HS ACR */
+	struct nvkm_gpuobj *ls_blob;
+
+	/* Firmware already loaded? */
+	bool firmware_ok;
+
+	/* Falcons to lazy-bootstrap */
+	u32 lazy_bootstrap;
+
+	/* To keep track of the state of all managed falcons */
+	enum {
+		/* In non-secure state, no firmware loaded, no privileges*/
+		NON_SECURE = 0,
+		/* In low-secure mode and ready to be started */
+		RESET,
+		/* In low-secure mode and running */
+		RUNNING,
+	} falcon_state[NVKM_SECBOOT_FALCON_END];
+};
+#define acr_r352(acr) container_of(acr, struct acr_r352, base)
+
+struct nvkm_acr *acr_r352_new_(const struct acr_r352_func *,
+			       enum nvkm_secboot_falcon, unsigned long);
+
+struct ls_ucode_img *acr_r352_ls_ucode_img_load(const struct acr_r352 *,
+						enum nvkm_secboot_falcon);
+int acr_r352_ls_fill_headers(struct acr_r352 *, struct list_head *);
+int acr_r352_ls_write_wpr(struct acr_r352 *, struct list_head *,
+			  struct nvkm_gpuobj *, u32);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
new file mode 100644
index 000000000000..f0aff1d98474
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/acr_r361.c
@@ -0,0 +1,138 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include "acr_r352.h"
+
+#include <engine/falcon.h>
+
+/**
+ * struct acr_r361_flcn_bl_desc - DMEM bootloader descriptor
+ * @signature:		16B signature for secure code. 0s if no secure code
+ * @ctx_dma:		DMA context to be used by BL while loading code/data
+ * @code_dma_base:	256B-aligned Physical FB Address where code is located
+ *			(falcon's $xcbase register)
+ * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @non_sec_code_size:	the size of the nonSecure code part.
+ * @sec_code_off:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @sec_code_size:	offset from code_dma_base where the secure code is
+ *                      located. The offset must be multiple of 256 to help perf
+ * @code_entry_point:	code entry point which will be invoked by BL after
+ *                      code is loaded.
+ * @data_dma_base:	256B aligned Physical FB Address where data is located.
+ *			(falcon's $xdbase register)
+ * @data_size:		size of data block. Should be multiple of 256B
+ *
+ * Structure used by the bootloader to load the rest of the code. This has
+ * to be filled by host and copied into DMEM at offset provided in the
+ * hsflcn_bl_desc.bl_desc_dmem_load_off.
+ */
+struct acr_r361_flcn_bl_desc {
+	u32 reserved[4];
+	u32 signature[4];
+	u32 ctx_dma;
+	struct flcn_u64 code_dma_base;
+	u32 non_sec_code_off;
+	u32 non_sec_code_size;
+	u32 sec_code_off;
+	u32 sec_code_size;
+	u32 code_entry_point;
+	struct flcn_u64 data_dma_base;
+	u32 data_size;
+};
+
+static void
+acr_r361_generate_flcn_bl_desc(const struct nvkm_acr *acr,
+			       const struct ls_ucode_img *_img, u64 wpr_addr,
+			       void *_desc)
+{
+	struct ls_ucode_img_r352 *img = ls_ucode_img_r352(_img);
+	struct acr_r361_flcn_bl_desc *desc = _desc;
+	const struct ls_ucode_img_desc *pdesc = &img->base.ucode_desc;
+	u64 base, addr_code, addr_data;
+
+	base = wpr_addr + img->lsb_header.ucode_off + pdesc->app_start_offset;
+	addr_code = base + pdesc->app_resident_code_offset;
+	addr_data = base + pdesc->app_resident_data_offset;
+
+	desc->ctx_dma = FALCON_DMAIDX_UCODE;
+	desc->code_dma_base = u64_to_flcn64(addr_code);
+	desc->non_sec_code_off = pdesc->app_resident_code_offset;
+	desc->non_sec_code_size = pdesc->app_resident_code_size;
+	desc->code_entry_point = pdesc->app_imem_entry;
+	desc->data_dma_base = u64_to_flcn64(addr_data);
+	desc->data_size = pdesc->app_resident_data_size;
+}
+
+static void
+acr_r361_generate_hs_bl_desc(const struct hsf_load_header *hdr, void *_bl_desc,
+			    u64 offset)
+{
+	struct acr_r361_flcn_bl_desc *bl_desc = _bl_desc;
+
+	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
+	bl_desc->code_dma_base = u64_to_flcn64(offset);
+	bl_desc->non_sec_code_off = hdr->non_sec_code_off;
+	bl_desc->non_sec_code_size = hdr->non_sec_code_size;
+	bl_desc->sec_code_off = hdr->app[0].sec_code_off;
+	bl_desc->sec_code_size = hdr->app[0].sec_code_size;
+	bl_desc->code_entry_point = 0;
+	bl_desc->data_dma_base = u64_to_flcn64(offset + hdr->data_dma_base);
+	bl_desc->data_size = hdr->data_size;
+}
+
+const struct acr_r352_ls_func
+acr_r361_ls_fecs_func = {
+	.load = acr_ls_ucode_load_fecs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+};
+
+const struct acr_r352_ls_func
+acr_r361_ls_gpccs_func = {
+	.load = acr_ls_ucode_load_gpccs,
+	.generate_bl_desc = acr_r361_generate_flcn_bl_desc,
+	.bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	/* GPCCS will be loaded using PRI */
+	.lhdr_flags = LSF_FLAG_FORCE_PRIV_LOAD,
+};
+
+const struct acr_r352_func
+acr_r361_func = {
+	.generate_hs_bl_desc = acr_r361_generate_hs_bl_desc,
+	.hs_bl_desc_size = sizeof(struct acr_r361_flcn_bl_desc),
+	.ls_ucode_img_load = acr_r352_ls_ucode_img_load,
+	.ls_fill_headers = acr_r352_ls_fill_headers,
+	.ls_write_wpr = acr_r352_ls_write_wpr,
+	.ls_func = {
+		[NVKM_SECBOOT_FALCON_FECS] = &acr_r361_ls_fecs_func,
+		[NVKM_SECBOOT_FALCON_GPCCS] = &acr_r361_ls_gpccs_func,
+	},
+};
+
+struct nvkm_acr *
+acr_r361_new(unsigned long managed_falcons)
+{
+	return acr_r352_new_(&acr_r361_func, NVKM_SECBOOT_FALCON_PMU,
+			     managed_falcons);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
index 314be2192b7d..27c9dfffb9a6 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/base.c
@@ -19,184 +19,108 @@
  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  * DEALINGS IN THE SOFTWARE.
  */
+
+/*
+ * Secure boot is the process by which NVIDIA-signed firmware is loaded into
+ * some of the falcons of a GPU. For production devices this is the only way
+ * for the firmware to access useful (but sensitive) registers.
+ *
+ * A Falcon microprocessor supporting advanced security modes can run in one of
+ * three modes:
+ *
+ * - Non-secure (NS). In this mode, functionality is similar to Falcon
+ *   architectures before security modes were introduced (pre-Maxwell), but
+ *   capability is restricted. In particular, certain registers may be
+ *   inaccessible for reads and/or writes, and physical memory access may be
+ *   disabled (on certain Falcon instances). This is the only possible mode that
+ *   can be used if you don't have microcode cryptographically signed by NVIDIA.
+ *
+ * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
+ *   not possible to read or write any Falcon internal state or Falcon registers
+ *   from outside the Falcon (for example, from the host system). The only way
+ *   to enable this mode is by loading microcode that has been signed by NVIDIA.
+ *   (The loading process involves tagging the IMEM block as secure, writing the
+ *   signature into a Falcon register, and starting execution. The hardware will
+ *   validate the signature, and if valid, grant HS privileges.)
+ *
+ * - Light Secure (LS). In this mode, the microprocessor has more privileges
+ *   than NS but fewer than HS. Some of the microprocessor state is visible to
+ *   host software to ease debugging. The only way to enable this mode is by HS
+ *   microcode enabling LS mode. Some privileges available to HS mode are not
+ *   available here. LS mode is introduced in GM20x.
+ *
+ * Secure boot consists in temporarily switching a HS-capable falcon (typically
+ * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
+ * load them, and switch managed falcons into LS mode. Once secure boot
+ * completes, no falcon remains in HS mode.
+ *
+ * Secure boot requires a write-protected memory region (WPR) which can only be
+ * written by the secure falcon. On dGPU, the driver sets up the WPR region in
+ * video memory. On Tegra, it is set up by the bootloader and its location and
+ * size written into memory controller registers.
+ *
+ * The secure boot process takes place as follows:
+ *
+ * 1) A LS blob is constructed that contains all the LS firmwares we want to
+ *    load, along with their signatures and bootloaders.
+ *
+ * 2) A HS blob (also called ACR) is created that contains the signed HS
+ *    firmware in charge of loading the LS firmwares into their respective
+ *    falcons.
+ *
+ * 3) The HS blob is loaded (via its own bootloader) and executed on the
+ *    HS-capable falcon. It authenticates itself, switches the secure falcon to
+ *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
+ *    LS blob into the WPR region (Tegra).
+ *
+ * 4) The LS blob is now secure from all external tampering. The HS falcon
+ *    checks the signatures of the LS firmwares and, if valid, switches the
+ *    managed falcons to LS mode and makes them ready to run the LS firmware.
+ *
+ * 5) The managed falcons remain in LS mode and can be started.
+ *
+ */
+
 #include "priv.h"
+#include "acr.h"
 
 #include <subdev/mc.h>
 #include <subdev/timer.h>
+#include <subdev/pmu.h>
 
-static const char *
-managed_falcons_names[] = {
+const char *
+nvkm_secboot_falcon_name[] = {
 	[NVKM_SECBOOT_FALCON_PMU] = "PMU",
 	[NVKM_SECBOOT_FALCON_RESERVED] = "<reserved>",
 	[NVKM_SECBOOT_FALCON_FECS] = "FECS",
 	[NVKM_SECBOOT_FALCON_GPCCS] = "GPCCS",
 	[NVKM_SECBOOT_FALCON_END] = "<invalid>",
 };
-
-/*
- * Helper falcon functions
- */
-
-static int
-falcon_clear_halt_interrupt(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	/* clear halt interrupt */
-	nvkm_mask(device, base + 0x004, 0x10, 0x10);
-	/* wait until halt interrupt is cleared */
-	ret = nvkm_wait_msec(device, 10, base + 0x008, 0x10, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-falcon_wait_idle(struct nvkm_device *device, u32 base)
-{
-	int ret;
-
-	ret = nvkm_wait_msec(device, 10, base + 0x04c, 0xffff, 0x0);
-	if (ret < 0)
-		return ret;
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_enable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* enable engine */
-	nvkm_mc_enable(device, sb->devidx);
-	ret = nvkm_wait_msec(device, 10, sb->base + 0x10c, 0x6, 0x0);
-	if (ret < 0) {
-		nvkm_error(&sb->subdev, "Falcon mem scrubbing timeout\n");
-		nvkm_mc_disable(device, sb->devidx);
-		return ret;
-	}
-
-	ret = falcon_wait_idle(device, sb->base);
-	if (ret)
-		return ret;
-
-	/* enable IRQs */
-	nvkm_wr32(device, sb->base + 0x010, 0xff);
-	nvkm_mc_intr_mask(device, sb->devidx, true);
-
-	return 0;
-}
-
-static int
-nvkm_secboot_falcon_disable(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-
-	/* disable IRQs and wait for any previous code to complete */
-	nvkm_mc_intr_mask(device, sb->devidx, false);
-	nvkm_wr32(device, sb->base + 0x014, 0xff);
-
-	falcon_wait_idle(device, sb->base);
-
-	/* disable engine */
-	nvkm_mc_disable(device, sb->devidx);
-
-	return 0;
-}
-
-int
-nvkm_secboot_falcon_reset(struct nvkm_secboot *sb)
-{
-	int ret;
-
-	ret = nvkm_secboot_falcon_disable(sb);
-	if (ret)
-		return ret;
-
-	ret = nvkm_secboot_falcon_enable(sb);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
-/**
- * nvkm_secboot_falcon_run - run the falcon that will perform secure boot
- *
- * This function is to be called after all chip-specific preparations have
- * been completed. It will start the falcon to perform secure boot, wait for
- * it to halt, and report if an error occurred.
- */
-int
-nvkm_secboot_falcon_run(struct nvkm_secboot *sb)
-{
-	struct nvkm_device *device = sb->subdev.device;
-	int ret;
-
-	/* Start falcon */
-	nvkm_wr32(device, sb->base + 0x100, 0x2);
-
-	/* Wait for falcon halt */
-	ret = nvkm_wait_msec(device, 100, sb->base + 0x100, 0x10, 0x10);
-	if (ret < 0)
-		return ret;
-
-	/* If mailbox register contains an error code, then ACR has failed */
-	ret = nvkm_rd32(device, sb->base + 0x040);
-	if (ret) {
-		nvkm_error(&sb->subdev, "ACR boot failed, ret 0x%08x", ret);
-		falcon_clear_halt_interrupt(device, sb->base);
-		return -EINVAL;
-	}
-
-	return 0;
-}
-
-
 /**
  * nvkm_secboot_reset() - reset specified falcon
  */
 int
-nvkm_secboot_reset(struct nvkm_secboot *sb, u32 falcon)
+nvkm_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
 {
 	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
+	if (!(BIT(falcon) & sb->acr->managed_falcons)) {
 		nvkm_error(&sb->subdev, "cannot reset unmanaged falcon!\n");
 		return -EINVAL;
 	}
 
-	return sb->func->reset(sb, falcon);
-}
-
-/**
- * nvkm_secboot_start() - start specified falcon
- */
-int
-nvkm_secboot_start(struct nvkm_secboot *sb, u32 falcon)
-{
-	/* Unmanaged falcon? */
-	if (!(BIT(falcon) & sb->func->managed_falcons)) {
-		nvkm_error(&sb->subdev, "cannot start unmanaged falcon!\n");
-		return -EINVAL;
-	}
-
-	return sb->func->start(sb, falcon);
+	return sb->acr->func->reset(sb->acr, sb, falcon);
 }
 
 /**
  * nvkm_secboot_is_managed() - check whether a given falcon is securely-managed
  */
 bool
-nvkm_secboot_is_managed(struct nvkm_secboot *secboot,
-			enum nvkm_secboot_falcon fid)
+nvkm_secboot_is_managed(struct nvkm_secboot *sb, enum nvkm_secboot_falcon fid)
 {
-	if (!secboot)
+	if (!sb)
 		return false;
 
-	return secboot->func->managed_falcons & BIT(fid);
+	return sb->acr->managed_falcons & BIT(fid);
 }
 
 static int
@@ -205,9 +129,19 @@ nvkm_secboot_oneinit(struct nvkm_subdev *subdev)
 	struct nvkm_secboot *sb = nvkm_secboot(subdev);
 	int ret = 0;
 
+	switch (sb->acr->boot_falcon) {
+	case NVKM_SECBOOT_FALCON_PMU:
+		sb->boot_falcon = subdev->device->pmu->falcon;
+		break;
+	default:
+		nvkm_error(subdev, "Unmanaged boot falcon %s!\n",
+			                nvkm_secboot_falcon_name[sb->acr->boot_falcon]);
+		return -EINVAL;
+	}
+
 	/* Call chip-specific init function */
-	if (sb->func->init)
-		ret = sb->func->init(sb);
+	if (sb->func->oneinit)
+		ret = sb->func->oneinit(sb);
 	if (ret) {
 		nvkm_error(subdev, "Secure Boot initialization failed: %d\n",
 			   ret);
@@ -249,7 +183,7 @@ nvkm_secboot = {
 };
 
 int
-nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
+nvkm_secboot_ctor(const struct nvkm_secboot_func *func, struct nvkm_acr *acr,
 		  struct nvkm_device *device, int index,
 		  struct nvkm_secboot *sb)
 {
@@ -257,22 +191,14 @@ nvkm_secboot_ctor(const struct nvkm_secboot_func *func,
 
 	nvkm_subdev_ctor(&nvkm_secboot, device, index, &sb->subdev);
 	sb->func = func;
-
-	/* setup the performing falcon's base address and masks */
-	switch (func->boot_falcon) {
-	case NVKM_SECBOOT_FALCON_PMU:
-		sb->devidx = NVKM_SUBDEV_PMU;
-		sb->base = 0x10a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "invalid secure boot falcon\n");
-		return -EINVAL;
-	};
+	sb->acr = acr;
+	acr->subdev = &sb->subdev;
 
 	nvkm_debug(&sb->subdev, "securely managed falcons:\n");
-	for_each_set_bit(fid, &sb->func->managed_falcons,
+	for_each_set_bit(fid, &sb->acr->managed_falcons,
 			 NVKM_SECBOOT_FALCON_END)
-		nvkm_debug(&sb->subdev, "- %s\n", managed_falcons_names[fid]);
+		nvkm_debug(&sb->subdev, "- %s\n",
+			   nvkm_secboot_falcon_name[fid]);
 
 	return 0;
 }
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
index f1e2dc914366..813c4eb0b25f 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.c
@@ -20,1313 +20,84 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-/*
- * Secure boot is the process by which NVIDIA-signed firmware is loaded into
- * some of the falcons of a GPU. For production devices this is the only way
- * for the firmware to access useful (but sensitive) registers.
- *
- * A Falcon microprocessor supporting advanced security modes can run in one of
- * three modes:
- *
- * - Non-secure (NS). In this mode, functionality is similar to Falcon
- *   architectures before security modes were introduced (pre-Maxwell), but
- *   capability is restricted. In particular, certain registers may be
- *   inaccessible for reads and/or writes, and physical memory access may be
- *   disabled (on certain Falcon instances). This is the only possible mode that
- *   can be used if you don't have microcode cryptographically signed by NVIDIA.
- *
- * - Heavy Secure (HS). In this mode, the microprocessor is a black box - it's
- *   not possible to read or write any Falcon internal state or Falcon registers
- *   from outside the Falcon (for example, from the host system). The only way
- *   to enable this mode is by loading microcode that has been signed by NVIDIA.
- *   (The loading process involves tagging the IMEM block as secure, writing the
- *   signature into a Falcon register, and starting execution. The hardware will
- *   validate the signature, and if valid, grant HS privileges.)
- *
- * - Light Secure (LS). In this mode, the microprocessor has more privileges
- *   than NS but fewer than HS. Some of the microprocessor state is visible to
- *   host software to ease debugging. The only way to enable this mode is by HS
- *   microcode enabling LS mode. Some privileges available to HS mode are not
- *   available here. LS mode is introduced in GM20x.
- *
- * Secure boot consists in temporarily switching a HS-capable falcon (typically
- * PMU) into HS mode in order to validate the LS firmwares of managed falcons,
- * load them, and switch managed falcons into LS mode. Once secure boot
- * completes, no falcon remains in HS mode.
- *
- * Secure boot requires a write-protected memory region (WPR) which can only be
- * written by the secure falcon. On dGPU, the driver sets up the WPR region in
- * video memory. On Tegra, it is set up by the bootloader and its location and
- * size written into memory controller registers.
- *
- * The secure boot process takes place as follows:
- *
- * 1) A LS blob is constructed that contains all the LS firmwares we want to
- *    load, along with their signatures and bootloaders.
- *
- * 2) A HS blob (also called ACR) is created that contains the signed HS
- *    firmware in charge of loading the LS firmwares into their respective
- *    falcons.
- *
- * 3) The HS blob is loaded (via its own bootloader) and executed on the
- *    HS-capable falcon. It authenticates itself, switches the secure falcon to
- *    HS mode and setup the WPR region around the LS blob (dGPU) or copies the
- *    LS blob into the WPR region (Tegra).
- *
- * 4) The LS blob is now secure from all external tampering. The HS falcon
- *    checks the signatures of the LS firmwares and, if valid, switches the
- *    managed falcons to LS mode and makes them ready to run the LS firmware.
- *
- * 5) The managed falcons remain in LS mode and can be started.
- *
- */
 
-#include "priv.h"
+#include "acr.h"
+#include "gm200.h"
 
 #include <core/gpuobj.h>
-#include <core/firmware.h>
 #include <subdev/fb.h>
-
-enum {
-	FALCON_DMAIDX_UCODE		= 0,
-	FALCON_DMAIDX_VIRT		= 1,
-	FALCON_DMAIDX_PHYS_VID		= 2,
-	FALCON_DMAIDX_PHYS_SYS_COH	= 3,
-	FALCON_DMAIDX_PHYS_SYS_NCOH	= 4,
-};
-
-/**
- * struct fw_bin_header - header of firmware files
- * @bin_magic:		always 0x3b1d14f0
- * @bin_ver:		version of the bin format
- * @bin_size:		entire image size including this header
- * @header_offset:	offset of the firmware/bootloader header in the file
- * @data_offset:	offset of the firmware/bootloader payload in the file
- * @data_size:		size of the payload
- *
- * This header is located at the beginning of the HS firmware and HS bootloader
- * files, to describe where the headers and data can be found.
- */
-struct fw_bin_header {
-	u32 bin_magic;
-	u32 bin_ver;
-	u32 bin_size;
-	u32 header_offset;
-	u32 data_offset;
-	u32 data_size;
-};
-
-/**
- * struct fw_bl_desc - firmware bootloader descriptor
- * @start_tag:		starting tag of bootloader
- * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
- * @code_off:		offset of code section
- * @code_size:		size of code section
- * @data_off:		offset of data section
- * @data_size:		size of data section
- *
- * This structure is embedded in bootloader firmware files at to describe the
- * IMEM and DMEM layout expected by the bootloader.
- */
-struct fw_bl_desc {
-	u32 start_tag;
-	u32 dmem_load_off;
-	u32 code_off;
-	u32 code_size;
-	u32 data_off;
-	u32 data_size;
-};
-
-
-/*
- *
- * LS blob structures
- *
- */
-
-/**
- * struct lsf_ucode_desc - LS falcon signatures
- * @prd_keys:		signature to use when the GPU is in production mode
- * @dgb_keys:		signature to use when the GPU is in debug mode
- * @b_prd_present:	whether the production key is present
- * @b_dgb_present:	whether the debug key is present
- * @falcon_id:		ID of the falcon the ucode applies to
- *
- * Directly loaded from a signature file.
- */
-struct lsf_ucode_desc {
-	u8  prd_keys[2][16];
-	u8  dbg_keys[2][16];
-	u32 b_prd_present;
-	u32 b_dbg_present;
-	u32 falcon_id;
-};
-
-/**
- * struct lsf_lsb_header - LS firmware header
- * @signature:		signature to verify the firmware against
- * @ucode_off:		offset of the ucode blob in the WPR region. The ucode
- *                      blob contains the bootloader, code and data of the
- *                      LS falcon
- * @ucode_size:		size of the ucode blob, including bootloader
- * @data_size:		size of the ucode blob data
- * @bl_code_size:	size of the bootloader code
- * @bl_imem_off:	offset in imem of the bootloader
- * @bl_data_off:	offset of the bootloader data in WPR region
- * @bl_data_size:	size of the bootloader data
- * @app_code_off:	offset of the app code relative to ucode_off
- * @app_code_size:	size of the app code
- * @app_data_off:	offset of the app data relative to ucode_off
- * @app_data_size:	size of the app data
- * @flags:		flags for the secure bootloader
- *
- * This structure is written into the WPR region for each managed falcon. Each
- * instance is referenced by the lsb_offset member of the corresponding
- * lsf_wpr_header.
- */
-struct lsf_lsb_header {
-	struct lsf_ucode_desc signature;
-	u32 ucode_off;
-	u32 ucode_size;
-	u32 data_size;
-	u32 bl_code_size;
-	u32 bl_imem_off;
-	u32 bl_data_off;
-	u32 bl_data_size;
-	u32 app_code_off;
-	u32 app_code_size;
-	u32 app_data_off;
-	u32 app_data_size;
-	u32 flags;
-#define LSF_FLAG_LOAD_CODE_AT_0		1
-#define LSF_FLAG_DMACTL_REQ_CTX		4
-#define LSF_FLAG_FORCE_PRIV_LOAD	8
-};
-
-/**
- * struct lsf_wpr_header - LS blob WPR Header
- * @falcon_id:		LS falcon ID
- * @lsb_offset:		offset of the lsb_lsf_header in the WPR region
- * @bootstrap_owner:	secure falcon reponsible for bootstrapping the LS falcon
- * @lazy_bootstrap:	skip bootstrapping by ACR
- * @status:		bootstrapping status
- *
- * An array of these is written at the beginning of the WPR region, one for
- * each managed falcon. The array is terminated by an instance which falcon_id
- * is LSF_FALCON_ID_INVALID.
- */
-struct lsf_wpr_header {
-	u32  falcon_id;
-	u32  lsb_offset;
-	u32  bootstrap_owner;
-	u32  lazy_bootstrap;
-	u32  status;
-#define LSF_IMAGE_STATUS_NONE				0
-#define LSF_IMAGE_STATUS_COPY				1
-#define LSF_IMAGE_STATUS_VALIDATION_CODE_FAILED		2
-#define LSF_IMAGE_STATUS_VALIDATION_DATA_FAILED		3
-#define LSF_IMAGE_STATUS_VALIDATION_DONE		4
-#define LSF_IMAGE_STATUS_VALIDATION_SKIPPED		5
-#define LSF_IMAGE_STATUS_BOOTSTRAP_READY		6
-};
-
-
-/**
- * struct ls_ucode_img_desc - descriptor of firmware image
- * @descriptor_size:		size of this descriptor
- * @image_size:			size of the whole image
- * @bootloader_start_offset:	start offset of the bootloader in ucode image
- * @bootloader_size:		size of the bootloader
- * @bootloader_imem_offset:	start off set of the bootloader in IMEM
- * @bootloader_entry_point:	entry point of the bootloader in IMEM
- * @app_start_offset:		start offset of the LS firmware
- * @app_size:			size of the LS firmware's code and data
- * @app_imem_offset:		offset of the app in IMEM
- * @app_imem_entry:		entry point of the app in IMEM
- * @app_dmem_offset:		offset of the data in DMEM
- * @app_resident_code_offset:	offset of app code from app_start_offset
- * @app_resident_code_size:	size of the code
- * @app_resident_data_offset:	offset of data from app_start_offset
- * @app_resident_data_size:	size of data
- *
- * A firmware image contains the code, data, and bootloader of a given LS
- * falcon in a single blob. This structure describes where everything is.
- *
- * This can be generated from a (bootloader, code, data) set if they have
- * been loaded separately, or come directly from a file.
- */
-struct ls_ucode_img_desc {
-	u32 descriptor_size;
-	u32 image_size;
-	u32 tools_version;
-	u32 app_version;
-	char date[64];
-	u32 bootloader_start_offset;
-	u32 bootloader_size;
-	u32 bootloader_imem_offset;
-	u32 bootloader_entry_point;
-	u32 app_start_offset;
-	u32 app_size;
-	u32 app_imem_offset;
-	u32 app_imem_entry;
-	u32 app_dmem_offset;
-	u32 app_resident_code_offset;
-	u32 app_resident_code_size;
-	u32 app_resident_data_offset;
-	u32 app_resident_data_size;
-	u32 nb_overlays;
-	struct {u32 start; u32 size; } load_ovl[64];
-	u32 compressed;
-};
-
-/**
- * struct ls_ucode_img - temporary storage for loaded LS firmwares
- * @node:		to link within lsf_ucode_mgr
- * @falcon_id:		ID of the falcon this LS firmware is for
- * @ucode_desc:		loaded or generated map of ucode_data
- * @ucode_header:	header of the firmware
- * @ucode_data:		firmware payload (code and data)
- * @ucode_size:		size in bytes of data in ucode_data
- * @wpr_header:		WPR header to be written to the LS blob
- * @lsb_header:		LSB header to be written to the LS blob
- *
- * Preparing the WPR LS blob requires information about all the LS firmwares
- * (size, etc) to be known. This structure contains all the data of one LS
- * firmware.
- */
-struct ls_ucode_img {
-	struct list_head node;
-	enum nvkm_secboot_falcon falcon_id;
-
-	struct ls_ucode_img_desc ucode_desc;
-	u32 *ucode_header;
-	u8 *ucode_data;
-	u32 ucode_size;
-
-	struct lsf_wpr_header wpr_header;
-	struct lsf_lsb_header lsb_header;
-};
-
-/**
- * struct ls_ucode_mgr - manager for all LS falcon firmwares
- * @count:	number of managed LS falcons
- * @wpr_size:	size of the required WPR region in bytes
- * @img_list:	linked list of lsf_ucode_img
- */
-struct ls_ucode_mgr {
-	u16 count;
-	u32 wpr_size;
-	struct list_head img_list;
-};
-
-
-/*
- *
- * HS blob structures
- *
- */
-
-/**
- * struct hsf_fw_header - HS firmware descriptor
- * @sig_dbg_offset:	offset of the debug signature
- * @sig_dbg_size:	size of the debug signature
- * @sig_prod_offset:	offset of the production signature
- * @sig_prod_size:	size of the production signature
- * @patch_loc:		offset of the offset (sic) of where the signature is
- * @patch_sig:		offset of the offset (sic) to add to sig_*_offset
- * @hdr_offset:		offset of the load header (see struct hs_load_header)
- * @hdr_size:		size of above header
- *
- * This structure is embedded in the HS firmware image at
- * hs_bin_hdr.header_offset.
- */
-struct hsf_fw_header {
-	u32 sig_dbg_offset;
-	u32 sig_dbg_size;
-	u32 sig_prod_offset;
-	u32 sig_prod_size;
-	u32 patch_loc;
-	u32 patch_sig;
-	u32 hdr_offset;
-	u32 hdr_size;
-};
-
-/**
- * struct hsf_load_header - HS firmware load header
- */
-struct hsf_load_header {
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 data_dma_base;
-	u32 data_size;
-	u32 num_apps;
-	struct {
-		u32 sec_code_off;
-		u32 sec_code_size;
-	} app[0];
-};
-
-/**
- * Convenience function to duplicate a firmware file in memory and check that
- * it has the required minimum size.
- */
-static void *
-gm200_secboot_load_firmware(struct nvkm_subdev *subdev, const char *name,
-		    size_t min_size)
-{
-	const struct firmware *fw;
-	void *blob;
-	int ret;
-
-	ret = nvkm_firmware_get(subdev->device, name, &fw);
-	if (ret)
-		return ERR_PTR(ret);
-	if (fw->size < min_size) {
-		nvkm_error(subdev, "%s is smaller than expected size %zu\n",
-			   name, min_size);
-		nvkm_firmware_put(fw);
-		return ERR_PTR(-EINVAL);
-	}
-	blob = kmemdup(fw->data, fw->size, GFP_KERNEL);
-	nvkm_firmware_put(fw);
-	if (!blob)
-		return ERR_PTR(-ENOMEM);
-
-	return blob;
-}
-
-
-/*
- * Low-secure blob creation
- */
-
-#define BL_DESC_BLK_SIZE 256
-/**
- * Build a ucode image and descriptor from provided bootloader, code and data.
- *
- * @bl:		bootloader image, including 16-bytes descriptor
- * @code:	LS firmware code segment
- * @data:	LS firmware data segment
- * @desc:	ucode descriptor to be written
- *
- * Return: allocated ucode image with corresponding descriptor information. desc
- *         is also updated to contain the right offsets within returned image.
- */
-static void *
-ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
-		   const struct firmware *data, struct ls_ucode_img_desc *desc)
-{
-	struct fw_bin_header *bin_hdr = (void *)bl->data;
-	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
-	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
-	u32 pos = 0;
-	void *image;
-
-	desc->bootloader_start_offset = pos;
-	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
-	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
-	desc->bootloader_entry_point = bl_desc->start_tag * 256;
-
-	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
-	desc->app_start_offset = pos;
-	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
-			 ALIGN(data->size, BL_DESC_BLK_SIZE);
-	desc->app_imem_offset = 0;
-	desc->app_imem_entry = 0;
-	desc->app_dmem_offset = 0;
-	desc->app_resident_code_offset = 0;
-	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
-
-	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
-	desc->app_resident_data_offset = pos - desc->app_start_offset;
-	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
-
-	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
-			   desc->app_size;
-
-	image = kzalloc(desc->image_size, GFP_KERNEL);
-	if (!image)
-		return ERR_PTR(-ENOMEM);
-
-	memcpy(image + desc->bootloader_start_offset, bl_data,
-	       bl_desc->code_size);
-	memcpy(image + desc->app_start_offset, code->data, code->size);
-	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
-	       data->data, data->size);
-
-	return image;
-}
-
-/**
- * ls_ucode_img_load_generic() - load and prepare a LS ucode image
- *
- * Load the LS microcode, bootloader and signature and pack them into a single
- * blob. Also generate the corresponding ucode descriptor.
- */
-static int
-ls_ucode_img_load_generic(struct nvkm_subdev *subdev,
-			  struct ls_ucode_img *img, const char *falcon_name,
-			  const u32 falcon_id)
-{
-	const struct firmware *bl, *code, *data;
-	struct lsf_ucode_desc *lsf_desc;
-	char f[64];
-	int ret;
-
-	img->ucode_header = NULL;
-
-	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &bl);
-	if (ret)
-		goto error;
-
-	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &code);
-	if (ret)
-		goto free_bl;
-
-	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
-	ret = nvkm_firmware_get(subdev->device, f, &data);
-	if (ret)
-		goto free_inst;
-
-	img->ucode_data = ls_ucode_img_build(bl, code, data,
-					     &img->ucode_desc);
-	if (IS_ERR(img->ucode_data)) {
-		ret = PTR_ERR(img->ucode_data);
-		goto free_data;
-	}
-	img->ucode_size = img->ucode_desc.image_size;
-
-	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
-	lsf_desc = gm200_secboot_load_firmware(subdev, f, sizeof(*lsf_desc));
-	if (IS_ERR(lsf_desc)) {
-		ret = PTR_ERR(lsf_desc);
-		goto free_image;
-	}
-	/* not needed? the signature should already have the right value */
-	lsf_desc->falcon_id = falcon_id;
-	memcpy(&img->lsb_header.signature, lsf_desc, sizeof(*lsf_desc));
-	img->falcon_id = lsf_desc->falcon_id;
-	kfree(lsf_desc);
-
-	/* success path - only free requested firmware files */
-	goto free_data;
-
-free_image:
-	kfree(img->ucode_data);
-free_data:
-	nvkm_firmware_put(data);
-free_inst:
-	nvkm_firmware_put(code);
-free_bl:
-	nvkm_firmware_put(bl);
-error:
-	return ret;
-}
-
-typedef int (*lsf_load_func)(struct nvkm_subdev *, struct ls_ucode_img *);
-
-static int
-ls_ucode_img_load_fecs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "fecs",
-					 NVKM_SECBOOT_FALCON_FECS);
-}
-
-static int
-ls_ucode_img_load_gpccs(struct nvkm_subdev *subdev, struct ls_ucode_img *img)
-{
-	return ls_ucode_img_load_generic(subdev, img, "gpccs",
-					 NVKM_SECBOOT_FALCON_GPCCS);
-}
-
-/**
- * ls_ucode_img_load() - create a lsf_ucode_img and load it
- */
-static struct ls_ucode_img *
-ls_ucode_img_load(struct nvkm_subdev *subdev, lsf_load_func load_func)
-{
-	struct ls_ucode_img *img;
-	int ret;
-
-	img = kzalloc(sizeof(*img), GFP_KERNEL);
-	if (!img)
-		return ERR_PTR(-ENOMEM);
-
-	ret = load_func(subdev, img);
-	if (ret) {
-		kfree(img);
-		return ERR_PTR(ret);
-	}
-
-	return img;
-}
-
-static const lsf_load_func lsf_load_funcs[] = {
-	[NVKM_SECBOOT_FALCON_END] = NULL, /* reserve enough space */
-	[NVKM_SECBOOT_FALCON_FECS] = ls_ucode_img_load_fecs,
-	[NVKM_SECBOOT_FALCON_GPCCS] = ls_ucode_img_load_gpccs,
-};
-
-/**
- * ls_ucode_img_populate_bl_desc() - populate a DMEM BL descriptor for LS image
- * @img:	ucode image to generate against
- * @desc:	descriptor to populate
- * @sb:		secure boot state to use for base addresses
- *
- * Populate the DMEM BL descriptor with the information contained in a
- * ls_ucode_desc.
- *
- */
-static void
-ls_ucode_img_populate_bl_desc(struct ls_ucode_img *img, u64 wpr_addr,
-			      struct gm200_flcn_bl_desc *desc)
-{
-	struct ls_ucode_img_desc *pdesc = &img->ucode_desc;
-	u64 addr_base;
-
-	addr_base = wpr_addr + img->lsb_header.ucode_off +
-		    pdesc->app_start_offset;
-
-	memset(desc, 0, sizeof(*desc));
-	desc->ctx_dma = FALCON_DMAIDX_UCODE;
-	desc->code_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->code_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_code_offset));
-	desc->non_sec_code_size = pdesc->app_resident_code_size;
-	desc->data_dma_base.lo = lower_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_dma_base.hi = upper_32_bits(
-		(addr_base + pdesc->app_resident_data_offset));
-	desc->data_size = pdesc->app_resident_data_size;
-	desc->code_entry_point = pdesc->app_imem_entry;
-}
-
-#define LSF_LSB_HEADER_ALIGN 256
-#define LSF_BL_DATA_ALIGN 256
-#define LSF_BL_DATA_SIZE_ALIGN 256
-#define LSF_BL_CODE_SIZE_ALIGN 256
-#define LSF_UCODE_DATA_ALIGN 4096
-
-/**
- * ls_ucode_img_fill_headers - fill the WPR and LSB headers of an image
- * @gsb:	secure boot device used
- * @img:	image to generate for
- * @offset:	offset in the WPR region where this image starts
- *
- * Allocate space in the WPR area from offset and write the WPR and LSB headers
- * accordingly.
- *
- * Return: offset at the end of this image.
- */
-static u32
-ls_ucode_img_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_img *img,
-			  u32 offset)
-{
-	struct lsf_wpr_header *whdr = &img->wpr_header;
-	struct lsf_lsb_header *lhdr = &img->lsb_header;
-	struct ls_ucode_img_desc *desc = &img->ucode_desc;
-
-	if (img->ucode_header) {
-		nvkm_fatal(&gsb->base.subdev,
-			    "images withough loader are not supported yet!\n");
-		return offset;
-	}
-
-	/* Fill WPR header */
-	whdr->falcon_id = img->falcon_id;
-	whdr->bootstrap_owner = gsb->base.func->boot_falcon;
-	whdr->status = LSF_IMAGE_STATUS_COPY;
-
-	/* Align, save off, and include an LSB header size */
-	offset = ALIGN(offset, LSF_LSB_HEADER_ALIGN);
-	whdr->lsb_offset = offset;
-	offset += sizeof(struct lsf_lsb_header);
-
-	/*
-	 * Align, save off, and include the original (static) ucode
-	 * image size
-	 */
-	offset = ALIGN(offset, LSF_UCODE_DATA_ALIGN);
-	lhdr->ucode_off = offset;
-	offset += img->ucode_size;
-
-	/*
-	 * For falcons that use a boot loader (BL), we append a loader
-	 * desc structure on the end of the ucode image and consider
-	 * this the boot loader data. The host will then copy the loader
-	 * desc args to this space within the WPR region (before locking
-	 * down) and the HS bin will then copy them to DMEM 0 for the
-	 * loader.
-	 */
-	lhdr->bl_code_size = ALIGN(desc->bootloader_size,
-				   LSF_BL_CODE_SIZE_ALIGN);
-	lhdr->ucode_size = ALIGN(desc->app_resident_data_offset,
-				 LSF_BL_CODE_SIZE_ALIGN) + lhdr->bl_code_size;
-	lhdr->data_size = ALIGN(desc->app_size, LSF_BL_CODE_SIZE_ALIGN) +
-				lhdr->bl_code_size - lhdr->ucode_size;
-	/*
-	 * Though the BL is located at 0th offset of the image, the VA
-	 * is different to make sure that it doesn't collide the actual
-	 * OS VA range
-	 */
-	lhdr->bl_imem_off = desc->bootloader_imem_offset;
-	lhdr->app_code_off = desc->app_start_offset +
-			     desc->app_resident_code_offset;
-	lhdr->app_code_size = desc->app_resident_code_size;
-	lhdr->app_data_off = desc->app_start_offset +
-			     desc->app_resident_data_offset;
-	lhdr->app_data_size = desc->app_resident_data_size;
-
-	lhdr->flags = 0;
-	if (img->falcon_id == gsb->base.func->boot_falcon)
-		lhdr->flags = LSF_FLAG_DMACTL_REQ_CTX;
-
-	/* GPCCS will be loaded using PRI */
-	if (img->falcon_id == NVKM_SECBOOT_FALCON_GPCCS)
-		lhdr->flags |= LSF_FLAG_FORCE_PRIV_LOAD;
-
-	/* Align (size bloat) and save off BL descriptor size */
-	lhdr->bl_data_size = ALIGN(sizeof(struct gm200_flcn_bl_desc),
-				   LSF_BL_DATA_SIZE_ALIGN);
-	/*
-	 * Align, save off, and include the additional BL data
-	 */
-	offset = ALIGN(offset, LSF_BL_DATA_ALIGN);
-	lhdr->bl_data_off = offset;
-	offset += lhdr->bl_data_size;
-
-	return offset;
-}
-
-static void
-ls_ucode_mgr_init(struct ls_ucode_mgr *mgr)
-{
-	memset(mgr, 0, sizeof(*mgr));
-	INIT_LIST_HEAD(&mgr->img_list);
-}
-
-static void
-ls_ucode_mgr_cleanup(struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img, *t;
-
-	list_for_each_entry_safe(img, t, &mgr->img_list, node) {
-		kfree(img->ucode_data);
-		kfree(img->ucode_header);
-		kfree(img);
-	}
-}
-
-static void
-ls_ucode_mgr_add_img(struct ls_ucode_mgr *mgr, struct ls_ucode_img *img)
-{
-	mgr->count++;
-	list_add_tail(&img->node, &mgr->img_list);
-}
-
-/**
- * ls_ucode_mgr_fill_headers - fill WPR and LSB headers of all managed images
- */
-static void
-ls_ucode_mgr_fill_headers(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr)
-{
-	struct ls_ucode_img *img;
-	u32 offset;
-
-	/*
-	 * Start with an array of WPR headers at the base of the WPR.
-	 * The expectation here is that the secure falcon will do a single DMA
-	 * read of this array and cache it internally so it's ok to pack these.
-	 * Also, we add 1 to the falcon count to indicate the end of the array.
-	 */
-	offset = sizeof(struct lsf_wpr_header) * (mgr->count + 1);
-
-	/*
-	 * Walk the managed falcons, accounting for the LSB structs
-	 * as well as the ucode images.
-	 */
-	list_for_each_entry(img, &mgr->img_list, node) {
-		offset = ls_ucode_img_fill_headers(gsb, img, offset);
-	}
-
-	mgr->wpr_size = offset;
-}
+#include <engine/falcon.h>
+#include <subdev/mc.h>
 
 /**
- * ls_ucode_mgr_write_wpr - write the WPR blob contents
- */
-static int
-ls_ucode_mgr_write_wpr(struct gm200_secboot *gsb, struct ls_ucode_mgr *mgr,
-		       struct nvkm_gpuobj *wpr_blob)
-{
-	struct ls_ucode_img *img;
-	u32 pos = 0;
-
-	nvkm_kmap(wpr_blob);
-
-	list_for_each_entry(img, &mgr->img_list, node) {
-		nvkm_gpuobj_memcpy_to(wpr_blob, pos, &img->wpr_header,
-				      sizeof(img->wpr_header));
-
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->wpr_header.lsb_offset,
-				     &img->lsb_header, sizeof(img->lsb_header));
-
-		/* Generate and write BL descriptor */
-		if (!img->ucode_header) {
-			u8 desc[gsb->func->bl_desc_size];
-			struct gm200_flcn_bl_desc gdesc;
-
-			ls_ucode_img_populate_bl_desc(img, gsb->wpr_addr,
-						      &gdesc);
-			gsb->func->fixup_bl_desc(&gdesc, &desc);
-			nvkm_gpuobj_memcpy_to(wpr_blob,
-					      img->lsb_header.bl_data_off,
-					      &desc, gsb->func->bl_desc_size);
-		}
-
-		/* Copy ucode */
-		nvkm_gpuobj_memcpy_to(wpr_blob, img->lsb_header.ucode_off,
-				      img->ucode_data, img->ucode_size);
-
-		pos += sizeof(img->wpr_header);
-	}
-
-	nvkm_wo32(wpr_blob, pos, NVKM_SECBOOT_FALCON_INVALID);
-
-	nvkm_done(wpr_blob);
-
-	return 0;
-}
-
-/* Both size and address of WPR need to be 128K-aligned */
-#define WPR_ALIGNMENT	0x20000
-/**
- * gm200_secboot_prepare_ls_blob() - prepare the LS blob
+ * gm200_secboot_run_blob() - run the given high-secure blob
  *
- * For each securely managed falcon, load the FW, signatures and bootloaders and
- * prepare a ucode blob. Then, compute the offsets in the WPR region for each
- * blob, and finally write the headers and ucode blobs into a GPU object that
- * will be copied into the WPR region by the HS firmware.
- */
-static int
-gm200_secboot_prepare_ls_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct nvkm_device *device = sb->subdev.device;
-	struct ls_ucode_mgr mgr;
-	int falcon_id;
-	int ret;
-
-	ls_ucode_mgr_init(&mgr);
-
-	/* Load all LS blobs */
-	for_each_set_bit(falcon_id, &gsb->base.func->managed_falcons,
-			 NVKM_SECBOOT_FALCON_END) {
-		struct ls_ucode_img *img;
-
-		img = ls_ucode_img_load(&sb->subdev, lsf_load_funcs[falcon_id]);
-
-		if (IS_ERR(img)) {
-			ret = PTR_ERR(img);
-			goto cleanup;
-		}
-		ls_ucode_mgr_add_img(&mgr, img);
-	}
-
-	/*
-	 * Fill the WPR and LSF headers with the right offsets and compute
-	 * required WPR size
-	 */
-	ls_ucode_mgr_fill_headers(gsb, &mgr);
-	mgr.wpr_size = ALIGN(mgr.wpr_size, WPR_ALIGNMENT);
-
-	/* Allocate GPU object that will contain the WPR region */
-	ret = nvkm_gpuobj_new(device, mgr.wpr_size, WPR_ALIGNMENT, false, NULL,
-			      &gsb->ls_blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_debug(&sb->subdev, "%d managed LS falcons, WPR size is %d bytes\n",
-		    mgr.count, mgr.wpr_size);
-
-	/* If WPR address and size are not fixed, set them to fit the LS blob */
-	if (!gsb->wpr_size) {
-		gsb->wpr_addr = gsb->ls_blob->addr;
-		gsb->wpr_size = gsb->ls_blob->size;
-	}
-
-	/* Write LS blob */
-	ret = ls_ucode_mgr_write_wpr(gsb, &mgr, gsb->ls_blob);
-	if (ret)
-		nvkm_gpuobj_del(&gsb->ls_blob);
-
-cleanup:
-	ls_ucode_mgr_cleanup(&mgr);
-
-	return ret;
-}
-
-/*
- * High-secure blob creation
- */
-
-/**
- * gm200_secboot_hsf_patch_signature() - patch HS blob with correct signature
- */
-static void
-gm200_secboot_hsf_patch_signature(struct gm200_secboot *gsb, void *acr_image)
-{
-	struct nvkm_secboot *sb = &gsb->base;
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	void *hs_data = acr_image + hsbin_hdr->data_offset;
-	void *sig;
-	u32 sig_size;
-
-	/* Falcon in debug or production mode? */
-	if ((nvkm_rd32(sb->subdev.device, sb->base + 0xc08) >> 20) & 0x1) {
-		sig = acr_image + fw_hdr->sig_dbg_offset;
-		sig_size = fw_hdr->sig_dbg_size;
-	} else {
-		sig = acr_image + fw_hdr->sig_prod_offset;
-		sig_size = fw_hdr->sig_prod_size;
-	}
-
-	/* Patch signature */
-	memcpy(hs_data + fw_hdr->patch_loc, sig + fw_hdr->patch_sig, sig_size);
-}
-
-/**
- * gm200_secboot_populate_hsf_bl_desc() - populate BL descriptor for HS image
- */
-static void
-gm200_secboot_populate_hsf_bl_desc(void *acr_image,
-				   struct gm200_flcn_bl_desc *bl_desc)
-{
-	struct fw_bin_header *hsbin_hdr = acr_image;
-	struct hsf_fw_header *fw_hdr = acr_image + hsbin_hdr->header_offset;
-	struct hsf_load_header *load_hdr = acr_image + fw_hdr->hdr_offset;
-
-	/*
-	 * Descriptor for the bootloader that will load the ACR image into
-	 * IMEM/DMEM memory.
-	 */
-	fw_hdr = acr_image + hsbin_hdr->header_offset;
-	load_hdr = acr_image + fw_hdr->hdr_offset;
-	memset(bl_desc, 0, sizeof(*bl_desc));
-	bl_desc->ctx_dma = FALCON_DMAIDX_VIRT;
-	bl_desc->non_sec_code_off = load_hdr->non_sec_code_off;
-	bl_desc->non_sec_code_size = load_hdr->non_sec_code_size;
-	bl_desc->sec_code_off = load_hdr->app[0].sec_code_off;
-	bl_desc->sec_code_size = load_hdr->app[0].sec_code_size;
-	bl_desc->code_entry_point = 0;
-	/*
-	 * We need to set code_dma_base to the virtual address of the acr_blob,
-	 * and add this address to data_dma_base before writing it into DMEM
-	 */
-	bl_desc->code_dma_base.lo = 0;
-	bl_desc->data_dma_base.lo = load_hdr->data_dma_base;
-	bl_desc->data_size = load_hdr->data_size;
-}
-
-/**
- * gm200_secboot_prepare_hs_blob - load and prepare a HS blob and BL descriptor
- *
- * @gsb secure boot instance to prepare for
- * @fw name of the HS firmware to load
- * @blob pointer to gpuobj that will be allocated to receive the HS FW payload
- * @bl_desc pointer to the BL descriptor to write for this firmware
- * @patch whether we should patch the HS descriptor (only for HS loaders)
- */
-static int
-gm200_secboot_prepare_hs_blob(struct gm200_secboot *gsb, const char *fw,
-			      struct nvkm_gpuobj **blob,
-			      struct gm200_flcn_bl_desc *bl_desc, bool patch)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	void *acr_image;
-	struct fw_bin_header *hsbin_hdr;
-	struct hsf_fw_header *fw_hdr;
-	void *acr_data;
-	struct hsf_load_header *load_hdr;
-	struct hsflcn_acr_desc *desc;
-	int ret;
-
-	acr_image = gm200_secboot_load_firmware(subdev, fw, 0);
-	if (IS_ERR(acr_image))
-		return PTR_ERR(acr_image);
-	hsbin_hdr = acr_image;
-
-	/* Patch signature */
-	gm200_secboot_hsf_patch_signature(gsb, acr_image);
-
-	acr_data = acr_image + hsbin_hdr->data_offset;
-
-	/* Patch descriptor? */
-	if (patch) {
-		fw_hdr = acr_image + hsbin_hdr->header_offset;
-		load_hdr = acr_image + fw_hdr->hdr_offset;
-		desc = acr_data + load_hdr->data_dma_base;
-		gsb->func->fixup_hs_desc(gsb, desc);
-	}
-
-	/* Generate HS BL descriptor */
-	gm200_secboot_populate_hsf_bl_desc(acr_image, bl_desc);
-
-	/* Create ACR blob and copy HS data to it */
-	ret = nvkm_gpuobj_new(subdev->device, ALIGN(hsbin_hdr->data_size, 256),
-			      0x1000, false, NULL, blob);
-	if (ret)
-		goto cleanup;
-
-	nvkm_kmap(*blob);
-	nvkm_gpuobj_memcpy_to(*blob, 0, acr_data, hsbin_hdr->data_size);
-	nvkm_done(*blob);
-
-cleanup:
-	kfree(acr_image);
-
-	return ret;
-}
-
-/*
- * High-secure bootloader blob creation
- */
-
-static int
-gm200_secboot_prepare_hsbl_blob(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-
-	gsb->hsbl_blob = gm200_secboot_load_firmware(subdev, "acr/bl", 0);
-	if (IS_ERR(gsb->hsbl_blob)) {
-		int ret = PTR_ERR(gsb->hsbl_blob);
-
-		gsb->hsbl_blob = NULL;
-		return ret;
-	}
-
-	return 0;
-}
-
-/**
- * gm20x_secboot_prepare_blobs - load blobs common to all GM20X GPUs.
- *
- * This includes the LS blob, HS ucode loading blob, and HS bootloader.
- *
- * The HS ucode unload blob is only used on dGPU.
  */
 int
-gm20x_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	/* Load and prepare the managed falcon's firmwares */
-	if (!gsb->ls_blob) {
-		ret = gm200_secboot_prepare_ls_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware that will load the LS firmwares */
-	if (!gsb->acr_load_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_load",
-						&gsb->acr_load_blob,
-						&gsb->acr_load_bl_desc, true);
-		if (ret)
-			return ret;
-	}
-
-	/* Load the HS firmware bootloader */
-	if (!gsb->hsbl_blob) {
-		ret = gm200_secboot_prepare_hsbl_blob(gsb);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	/* dGPU only: load the HS firmware that unprotects the WPR region */
-	if (!gsb->acr_unload_blob) {
-		ret = gm200_secboot_prepare_hs_blob(gsb, "acr/ucode_unload",
-					       &gsb->acr_unload_blob,
-					       &gsb->acr_unload_bl_desc, false);
-		if (ret)
-			return ret;
-	}
-
-	return 0;
-}
-
-static int
-gm200_secboot_blobs_ready(struct gm200_secboot *gsb)
+gm200_secboot_run_blob(struct nvkm_secboot *sb, struct nvkm_gpuobj *blob)
 {
+	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_subdev *subdev = &gsb->base.subdev;
+	struct nvkm_falcon *falcon = gsb->base.boot_falcon;
+	struct nvkm_vma vma;
 	int ret;
 
-	/* firmware already loaded, nothing to do... */
-	if (gsb->firmware_ok)
-		return 0;
-
-	ret = gsb->func->prepare_blobs(gsb);
-	if (ret) {
-		nvkm_error(subdev, "failed to load secure firmware\n");
-		return ret;
-	}
-
-	gsb->firmware_ok = true;
-
-	return 0;
-}
-
-
-/*
- * Secure Boot Execution
- */
-
-/**
- * gm200_secboot_load_hs_bl() - load HS bootloader into DMEM and IMEM
- */
-static void
-gm200_secboot_load_hs_bl(struct gm200_secboot *gsb, void *data, u32 data_size)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	void *blob_data = gsb->hsbl_blob + hdr->data_offset;
-	void *hsbl_code = blob_data + hsbl_desc->code_off;
-	void *hsbl_data = blob_data + hsbl_desc->data_off;
-	u32 code_size = ALIGN(hsbl_desc->code_size, 256);
-	const u32 base = gsb->base.base;
-	u32 blk;
-	u32 tag;
-	int i;
-
-	/*
-	 * Copy HS bootloader data
-	 */
-	nvkm_wr32(device, base + 0x1c0, (0x00000000 | (0x1 << 24)));
-	for (i = 0; i < hsbl_desc->data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)hsbl_data)[i]);
-
-	/*
-	 * Copy HS bootloader interface structure where the HS descriptor
-	 * expects it to be
-	 */
-	nvkm_wr32(device, base + 0x1c0,
-		  (hsbl_desc->dmem_load_off | (0x1 << 24)));
-	for (i = 0; i < data_size / 4; i++)
-		nvkm_wr32(device, base + 0x1c4, ((u32 *)data)[i]);
-
-	/* Copy HS bootloader code to end of IMEM */
-	blk = (nvkm_rd32(device, base + 0x108) & 0x1ff) - (code_size >> 8);
-	tag = hsbl_desc->start_tag;
-	nvkm_wr32(device, base + 0x180, ((blk & 0xff) << 8) | (0x1 << 24));
-	for (i = 0; i < code_size / 4; i++) {
-		/* write new tag every 256B */
-		if ((i & 0x3f) == 0) {
-			nvkm_wr32(device, base + 0x188, tag & 0xffff);
-			tag++;
-		}
-		nvkm_wr32(device, base + 0x184, ((u32 *)hsbl_code)[i]);
-	}
-	nvkm_wr32(device, base + 0x188, 0);
-}
-
-/**
- * gm200_secboot_setup_falcon() - set up the secure falcon for secure boot
- */
-static int
-gm200_secboot_setup_falcon(struct gm200_secboot *gsb)
-{
-	struct nvkm_device *device = gsb->base.subdev.device;
-	struct fw_bin_header *hdr = gsb->hsbl_blob;
-	struct fw_bl_desc *hsbl_desc = gsb->hsbl_blob + hdr->header_offset;
-	/* virtual start address for boot vector */
-	u32 virt_addr = hsbl_desc->start_tag << 8;
-	const u32 base = gsb->base.base;
-	const u32 reg_base = base + 0xe00;
-	u32 inst_loc;
-	int ret;
-
-	ret = nvkm_secboot_falcon_reset(&gsb->base);
+	ret = nvkm_falcon_get(falcon, subdev);
 	if (ret)
 		return ret;
 
-	/* setup apertures - virtual */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_UCODE), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_VIRT), 0x0);
-	/* setup apertures - physical */
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_VID), 0x4);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_COH),
-		  0x4 | 0x1);
-	nvkm_wr32(device, reg_base + 4 * (FALCON_DMAIDX_PHYS_SYS_NCOH),
-		  0x4 | 0x2);
-
-	/* Set context */
-	if (nvkm_memory_target(gsb->inst->memory) == NVKM_MEM_TARGET_VRAM)
-		inst_loc = 0x0; /* FB */
-	else
-		inst_loc = 0x3; /* Non-coherent sysmem */
-
-	nvkm_mask(device, base + 0x048, 0x1, 0x1);
-	nvkm_wr32(device, base + 0x480,
-		  ((gsb->inst->addr >> 12) & 0xfffffff) |
-		  (inst_loc << 28) | (1 << 30));
-
-	/* Set boot vector to code's starting virtual address */
-	nvkm_wr32(device, base + 0x104, virt_addr);
-
-	return 0;
-}
-
-/**
- * gm200_secboot_run_hs_blob() - run the given high-secure blob
- */
-static int
-gm200_secboot_run_hs_blob(struct gm200_secboot *gsb, struct nvkm_gpuobj *blob,
-			  struct gm200_flcn_bl_desc *desc)
-{
-	struct nvkm_vma vma;
-	u64 vma_addr;
-	const u32 bl_desc_size = gsb->func->bl_desc_size;
-	u8 bl_desc[bl_desc_size];
-	int ret;
-
 	/* Map the HS firmware so the HS bootloader can see it */
 	ret = nvkm_gpuobj_map(blob, gsb->vm, NV_MEM_ACCESS_RW, &vma);
-	if (ret)
+	if (ret) {
+		nvkm_falcon_put(falcon, subdev);
 		return ret;
+	}
 
-	/* Add the mapping address to the DMA bases */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) + vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) + vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
-
-	/* Fixup the BL header */
-	gsb->func->fixup_bl_desc(desc, &bl_desc);
-
-	/* Reset the falcon and make it ready to run the HS bootloader */
-	ret = gm200_secboot_setup_falcon(gsb);
+	/* Reset and set the falcon up */
+	ret = nvkm_falcon_reset(falcon);
 	if (ret)
-		goto done;
+		goto end;
+	nvkm_falcon_bind_context(falcon, gsb->inst);
 
 	/* Load the HS bootloader into the falcon's IMEM/DMEM */
-	gm200_secboot_load_hs_bl(gsb, &bl_desc, bl_desc_size);
-
-	/* Start the HS bootloader */
-	ret = nvkm_secboot_falcon_run(&gsb->base);
+	ret = sb->acr->func->load(sb->acr, &gsb->base, blob, vma.offset);
 	if (ret)
-		goto done;
+		goto end;
 
-done:
-	/* Restore the original DMA addresses */
-	vma_addr = flcn64_to_u64(desc->code_dma_base) - vma.offset;
-	desc->code_dma_base.lo = lower_32_bits(vma_addr);
-	desc->code_dma_base.hi = upper_32_bits(vma_addr);
-	vma_addr = flcn64_to_u64(desc->data_dma_base) - vma.offset;
-	desc->data_dma_base.lo = lower_32_bits(vma_addr);
-	desc->data_dma_base.hi = upper_32_bits(vma_addr);
+	/* Disable interrupts as we will poll for the HALT bit */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, false);
 
-	/* We don't need the ACR firmware anymore */
-	nvkm_gpuobj_unmap(&vma);
-
-	return ret;
-}
+	/* Set default error value in mailbox register */
+	nvkm_falcon_wr32(falcon, 0x040, 0xdeada5a5);
 
-/*
- * gm200_secboot_reset() - execute secure boot from the prepared state
- *
- * Load the HS bootloader and ask the falcon to run it. This will in turn
- * load the HS firmware and run it, so once the falcon stops all the managed
- * falcons should have their LS firmware loaded and be ready to run.
- */
-int
-gm200_secboot_reset(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int ret;
-
-	/* Make sure all blobs are ready */
-	ret = gm200_secboot_blobs_ready(gsb);
+	/* Start the HS bootloader */
+	nvkm_falcon_set_start_addr(falcon, sb->acr->start_address);
+	nvkm_falcon_start(falcon);
+	ret = nvkm_falcon_wait_for_halt(falcon, 100);
 	if (ret)
-		return ret;
-
-	/*
-	 * Dummy GM200 implementation: perform secure boot each time we are
-	 * called on FECS. Since only FECS and GPCCS are managed and started
-	 * together, this ought to be safe.
-	 *
-	 * Once we have proper PMU firmware and support, this will be changed
-	 * to a proper call to the PMU method.
-	 */
-	if (falcon != NVKM_SECBOOT_FALCON_FECS)
 		goto end;
 
-	/* If WPR is set and we have an unload blob, run it to unlock WPR */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE) {
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
-		if (ret)
-			return ret;
+	/* If mailbox register contains an error code, then ACR has failed */
+	ret = nvkm_falcon_rd32(falcon, 0x040);
+	if (ret) {
+		nvkm_error(subdev, "ACR boot failed, ret 0x%08x", ret);
+		ret = -EINVAL;
+		goto end;
 	}
 
-	/* Reload all managed falcons */
-	ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_load_blob,
-					&gsb->acr_load_bl_desc);
-	if (ret)
-		return ret;
-
 end:
-	gsb->falcon_state[falcon] = RESET;
-	return 0;
-}
+	/* Reenable interrupts */
+	nvkm_mc_intr_mask(sb->subdev.device, falcon->owner->index, true);
 
-int
-gm200_secboot_start(struct nvkm_secboot *sb, enum nvkm_secboot_falcon falcon)
-{
-	struct gm200_secboot *gsb = gm200_secboot(sb);
-	int base;
-
-	switch (falcon) {
-	case NVKM_SECBOOT_FALCON_FECS:
-		base = 0x409000;
-		break;
-	case NVKM_SECBOOT_FALCON_GPCCS:
-		base = 0x41a000;
-		break;
-	default:
-		nvkm_error(&sb->subdev, "cannot start unhandled falcon!\n");
-		return -EINVAL;
-	}
-
-	nvkm_wr32(sb->subdev.device, base + 0x130, 0x00000002);
-	gsb->falcon_state[falcon] = RUNNING;
+	/* We don't need the ACR firmware anymore */
+	nvkm_gpuobj_unmap(&vma);
+	nvkm_falcon_put(falcon, subdev);
 
-	return 0;
+	return ret;
 }
 
-
-
 int
-gm200_secboot_init(struct nvkm_secboot *sb)
+gm200_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	struct nvkm_device *device = sb->subdev.device;
@@ -1361,24 +132,22 @@ gm200_secboot_init(struct nvkm_secboot *sb)
 	nvkm_wo32(gsb->inst, 0x20c, upper_32_bits(vm_area_len - 1));
 	nvkm_done(gsb->inst);
 
+	if (sb->acr->func->oneinit) {
+		ret = sb->acr->func->oneinit(sb->acr, sb);
+		if (ret)
+			return ret;
+	}
+
 	return 0;
 }
 
 int
 gm200_secboot_fini(struct nvkm_secboot *sb, bool suspend)
 {
-	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret = 0;
-	int i;
-
-	/* Run the unload blob to unprotect the WPR region */
-	if (gsb->acr_unload_blob &&
-	    gsb->falcon_state[NVKM_SECBOOT_FALCON_FECS] != NON_SECURE)
-		ret = gm200_secboot_run_hs_blob(gsb, gsb->acr_unload_blob,
-						&gsb->acr_unload_bl_desc);
 
-	for (i = 0; i < NVKM_SECBOOT_FALCON_END; i++)
-		gsb->falcon_state[i] = NON_SECURE;
+	if (sb->acr->func->fini)
+		ret = sb->acr->func->fini(sb->acr, sb, suspend);
 
 	return ret;
 }
@@ -1388,11 +157,7 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 
-	nvkm_gpuobj_del(&gsb->acr_unload_blob);
-
-	kfree(gsb->hsbl_blob);
-	nvkm_gpuobj_del(&gsb->acr_load_blob);
-	nvkm_gpuobj_del(&gsb->ls_blob);
+	sb->acr->func->dtor(sb->acr);
 
 	nvkm_vm_ref(NULL, &gsb->vm, gsb->pgd);
 	nvkm_gpuobj_del(&gsb->pgd);
@@ -1405,50 +170,9 @@ gm200_secboot_dtor(struct nvkm_secboot *sb)
 static const struct nvkm_secboot_func
 gm200_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm200_secboot_init,
+	.oneinit = gm200_secboot_oneinit,
 	.fini = gm200_secboot_fini,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS) |
-			   BIT(NVKM_SECBOOT_FALCON_GPCCS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
-};
-
-/**
- * gm200_fixup_bl_desc - just copy the BL descriptor
- *
- * Use the GM200 descriptor format by default.
- */
-static void
-gm200_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	memcpy(ret, desc, sizeof(*desc));
-}
-
-static void
-gm200_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-
-	/* WPR region information for the HS binary to set up */
-	desc->wpr_region_id = 1;
-	desc->regions.no_regions = 1;
-	desc->regions.region_props[0].region_id = 1;
-	desc->regions.region_props[0].start_addr = gsb->wpr_addr >> 8;
-	desc->regions.region_props[0].end_addr =
-		(gsb->wpr_addr + gsb->wpr_size) >> 8;
-}
-
-static const struct gm200_secboot_func
-gm200_secboot_func = {
-	.bl_desc_size = sizeof(struct gm200_flcn_bl_desc),
-	.fixup_bl_desc = gm200_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm200_secboot_fixup_hs_desc,
-	.prepare_blobs = gm200_secboot_prepare_blobs,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -1457,6 +181,12 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r361_new(BIT(NVKM_SECBOOT_FALCON_FECS) |
+			   BIT(NVKM_SECBOOT_FALCON_GPCCS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -1465,15 +195,14 @@ gm200_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm200_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm200_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm200_secboot_func;
-
 	return 0;
 }
 
+
 MODULE_FIRMWARE("nvidia/gm200/acr/bl.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_load.bin");
 MODULE_FIRMWARE("nvidia/gm200/acr/ucode_unload.bin");
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
new file mode 100644
index 000000000000..45adf1a3bc20
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm200.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_GM200_H__
+#define __NVKM_SECBOOT_GM200_H__
+
+#include "priv.h"
+
+struct gm200_secboot {
+	struct nvkm_secboot base;
+
+	/* Instance block & address space used for HS FW execution */
+	struct nvkm_gpuobj *inst;
+	struct nvkm_gpuobj *pgd;
+	struct nvkm_vm *vm;
+};
+#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
+
+int gm200_secboot_oneinit(struct nvkm_secboot *);
+int gm200_secboot_fini(struct nvkm_secboot *, bool);
+void *gm200_secboot_dtor(struct nvkm_secboot *);
+int gm200_secboot_run_blob(struct nvkm_secboot *, struct nvkm_gpuobj *);
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
index d5395ebfe8d3..6707b8edc086 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/gm20b.c
@@ -20,103 +20,8 @@
  * DEALINGS IN THE SOFTWARE.
  */
 
-#include "priv.h"
-
-#include <core/gpuobj.h>
-
-/*
- * The BL header format used by GM20B's firmware is slightly different
- * from the one of GM200. Fix the differences here.
- */
-struct gm20b_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	u32 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	u32 data_dma_base;
-	u32 data_size;
-};
-
-static int
-gm20b_secboot_prepare_blobs(struct gm200_secboot *gsb)
-{
-	struct nvkm_subdev *subdev = &gsb->base.subdev;
-	int acr_size;
-	int ret;
-
-	ret = gm20x_secboot_prepare_blobs(gsb);
-	if (ret)
-		return ret;
-
-	acr_size = gsb->acr_load_blob->size;
-	/*
-	 * On Tegra the WPR region is set by the bootloader. It is illegal for
-	 * the HS blob to be larger than this region.
-	 */
-	if (acr_size > gsb->wpr_size) {
-		nvkm_error(subdev, "WPR region too small for FW blob!\n");
-		nvkm_error(subdev, "required: %dB\n", acr_size);
-		nvkm_error(subdev, "WPR size: %dB\n", gsb->wpr_size);
-		return -ENOSPC;
-	}
-
-	return 0;
-}
-
-/**
- * gm20b_secboot_fixup_bl_desc - adapt BL descriptor to format used by GM20B FW
- *
- * There is only a slight format difference (DMA addresses being 32-bits and
- * 256B-aligned) to address.
- */
-static void
-gm20b_secboot_fixup_bl_desc(const struct gm200_flcn_bl_desc *desc, void *ret)
-{
-	struct gm20b_flcn_bl_desc *gdesc = ret;
-	u64 addr;
-
-	memcpy(gdesc->reserved, desc->reserved, sizeof(gdesc->reserved));
-	memcpy(gdesc->signature, desc->signature, sizeof(gdesc->signature));
-	gdesc->ctx_dma = desc->ctx_dma;
-	addr = desc->code_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->code_dma_base.lo;
-	gdesc->code_dma_base = lower_32_bits(addr >> 8);
-	gdesc->non_sec_code_off = desc->non_sec_code_off;
-	gdesc->non_sec_code_size = desc->non_sec_code_size;
-	gdesc->sec_code_off = desc->sec_code_off;
-	gdesc->sec_code_size = desc->sec_code_size;
-	gdesc->code_entry_point = desc->code_entry_point;
-	addr = desc->data_dma_base.hi;
-	addr <<= 32;
-	addr |= desc->data_dma_base.lo;
-	gdesc->data_dma_base = lower_32_bits(addr >> 8);
-	gdesc->data_size = desc->data_size;
-}
-
-static void
-gm20b_secboot_fixup_hs_desc(struct gm200_secboot *gsb,
-			    struct hsflcn_acr_desc *desc)
-{
-	desc->ucode_blob_base = gsb->ls_blob->addr;
-	desc->ucode_blob_size = gsb->ls_blob->size;
-
-	desc->wpr_offset = 0;
-}
-
-static const struct gm200_secboot_func
-gm20b_secboot_func = {
-	.bl_desc_size = sizeof(struct gm20b_flcn_bl_desc),
-	.fixup_bl_desc = gm20b_secboot_fixup_bl_desc,
-	.fixup_hs_desc = gm20b_secboot_fixup_hs_desc,
-	.prepare_blobs = gm20b_secboot_prepare_blobs,
-};
-
+#include "acr.h"
+#include "gm200.h"
 
 #ifdef CONFIG_ARCH_TEGRA
 #define TEGRA_MC_BASE				0x70019000
@@ -144,15 +49,15 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 		nvkm_error(&sb->subdev, "Cannot map Tegra MC registers\n");
 		return PTR_ERR(mc);
 	}
-	gsb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
+	sb->wpr_addr = ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_0) |
 	      ((u64)ioread32_native(mc + MC_SECURITY_CARVEOUT2_BOM_HI_0) << 32);
-	gsb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
+	sb->wpr_size = ioread32_native(mc + MC_SECURITY_CARVEOUT2_SIZE_128K)
 		<< 17;
 	cfg = ioread32_native(mc + MC_SECURITY_CARVEOUT2_CFG0);
 	iounmap(mc);
 
 	/* Check that WPR settings are valid */
-	if (gsb->wpr_size == 0) {
+	if (sb->wpr_size == 0) {
 		nvkm_error(&sb->subdev, "WPR region is empty\n");
 		return -EINVAL;
 	}
@@ -174,7 +79,7 @@ gm20b_tegra_read_wpr(struct gm200_secboot *gsb)
 #endif
 
 static int
-gm20b_secboot_init(struct nvkm_secboot *sb)
+gm20b_secboot_oneinit(struct nvkm_secboot *sb)
 {
 	struct gm200_secboot *gsb = gm200_secboot(sb);
 	int ret;
@@ -183,17 +88,15 @@ gm20b_secboot_init(struct nvkm_secboot *sb)
 	if (ret)
 		return ret;
 
-	return gm200_secboot_init(sb);
+	return gm200_secboot_oneinit(sb);
 }
 
 static const struct nvkm_secboot_func
 gm20b_secboot = {
 	.dtor = gm200_secboot_dtor,
-	.init = gm20b_secboot_init,
-	.reset = gm200_secboot_reset,
-	.start = gm200_secboot_start,
-	.managed_falcons = BIT(NVKM_SECBOOT_FALCON_FECS),
-	.boot_falcon = NVKM_SECBOOT_FALCON_PMU,
+	.oneinit = gm20b_secboot_oneinit,
+	.fini = gm200_secboot_fini,
+	.run_blob = gm200_secboot_run_blob,
 };
 
 int
@@ -202,6 +105,11 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 {
 	int ret;
 	struct gm200_secboot *gsb;
+	struct nvkm_acr *acr;
+
+	acr = acr_r352_new(BIT(NVKM_SECBOOT_FALCON_FECS));
+	if (IS_ERR(acr))
+		return PTR_ERR(acr);
 
 	gsb = kzalloc(sizeof(*gsb), GFP_KERNEL);
 	if (!gsb) {
@@ -210,12 +118,10 @@ gm20b_secboot_new(struct nvkm_device *device, int index,
 	}
 	*psb = &gsb->base;
 
-	ret = nvkm_secboot_ctor(&gm20b_secboot, device, index, &gsb->base);
+	ret = nvkm_secboot_ctor(&gm20b_secboot, acr, device, index, &gsb->base);
 	if (ret)
 		return ret;
 
-	gsb->func = &gm20b_secboot_func;
-
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
new file mode 100644
index 000000000000..00886cee57eb
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode.h
@@ -0,0 +1,151 @@
+/*
+ * Copyright (c) 2014, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __NVKM_SECBOOT_LS_UCODE_H__
+#define __NVKM_SECBOOT_LS_UCODE_H__
+
+#include <core/os.h>
+#include <core/subdev.h>
+#include <subdev/secboot.h>
+
+
+/**
+ * struct ls_ucode_img_desc - descriptor of firmware image
+ * @descriptor_size:		size of this descriptor
+ * @image_size:			size of the whole image
+ * @bootloader_start_offset:	start offset of the bootloader in ucode image
+ * @bootloader_size:		size of the bootloader
+ * @bootloader_imem_offset:	start off set of the bootloader in IMEM
+ * @bootloader_entry_point:	entry point of the bootloader in IMEM
+ * @app_start_offset:		start offset of the LS firmware
+ * @app_size:			size of the LS firmware's code and data
+ * @app_imem_offset:		offset of the app in IMEM
+ * @app_imem_entry:		entry point of the app in IMEM
+ * @app_dmem_offset:		offset of the data in DMEM
+ * @app_resident_code_offset:	offset of app code from app_start_offset
+ * @app_resident_code_size:	size of the code
+ * @app_resident_data_offset:	offset of data from app_start_offset
+ * @app_resident_data_size:	size of data
+ *
+ * A firmware image contains the code, data, and bootloader of a given LS
+ * falcon in a single blob. This structure describes where everything is.
+ *
+ * This can be generated from a (bootloader, code, data) set if they have
+ * been loaded separately, or come directly from a file.
+ */
+struct ls_ucode_img_desc {
+	u32 descriptor_size;
+	u32 image_size;
+	u32 tools_version;
+	u32 app_version;
+	char date[64];
+	u32 bootloader_start_offset;
+	u32 bootloader_size;
+	u32 bootloader_imem_offset;
+	u32 bootloader_entry_point;
+	u32 app_start_offset;
+	u32 app_size;
+	u32 app_imem_offset;
+	u32 app_imem_entry;
+	u32 app_dmem_offset;
+	u32 app_resident_code_offset;
+	u32 app_resident_code_size;
+	u32 app_resident_data_offset;
+	u32 app_resident_data_size;
+	u32 nb_overlays;
+	struct {u32 start; u32 size; } load_ovl[64];
+	u32 compressed;
+};
+
+/**
+ * struct ls_ucode_img - temporary storage for loaded LS firmwares
+ * @node:		to link within lsf_ucode_mgr
+ * @falcon_id:		ID of the falcon this LS firmware is for
+ * @ucode_desc:		loaded or generated map of ucode_data
+ * @ucode_data:		firmware payload (code and data)
+ * @ucode_size:		size in bytes of data in ucode_data
+ * @sig:		signature for this firmware
+ * @sig:size:		size of the signature in bytes
+ *
+ * Preparing the WPR LS blob requires information about all the LS firmwares
+ * (size, etc) to be known. This structure contains all the data of one LS
+ * firmware.
+ */
+struct ls_ucode_img {
+	struct list_head node;
+	enum nvkm_secboot_falcon falcon_id;
+
+	struct ls_ucode_img_desc ucode_desc;
+	u8 *ucode_data;
+	u32 ucode_size;
+
+	u8 *sig;
+	u32 sig_size;
+};
+
+/**
+ * struct fw_bin_header - header of firmware files
+ * @bin_magic:		always 0x3b1d14f0
+ * @bin_ver:		version of the bin format
+ * @bin_size:		entire image size including this header
+ * @header_offset:	offset of the firmware/bootloader header in the file
+ * @data_offset:	offset of the firmware/bootloader payload in the file
+ * @data_size:		size of the payload
+ *
+ * This header is located at the beginning of the HS firmware and HS bootloader
+ * files, to describe where the headers and data can be found.
+ */
+struct fw_bin_header {
+	u32 bin_magic;
+	u32 bin_ver;
+	u32 bin_size;
+	u32 header_offset;
+	u32 data_offset;
+	u32 data_size;
+};
+
+/**
+ * struct fw_bl_desc - firmware bootloader descriptor
+ * @start_tag:		starting tag of bootloader
+ * @desc_dmem_load_off:	DMEM offset of flcn_bl_dmem_desc
+ * @code_off:		offset of code section
+ * @code_size:		size of code section
+ * @data_off:		offset of data section
+ * @data_size:		size of data section
+ *
+ * This structure is embedded in bootloader firmware files at to describe the
+ * IMEM and DMEM layout expected by the bootloader.
+ */
+struct fw_bl_desc {
+	u32 start_tag;
+	u32 dmem_load_off;
+	u32 code_off;
+	u32 code_size;
+	u32 data_off;
+	u32 data_size;
+};
+
+int acr_ls_ucode_load_fecs(const struct nvkm_subdev *, struct ls_ucode_img *);
+int acr_ls_ucode_load_gpccs(const struct nvkm_subdev *, struct ls_ucode_img *);
+
+
+#endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
new file mode 100644
index 000000000000..40a6df77bb8a
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/ls_ucode_gr.c
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include "ls_ucode.h"
+#include "acr.h"
+
+#include <core/firmware.h>
+
+#define BL_DESC_BLK_SIZE 256
+/**
+ * Build a ucode image and descriptor from provided bootloader, code and data.
+ *
+ * @bl:		bootloader image, including 16-bytes descriptor
+ * @code:	LS firmware code segment
+ * @data:	LS firmware data segment
+ * @desc:	ucode descriptor to be written
+ *
+ * Return: allocated ucode image with corresponding descriptor information. desc
+ *         is also updated to contain the right offsets within returned image.
+ */
+static void *
+ls_ucode_img_build(const struct firmware *bl, const struct firmware *code,
+		   const struct firmware *data, struct ls_ucode_img_desc *desc)
+{
+	struct fw_bin_header *bin_hdr = (void *)bl->data;
+	struct fw_bl_desc *bl_desc = (void *)bl->data + bin_hdr->header_offset;
+	void *bl_data = (void *)bl->data + bin_hdr->data_offset;
+	u32 pos = 0;
+	void *image;
+
+	desc->bootloader_start_offset = pos;
+	desc->bootloader_size = ALIGN(bl_desc->code_size, sizeof(u32));
+	desc->bootloader_imem_offset = bl_desc->start_tag * 256;
+	desc->bootloader_entry_point = bl_desc->start_tag * 256;
+
+	pos = ALIGN(pos + desc->bootloader_size, BL_DESC_BLK_SIZE);
+	desc->app_start_offset = pos;
+	desc->app_size = ALIGN(code->size, BL_DESC_BLK_SIZE) +
+			 ALIGN(data->size, BL_DESC_BLK_SIZE);
+	desc->app_imem_offset = 0;
+	desc->app_imem_entry = 0;
+	desc->app_dmem_offset = 0;
+	desc->app_resident_code_offset = 0;
+	desc->app_resident_code_size = ALIGN(code->size, BL_DESC_BLK_SIZE);
+
+	pos = ALIGN(pos + desc->app_resident_code_size, BL_DESC_BLK_SIZE);
+	desc->app_resident_data_offset = pos - desc->app_start_offset;
+	desc->app_resident_data_size = ALIGN(data->size, BL_DESC_BLK_SIZE);
+
+	desc->image_size = ALIGN(bl_desc->code_size, BL_DESC_BLK_SIZE) +
+			   desc->app_size;
+
+	image = kzalloc(desc->image_size, GFP_KERNEL);
+	if (!image)
+		return ERR_PTR(-ENOMEM);
+
+	memcpy(image + desc->bootloader_start_offset, bl_data,
+	       bl_desc->code_size);
+	memcpy(image + desc->app_start_offset, code->data, code->size);
+	memcpy(image + desc->app_start_offset + desc->app_resident_data_offset,
+	       data->data, data->size);
+
+	return image;
+}
+
+/**
+ * ls_ucode_img_load_gr() - load and prepare a LS GR ucode image
+ *
+ * Load the LS microcode, bootloader and signature and pack them into a single
+ * blob. Also generate the corresponding ucode descriptor.
+ */
+static int
+ls_ucode_img_load_gr(const struct nvkm_subdev *subdev, struct ls_ucode_img *img,
+		     const char *falcon_name)
+{
+	const struct firmware *bl, *code, *data, *sig;
+	char f[64];
+	int ret;
+
+	snprintf(f, sizeof(f), "gr/%s_bl", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &bl);
+	if (ret)
+		goto error;
+
+	snprintf(f, sizeof(f), "gr/%s_inst", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &code);
+	if (ret)
+		goto free_bl;
+
+	snprintf(f, sizeof(f), "gr/%s_data", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &data);
+	if (ret)
+		goto free_inst;
+
+	snprintf(f, sizeof(f), "gr/%s_sig", falcon_name);
+	ret = nvkm_firmware_get(subdev->device, f, &sig);
+	if (ret)
+		goto free_data;
+	img->sig = kmemdup(sig->data, sig->size, GFP_KERNEL);
+	if (!img->sig) {
+		ret = -ENOMEM;
+		goto free_sig;
+	}
+	img->sig_size = sig->size;
+
+	img->ucode_data = ls_ucode_img_build(bl, code, data,
+					     &img->ucode_desc);
+	if (IS_ERR(img->ucode_data)) {
+		ret = PTR_ERR(img->ucode_data);
+		goto free_data;
+	}
+	img->ucode_size = img->ucode_desc.image_size;
+
+free_sig:
+	nvkm_firmware_put(sig);
+free_data:
+	nvkm_firmware_put(data);
+free_inst:
+	nvkm_firmware_put(code);
+free_bl:
+	nvkm_firmware_put(bl);
+error:
+	return ret;
+}
+
+int
+acr_ls_ucode_load_fecs(const struct nvkm_subdev *subdev,
+		       struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "fecs");
+}
+
+int
+acr_ls_ucode_load_gpccs(const struct nvkm_subdev *subdev,
+			struct ls_ucode_img *img)
+{
+	return ls_ucode_img_load_gr(subdev, img, "gpccs");
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
index a9a8a0e1017e..936a65f5658c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/secboot/priv.h
@@ -27,20 +27,16 @@
 #include <subdev/mmu.h>
 
 struct nvkm_secboot_func {
-	int (*init)(struct nvkm_secboot *);
+	int (*oneinit)(struct nvkm_secboot *);
 	int (*fini)(struct nvkm_secboot *, bool suspend);
 	void *(*dtor)(struct nvkm_secboot *);
-	int (*reset)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-	int (*start)(struct nvkm_secboot *, enum nvkm_secboot_falcon);
-
-	/* ID of the falcon that will perform secure boot */
-	enum nvkm_secboot_falcon boot_falcon;
-	/* Bit-mask of IDs of managed falcons */
-	unsigned long managed_falcons;
+	int (*run_blob)(struct nvkm_secboot *, struct nvkm_gpuobj *);
 };
 
-int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_device *,
-		      int index, struct nvkm_secboot *);
+extern const char *nvkm_secboot_falcon_name[];
+
+int nvkm_secboot_ctor(const struct nvkm_secboot_func *, struct nvkm_acr *,
+		      struct nvkm_device *, int, struct nvkm_secboot *);
 int nvkm_secboot_falcon_reset(struct nvkm_secboot *);
 int nvkm_secboot_falcon_run(struct nvkm_secboot *);
 
@@ -48,187 +44,20 @@ struct flcn_u64 {
 	u32 lo;
 	u32 hi;
 };
+
 static inline u64 flcn64_to_u64(const struct flcn_u64 f)
 {
 	return ((u64)f.hi) << 32 | f.lo;
 }
 
-/**
- * struct gm200_flcn_bl_desc - DMEM bootloader descriptor
- * @signature:		16B signature for secure code. 0s if no secure code
- * @ctx_dma:		DMA context to be used by BL while loading code/data
- * @code_dma_base:	256B-aligned Physical FB Address where code is located
- *			(falcon's $xcbase register)
- * @non_sec_code_off:	offset from code_dma_base where the non-secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @non_sec_code_size:	the size of the nonSecure code part.
- * @sec_code_off:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @sec_code_size:	offset from code_dma_base where the secure code is
- *                      located. The offset must be multiple of 256 to help perf
- * @code_entry_point:	code entry point which will be invoked by BL after
- *                      code is loaded.
- * @data_dma_base:	256B aligned Physical FB Address where data is located.
- *			(falcon's $xdbase register)
- * @data_size:		size of data block. Should be multiple of 256B
- *
- * Structure used by the bootloader to load the rest of the code. This has
- * to be filled by host and copied into DMEM at offset provided in the
- * hsflcn_bl_desc.bl_desc_dmem_load_off.
- */
-struct gm200_flcn_bl_desc {
-	u32 reserved[4];
-	u32 signature[4];
-	u32 ctx_dma;
-	struct flcn_u64 code_dma_base;
-	u32 non_sec_code_off;
-	u32 non_sec_code_size;
-	u32 sec_code_off;
-	u32 sec_code_size;
-	u32 code_entry_point;
-	struct flcn_u64 data_dma_base;
-	u32 data_size;
-};
-
-/**
- * struct hsflcn_acr_desc - data section of the HS firmware
- *
- * This header is to be copied at the beginning of DMEM by the HS bootloader.
- *
- * @signature:		signature of ACR ucode
- * @wpr_region_id:	region ID holding the WPR header and its details
- * @wpr_offset:		offset from the WPR region holding the wpr header
- * @regions:		region descriptors
- * @nonwpr_ucode_blob_size:	size of LS blob
- * @nonwpr_ucode_blob_start:	FB location of LS blob is
- */
-struct hsflcn_acr_desc {
-	union {
-		u8 reserved_dmem[0x200];
-		u32 signatures[4];
-	} ucode_reserved_space;
-	u32 wpr_region_id;
-	u32 wpr_offset;
-	u32 mmu_mem_range;
-#define FLCN_ACR_MAX_REGIONS 2
-	struct {
-		u32 no_regions;
-		struct {
-			u32 start_addr;
-			u32 end_addr;
-			u32 region_id;
-			u32 read_mask;
-			u32 write_mask;
-			u32 client_mask;
-		} region_props[FLCN_ACR_MAX_REGIONS];
-	} regions;
-	u32 ucode_blob_size;
-	u64 ucode_blob_base __aligned(8);
-	struct {
-		u32 vpr_enabled;
-		u32 vpr_start;
-		u32 vpr_end;
-		u32 hdcp_policies;
-	} vpr_desc;
-};
-
-/**
- * Contains the whole secure boot state, allowing it to be performed as needed
- * @wpr_addr:		physical address of the WPR region
- * @wpr_size:		size in bytes of the WPR region
- * @ls_blob:		LS blob of all the LS firmwares, signatures, bootloaders
- * @ls_blob_size:	size of the LS blob
- * @ls_blob_nb_regions:	number of LS firmwares that will be loaded
- * @acr_blob:		HS blob
- * @acr_blob_vma:	mapping of the HS blob into the secure falcon's VM
- * @acr_bl_desc:	bootloader descriptor of the HS blob
- * @hsbl_blob:		HS blob bootloader
- * @inst:		instance block for HS falcon
- * @pgd:		page directory for the HS falcon
- * @vm:			address space used by the HS falcon
- * @falcon_state:	current state of the managed falcons
- * @firmware_ok:	whether the firmware blobs have been created
- */
-struct gm200_secboot {
-	struct nvkm_secboot base;
-	const struct gm200_secboot_func *func;
-
-	/*
-	 * Address and size of the WPR region. On dGPU this will be the
-	 * address of the LS blob. On Tegra this is a fixed region set by the
-	 * bootloader
-	 */
-	u64 wpr_addr;
-	u32 wpr_size;
-
-	/*
-	 * HS FW - lock WPR region (dGPU only) and load LS FWs
-	 * on Tegra the HS FW copies the LS blob into the fixed WPR instead
-	 */
-	struct nvkm_gpuobj *acr_load_blob;
-	struct gm200_flcn_bl_desc acr_load_bl_desc;
-
-	/* HS FW - unlock WPR region (dGPU only) */
-	struct nvkm_gpuobj *acr_unload_blob;
-	struct gm200_flcn_bl_desc acr_unload_bl_desc;
-
-	/* HS bootloader */
-	void *hsbl_blob;
-
-	/* LS FWs, to be loaded by the HS ACR */
-	struct nvkm_gpuobj *ls_blob;
-
-	/* Instance block & address space used for HS FW execution */
-	struct nvkm_gpuobj *inst;
-	struct nvkm_gpuobj *pgd;
-	struct nvkm_vm *vm;
-
-	/* To keep track of the state of all managed falcons */
-	enum {
-		/* In non-secure state, no firmware loaded, no privileges*/
-		NON_SECURE = 0,
-		/* In low-secure mode and ready to be started */
-		RESET,
-		/* In low-secure mode and running */
-		RUNNING,
-	} falcon_state[NVKM_SECBOOT_FALCON_END];
-
-	bool firmware_ok;
-};
-#define gm200_secboot(sb) container_of(sb, struct gm200_secboot, base)
-
-/**
- * Contains functions we wish to abstract between GM200-like implementations
- * @bl_desc_size:	size of the BL descriptor used by this chip.
- * @fixup_bl_desc:	hook that generates the proper BL descriptor format from
- *			the generic GM200 format into a data array of size
- *			bl_desc_size
- * @fixup_hs_desc:	hook that twiddles the HS descriptor before it is used
- * @prepare_blobs:	prepares the various blobs needed for secure booting
- */
-struct gm200_secboot_func {
-	/*
-	 * Size of the bootloader descriptor for this chip. A block of this
-	 * size is allocated before booting a falcon and the fixup_bl_desc
-	 * callback is called on it
-	 */
-	u32 bl_desc_size;
-	void (*fixup_bl_desc)(const struct gm200_flcn_bl_desc *, void *);
-
-	/*
-	 * Chip-specific modifications of the HS descriptor can be done here.
-	 * On dGPU this is used to fill the information about the WPR region
-	 * we want the HS FW to set up.
-	 */
-	void (*fixup_hs_desc)(struct gm200_secboot *, struct hsflcn_acr_desc *);
-	int (*prepare_blobs)(struct gm200_secboot *);
-};
+static inline struct flcn_u64 u64_to_flcn64(u64 u)
+{
+	struct flcn_u64 ret;
 
-int gm200_secboot_init(struct nvkm_secboot *);
-void *gm200_secboot_dtor(struct nvkm_secboot *);
-int gm200_secboot_reset(struct nvkm_secboot *, u32);
-int gm200_secboot_start(struct nvkm_secboot *, u32);
+	ret.hi = upper_32_bits(u);
+	ret.lo = lower_32_bits(u);
 
-int gm20x_secboot_prepare_blobs(struct gm200_secboot *);
+	return ret;
+}
 
 #endif
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
index 8894fee30cbc..df949fa7d05d 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/therm/base.c
@@ -64,10 +64,9 @@ nvkm_therm_update_trip(struct nvkm_therm *therm)
 }
 
 static int
-nvkm_therm_update_linear(struct nvkm_therm *therm)
+nvkm_therm_compute_linear_duty(struct nvkm_therm *therm, u8 linear_min_temp,
+                               u8 linear_max_temp)
 {
-	u8  linear_min_temp = therm->fan->bios.linear_min_temp;
-	u8  linear_max_temp = therm->fan->bios.linear_max_temp;
 	u8  temp = therm->func->temp_get(therm);
 	u16 duty;
 
@@ -85,6 +84,21 @@ nvkm_therm_update_linear(struct nvkm_therm *therm)
 	return duty;
 }
 
+static int
+nvkm_therm_update_linear(struct nvkm_therm *therm)
+{
+	u8  min = therm->fan->bios.linear_min_temp;
+	u8  max = therm->fan->bios.linear_max_temp;
+	return nvkm_therm_compute_linear_duty(therm, min, max);
+}
+
+static int
+nvkm_therm_update_linear_fallback(struct nvkm_therm *therm)
+{
+	u8 max = therm->bios_sensor.thrs_fan_boost.temp;
+	return nvkm_therm_compute_linear_duty(therm, 30, max);
+}
+
 static void
 nvkm_therm_update(struct nvkm_therm *therm, int mode)
 {
@@ -119,6 +133,8 @@ nvkm_therm_update(struct nvkm_therm *therm, int mode)
 		case NVBIOS_THERM_FAN_OTHER:
 			if (therm->cstate)
 				duty = therm->cstate;
+			else
+				duty = nvkm_therm_update_linear_fallback(therm);
 			poll = false;
 			break;
 		}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
index fe063d5728e2..67ada1d9a28c 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/top/base.c
@@ -95,6 +95,20 @@ nvkm_top_intr(struct nvkm_device *device, u32 intr, u64 *psubdevs)
 	return intr & ~handled;
 }
 
+int
+nvkm_top_fault_id(struct nvkm_device *device, enum nvkm_devidx devidx)
+{
+	struct nvkm_top *top = device->top;
+	struct nvkm_top_device *info;
+
+	list_for_each_entry(info, &top->device, head) {
+		if (info->index == devidx && info->fault >= 0)
+			return info->fault;
+	}
+
+	return -ENOENT;
+}
+
 enum nvkm_devidx
 nvkm_top_fault(struct nvkm_device *device, int fault)
 {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
index c34076223b7b..bcd179ba11d0 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/Kbuild
@@ -1,6 +1,7 @@
 nvkm-y += nvkm/subdev/volt/base.o
 nvkm-y += nvkm/subdev/volt/gpio.o
 nvkm-y += nvkm/subdev/volt/nv40.o
+nvkm-y += nvkm/subdev/volt/gf100.o
 nvkm-y += nvkm/subdev/volt/gk104.o
 nvkm-y += nvkm/subdev/volt/gk20a.o
 nvkm-y += nvkm/subdev/volt/gm20b.o
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
index 1c3d23b0e84a..e344901cfdc7 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/base.c
@@ -26,6 +26,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/vmap.h>
 #include <subdev/bios/volt.h>
+#include <subdev/therm.h>
 
 int
 nvkm_volt_get(struct nvkm_volt *volt)
@@ -50,33 +51,45 @@ static int
 nvkm_volt_set(struct nvkm_volt *volt, u32 uv)
 {
 	struct nvkm_subdev *subdev = &volt->subdev;
-	int i, ret = -EINVAL;
+	int i, ret = -EINVAL, best_err = volt->max_uv, best = -1;
 
 	if (volt->func->volt_set)
 		return volt->func->volt_set(volt, uv);
 
 	for (i = 0; i < volt->vid_nr; i++) {
-		if (volt->vid[i].uv == uv) {
-			ret = volt->func->vid_set(volt, volt->vid[i].vid);
-			nvkm_debug(subdev, "set %duv: %d\n", uv, ret);
+		int err = volt->vid[i].uv - uv;
+		if (err < 0 || err > best_err)
+			continue;
+
+		best_err = err;
+		best = i;
+		if (best_err == 0)
 			break;
-		}
 	}
+
+	if (best == -1) {
+		nvkm_error(subdev, "couldn't set %iuv\n", uv);
+		return ret;
+	}
+
+	ret = volt->func->vid_set(volt, volt->vid[best].vid);
+	nvkm_debug(subdev, "set req %duv to %duv: %d\n", uv,
+		   volt->vid[best].uv, ret);
 	return ret;
 }
 
-static int
-nvkm_volt_map(struct nvkm_volt *volt, u8 id)
+int
+nvkm_volt_map_min(struct nvkm_volt *volt, u8 id)
 {
 	struct nvkm_bios *bios = volt->subdev.device->bios;
 	struct nvbios_vmap_entry info;
 	u8  ver, len;
-	u16 vmap;
+	u32 vmap;
 
 	vmap = nvbios_vmap_entry_parse(bios, id, &ver, &len, &info);
 	if (vmap) {
 		if (info.link != 0xff) {
-			int ret = nvkm_volt_map(volt, info.link);
+			int ret = nvkm_volt_map_min(volt, info.link);
 			if (ret < 0)
 				return ret;
 			info.min += ret;
@@ -88,19 +101,79 @@ nvkm_volt_map(struct nvkm_volt *volt, u8 id)
 }
 
 int
-nvkm_volt_set_id(struct nvkm_volt *volt, u8 id, int condition)
+nvkm_volt_map(struct nvkm_volt *volt, u8 id, u8 temp)
+{
+	struct nvkm_bios *bios = volt->subdev.device->bios;
+	struct nvbios_vmap_entry info;
+	u8  ver, len;
+	u32 vmap;
+
+	vmap = nvbios_vmap_entry_parse(bios, id, &ver, &len, &info);
+	if (vmap) {
+		s64 result;
+
+		if (volt->speedo < 0)
+			return volt->speedo;
+
+		if (ver == 0x10 || (ver == 0x20 && info.mode == 0)) {
+			result  = div64_s64((s64)info.arg[0], 10);
+			result += div64_s64((s64)info.arg[1] * volt->speedo, 10);
+			result += div64_s64((s64)info.arg[2] * volt->speedo * volt->speedo, 100000);
+		} else if (ver == 0x20) {
+			switch (info.mode) {
+			/* 0x0 handled above! */
+			case 0x1:
+				result =  ((s64)info.arg[0] * 15625) >> 18;
+				result += ((s64)info.arg[1] * volt->speedo * 15625) >> 18;
+				result += ((s64)info.arg[2] * temp * 15625) >> 10;
+				result += ((s64)info.arg[3] * volt->speedo * temp * 15625) >> 18;
+				result += ((s64)info.arg[4] * volt->speedo * volt->speedo * 15625) >> 30;
+				result += ((s64)info.arg[5] * temp * temp * 15625) >> 18;
+				break;
+			case 0x3:
+				result = (info.min + info.max) / 2;
+				break;
+			case 0x2:
+			default:
+				result = info.min;
+				break;
+			}
+		} else {
+			return -ENODEV;
+		}
+
+		result = min(max(result, (s64)info.min), (s64)info.max);
+
+		if (info.link != 0xff) {
+			int ret = nvkm_volt_map(volt, info.link, temp);
+			if (ret < 0)
+				return ret;
+			result += ret;
+		}
+		return result;
+	}
+
+	return id ? id * 10000 : -ENODEV;
+}
+
+int
+nvkm_volt_set_id(struct nvkm_volt *volt, u8 id, u8 min_id, u8 temp,
+		 int condition)
 {
 	int ret;
 
 	if (volt->func->set_id)
 		return volt->func->set_id(volt, id, condition);
 
-	ret = nvkm_volt_map(volt, id);
+	ret = nvkm_volt_map(volt, id, temp);
 	if (ret >= 0) {
 		int prev = nvkm_volt_get(volt);
 		if (!condition || prev < 0 ||
 		    (condition < 0 && ret < prev) ||
 		    (condition > 0 && ret > prev)) {
+			int min = nvkm_volt_map(volt, min_id, temp);
+			if (min >= 0)
+				ret = max(min, ret);
 			ret = nvkm_volt_set(volt, ret);
 		} else {
 			ret = 0;
@@ -112,14 +185,16 @@ nvkm_volt_set_id(struct nvkm_volt *volt, u8 id, int condition)
 static void
 nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 {
+	struct nvkm_subdev *subdev = &bios->subdev;
 	struct nvbios_volt_entry ivid;
 	struct nvbios_volt info;
 	u8  ver, hdr, cnt, len;
-	u16 data;
+	u32 data;
 	int i;
 
 	data = nvbios_volt_parse(bios, &ver, &hdr, &cnt, &len, &info);
-	if (data && info.vidmask && info.base && info.step) {
+	if (data && info.vidmask && info.base && info.step && info.ranged) {
+		nvkm_debug(subdev, "found ranged based VIDs\n");
 		volt->min_uv = info.min;
 		volt->max_uv = info.max;
 		for (i = 0; i < info.vidmask + 1; i++) {
@@ -132,7 +207,8 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 			info.base += info.step;
 		}
 		volt->vid_mask = info.vidmask;
-	} else if (data && info.vidmask) {
+	} else if (data && info.vidmask && !info.ranged) {
+		nvkm_debug(subdev, "found entry based VIDs\n");
 		volt->min_uv = 0xffffffff;
 		volt->max_uv = 0;
 		for (i = 0; i < cnt; i++) {
@@ -154,6 +230,14 @@ nvkm_volt_parse_bios(struct nvkm_bios *bios, struct nvkm_volt *volt)
 }
 
 static int
+nvkm_volt_speedo_read(struct nvkm_volt *volt)
+{
+	if (volt->func->speedo_read)
+		return volt->func->speedo_read(volt);
+	return -EINVAL;
+}
+
+static int
 nvkm_volt_init(struct nvkm_subdev *subdev)
 {
 	struct nvkm_volt *volt = nvkm_volt(subdev);
@@ -167,6 +251,21 @@ nvkm_volt_init(struct nvkm_subdev *subdev)
 	return 0;
 }
 
+static int
+nvkm_volt_oneinit(struct nvkm_subdev *subdev)
+{
+	struct nvkm_volt *volt = nvkm_volt(subdev);
+
+	volt->speedo = nvkm_volt_speedo_read(volt);
+	if (volt->speedo > 0)
+		nvkm_debug(&volt->subdev, "speedo %x\n", volt->speedo);
+
+	if (volt->func->oneinit)
+		return volt->func->oneinit(volt);
+
+	return 0;
+}
+
 static void *
 nvkm_volt_dtor(struct nvkm_subdev *subdev)
 {
@@ -177,6 +276,7 @@ static const struct nvkm_subdev_func
 nvkm_volt = {
 	.dtor = nvkm_volt_dtor,
 	.init = nvkm_volt_init,
+	.oneinit = nvkm_volt_oneinit,
 };
 
 void
@@ -191,9 +291,22 @@ nvkm_volt_ctor(const struct nvkm_volt_func *func, struct nvkm_device *device,
 
 	/* Assuming the non-bios device should build the voltage table later */
 	if (bios) {
+		u8 ver, hdr, cnt, len;
+		struct nvbios_vmap vmap;
+
 		nvkm_volt_parse_bios(bios, volt);
 		nvkm_debug(&volt->subdev, "min: %iuv max: %iuv\n",
 			   volt->min_uv, volt->max_uv);
+
+		if (nvbios_vmap_parse(bios, &ver, &hdr, &cnt, &len, &vmap)) {
+			volt->max0_id = vmap.max0;
+			volt->max1_id = vmap.max1;
+			volt->max2_id = vmap.max2;
+		} else {
+			volt->max0_id = 0xff;
+			volt->max1_id = 0xff;
+			volt->max2_id = 0xff;
+		}
 	}
 
 	if (volt->vid_nr) {
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf100.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf100.c
new file mode 100644
index 000000000000..d9ed6925ca64
--- /dev/null
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gf100.c
@@ -0,0 +1,70 @@
+/*
+ * Copyright 2016 Karol Herbst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Karol Herbst
+ */
+#include "priv.h"
+
+#include <subdev/fuse.h>
+
+static int
+gf100_volt_speedo_read(struct nvkm_volt *volt)
+{
+	struct nvkm_device *device = volt->subdev.device;
+	struct nvkm_fuse *fuse = device->fuse;
+
+	if (!fuse)
+		return -EINVAL;
+
+	return nvkm_fuse_read(fuse, 0x1cc);
+}
+
+int
+gf100_volt_oneinit(struct nvkm_volt *volt)
+{
+	struct nvkm_subdev *subdev = &volt->subdev;
+	if (volt->speedo <= 0)
+		nvkm_error(subdev, "couldn't find speedo value, volting not "
+			   "possible\n");
+	return 0;
+}
+
+static const struct nvkm_volt_func
+gf100_volt = {
+	.oneinit = gf100_volt_oneinit,
+	.vid_get = nvkm_voltgpio_get,
+	.vid_set = nvkm_voltgpio_set,
+	.speedo_read = gf100_volt_speedo_read,
+};
+
+int
+gf100_volt_new(struct nvkm_device *device, int index, struct nvkm_volt **pvolt)
+{
+	struct nvkm_volt *volt;
+	int ret;
+
+	ret = nvkm_volt_new_(&gf100_volt, device, index, &volt);
+	*pvolt = volt;
+	if (ret)
+		return ret;
+
+	return nvkm_voltgpio_init(volt);
+}
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
index 420bd84d8483..1c744e029454 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gk104.c
@@ -27,6 +27,7 @@
 #include <subdev/gpio.h>
 #include <subdev/bios.h>
 #include <subdev/bios/volt.h>
+#include <subdev/fuse.h>
 
 #define gk104_volt(p) container_of((p), struct gk104_volt, base)
 struct gk104_volt {
@@ -34,7 +35,7 @@ struct gk104_volt {
 	struct nvbios_volt bios;
 };
 
-int
+static int
 gk104_volt_get(struct nvkm_volt *base)
 {
 	struct nvbios_volt *bios = &gk104_volt(base)->bios;
@@ -47,7 +48,7 @@ gk104_volt_get(struct nvkm_volt *base)
 	return bios->base + bios->pwm_range * duty / div;
 }
 
-int
+static int
 gk104_volt_set(struct nvkm_volt *base, u32 uv)
 {
 	struct nvbios_volt *bios = &gk104_volt(base)->bios;
@@ -64,13 +65,33 @@ gk104_volt_set(struct nvkm_volt *base, u32 uv)
 	return 0;
 }
 
+static int
+gk104_volt_speedo_read(struct nvkm_volt *volt)
+{
+	struct nvkm_device *device = volt->subdev.device;
+	struct nvkm_fuse *fuse = device->fuse;
+	int ret;
+
+	if (!fuse)
+		return -EINVAL;
+
+	nvkm_wr32(device, 0x122634, 0x0);
+	ret = nvkm_fuse_read(fuse, 0x3a8);
+	nvkm_wr32(device, 0x122634, 0x41);
+	return ret;
+}
+
 static const struct nvkm_volt_func
 gk104_volt_pwm = {
+	.oneinit = gf100_volt_oneinit,
 	.volt_get = gk104_volt_get,
 	.volt_set = gk104_volt_set,
+	.speedo_read = gk104_volt_speedo_read,
 }, gk104_volt_gpio = {
+	.oneinit = gf100_volt_oneinit,
 	.vid_get = nvkm_voltgpio_get,
 	.vid_set = nvkm_voltgpio_set,
+	.speedo_read = gk104_volt_speedo_read,
 };
 
 int
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
index 74db4d28930f..2925b9cae681 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gm20b.c
@@ -25,7 +25,7 @@
 
 #include <core/tegra.h>
 
-const struct cvb_coef gm20b_cvb_coef[] = {
+static const struct cvb_coef gm20b_cvb_coef[] = {
 	/* KHz,             c0,      c1,   c2 */
 	/*  76800 */ { 1786666,  -85625, 1632 },
 	/* 153600 */ { 1846729,  -87525, 1632 },
@@ -58,7 +58,7 @@ static const struct cvb_coef gm20b_na_cvb_coef[] = {
 	/* 998400 */ { 1316991, 8144, -940, 808, -21583, 226 },
 };
 
-const u32 speedo_to_vmin[] = {
+static const u32 speedo_to_vmin[] = {
 	/*   0,      1,      2,      3,      4, */
 	950000, 840000, 818750, 840000, 810000,
 };
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gpio.c b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gpio.c
index d2bac1d77819..443c031b966b 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gpio.c
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/gpio.c
@@ -25,6 +25,7 @@
 #include <subdev/bios.h>
 #include <subdev/bios/gpio.h>
 #include <subdev/gpio.h>
+#include "priv.h"
 
 static const u8 tags[] = {
 	DCB_GPIO_VID0, DCB_GPIO_VID1, DCB_GPIO_VID2, DCB_GPIO_VID3,
diff --git a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
index d5140d991161..354bafe4b4e2 100644
--- a/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
+++ b/drivers/gpu/drm/nouveau/nvkm/subdev/volt/priv.h
@@ -9,11 +9,13 @@ int nvkm_volt_new_(const struct nvkm_volt_func *, struct nvkm_device *,
 		   int index, struct nvkm_volt **);
 
 struct nvkm_volt_func {
+	int (*oneinit)(struct nvkm_volt *);
 	int (*volt_get)(struct nvkm_volt *);
 	int (*volt_set)(struct nvkm_volt *, u32 uv);
 	int (*vid_get)(struct nvkm_volt *);
 	int (*vid_set)(struct nvkm_volt *, u8 vid);
 	int (*set_id)(struct nvkm_volt *, u8 id, int condition);
+	int (*speedo_read)(struct nvkm_volt *);
 };
 
 int nvkm_voltgpio_init(struct nvkm_volt *);
@@ -23,4 +25,6 @@ int nvkm_voltgpio_set(struct nvkm_volt *, u8);
 int nvkm_voltpwm_init(struct nvkm_volt *volt);
 int nvkm_voltpwm_get(struct nvkm_volt *volt);
 int nvkm_voltpwm_set(struct nvkm_volt *volt, u32 uv);
+
+int gf100_volt_oneinit(struct nvkm_volt *);
 #endif